// similarArts.js — tag-based lookup of artworks similar to a given artwork.

  1. const models = require('../../models');
  2. // 主分类
  3. const mainTags = ['latest', 'data_good', 'animal', 'people', 'mandala', 'scenery', 'life', 'plant', 'food', 'fantasy', 'culture', 'famous', 'places', 'building', 'special_date'];
  4. const mainTagSet = new Set(mainTags);
  5. // 次分类
  6. const minorTags = [
  7. 'cat', 'landscape', 'dog', 'countryside', 'forest', 'bird', 'river', 'mountains', 'snow', 'winter',
  8. 'house', 'village', 'heart', 'Christmas', 'garden', 'butterfly', 'fashion', 'summer', 'farm', 'boy',
  9. 'sea', 'car', 'horse', 'lake', 'autumn', 'tree', 'wild', 'woman', 'room', 'park', 'ocean', 'meadow',
  10. 'rabbit', 'family', 'home', 'mountain', 'halloween', 'bridge', 'friends', 'city', 'baby', 'sunset',
  11. 'simple', 'boat', 'window', 'man', 'trees', 'fruit', 'rose', 'vacation', 'evening', 'castle', 'snowman',
  12. 'street', 'tiger', 'grass', 'lady', 'child', 'vintage', 'holiday', 'pasture', 'deer', 'sweet', 'night',
  13. 'beach', 'travel', 'dress', 'fish', 'couple', 'view', 'fairy', 'yard', 'sky', 'toys', 'wildflowers',
  14. 'love', 'pumpkin', 'water', 'transportation', 'birds', 'rocks', 'downtown', 'ice', 'beer', 'spring',
  15. 'road', 'wolf', 'unicorn', 'lion', 'stone', 'magic', 'cake', 'book', 'furniture', 'children', 'fox',
  16. 'cartoon', 'duck', 'owl', 'sport', 'angel', 'cub', 'decorations', 'pond', 'sheep', 'fence', 'interior',
  17. 'coastline', 'beauty', 'chicken', 'train', 'mermaid', 'history', 'moon', 'picnic', 'bedroom', 'blooming',
  18. 'sunflower', 'lovers', 'stairs', 'walk', 'jungle', 'livingroom', 'thanksgiving', 'kingdom', 'domestic',
  19. 'mother', 'lotus', 'dragon', 'panda', 'pet', 'cottage', 'tea', 'coast'
  20. ];
  21. const minorTagSet = new Set(minorTags);
  22. /**
  23. * 拆分标签为主要分类、次要分类、其他分类(使用Set提高查询效率)
  24. * @param {Array} tags 标签数组
  25. * @returns {Object} 包含主分类和小分类标签的Set对象
  26. */
  27. const splitTagsToSets = (tags = []) => {
  28. const main = new Set();
  29. const minor = new Set();
  30. const other = new Set();
  31. tags.forEach(tag => {
  32. if (mainTagSet.has(tag)) {
  33. main.add(tag);
  34. } else if (minorTagSet.has(tag)) {
  35. minor.add(tag);
  36. }else {
  37. other.add(tag);
  38. }
  39. });
  40. return { main, minor, other };
  41. };
  42. /**
  43. * 计算标签匹配得分(优化版,使用Set.has提高效率)
  44. * 分数权重: 主分类得分最低为1分,次分类是重点分类,得4分,其他分类得2分
  45. * @param {Object} targetTagSets 目标作品的标签集合(主分类和小分类)
  46. * @param {Array} candidateTags 候选作品标签数组
  47. * @returns {Number} 匹配得分
  48. */
  49. const calculateScore = (targetTagSets, candidateTags) => {
  50. const { main: targetMain, minor: targetMinor, other: targetOther } = targetTagSets;
  51. let minorMatches = 0;
  52. let otherMatches = 0;
  53. let mainMatches = 0;
  54. // 遍历候选标签,直接判断归属并计数(O(1)复杂度)
  55. candidateTags.forEach(tag => {
  56. if (targetMinor.has(tag)) {
  57. minorMatches++;
  58. } else if (targetOther.has(tag)) {
  59. otherMatches++;
  60. } else if (targetMain.has(tag)) {
  61. mainMatches++;
  62. }
  63. });
  64. return minorMatches * 4 + otherMatches * 2 + mainMatches;
  65. };
  66. /**
  67. * 从结果中随机选择指定数量的作品
  68. * @param {Array} results 排序后的结果数组
  69. * @param {Number} limit 最大返回数量
  70. * @returns {Array} 随机选择后的结果
  71. */
  72. const randomizeResults = (results, limit) => {
  73. if (results.length <= limit) return results;
  74. // 按分数分组
  75. const scoreGroups = {};
  76. results.forEach(item => {
  77. if (!scoreGroups[item.score]) {
  78. scoreGroups[item.score] = [];
  79. }
  80. scoreGroups[item.score].push(item);
  81. });
  82. // 按分数从高到低排序分组
  83. const sortedScores = Object.keys(scoreGroups).sort((a, b) => b - a);
  84. const finalResults = [];
  85. let remaining = limit;
  86. // 从高分到低分依次随机选取
  87. for (const score of sortedScores) {
  88. if (remaining <= 0) break;
  89. const group = scoreGroups[score];
  90. // 打乱当前分组
  91. const shuffled = [...group].sort(() => Math.random() - 0.5);
  92. // 取需要的数量或全部
  93. const take = Math.min(remaining, shuffled.length);
  94. finalResults.push(...shuffled.slice(0, take));
  95. remaining -= take;
  96. }
  97. return finalResults;
  98. };
  99. /**
  100. * 获取相似作品(优化性能版本)
  101. * @param {String} artId 目标作品ID
  102. * @param {Object} options 配置选项
  103. * @param {Number} options.limit 最大返回数量,默认100
  104. * @param {Number} options.candidateLimit 候选集最大数量,默认2000
  105. * @param {Date} options.dateThreshold 时间过滤阈值(可选)
  106. * @param {String} options.fields 需要返回的字段,默认返回所有字段
  107. * @returns {Promise<Array>} 相似作品列表
  108. */
  109. const getSimilarArts = async (artId, {
  110. limit = 100,
  111. candidateLimit = 2000,
  112. dateThreshold = null,
  113. fields = null
  114. } = {}) => {
  115. try {
  116. // 获取目标作品(只查询需要的字段)
  117. const projection = fields || '';
  118. const targetArt = await models.Art.findById(artId, projection).lean();
  119. if (!targetArt) {
  120. throw new Error('作品不存在');
  121. }
  122. const targetTags = targetArt.tags || [];
  123. // 如果目标作品没有标签,返回随机作品
  124. if (targetTags.length === 0) {
  125. const query = { _id: { $ne: artId } };
  126. if (dateThreshold) {
  127. query.createdAt = { $gte: dateThreshold };
  128. }
  129. return Art.find(query)
  130. .select(projection)
  131. .limit(limit)
  132. .sort({ $natural: -1 }) // 自然排序,接近随机
  133. .lean();
  134. }
  135. // 提取所有相关标签(去重)
  136. const allTags = [...new Set(targetTags)];
  137. // 提前处理目标标签为Set,提高后续计算效率
  138. const targetTagSets = splitTagsToSets(targetTags);
  139. // 构建查询条件
  140. const query = {
  141. _id: { $ne: artId },
  142. tags: { $in: allTags },
  143. status: 9000,
  144. };
  145. // 可选:添加时间过滤,缩小候选集范围
  146. if (dateThreshold) {
  147. query.createdAt = { $gte: dateThreshold };
  148. }
  149. // 查询候选作品(限制数量、只返回必要字段)
  150. const candidates = await models.Art.find(query)
  151. .select(projection)
  152. .populate('user', 'username')
  153. .limit(candidateLimit)
  154. .lean();
  155. // 计算每个候选作品的匹配得分
  156. const scoredCandidates = candidates.map(candidate => ({
  157. ...candidate,
  158. score: calculateScore(targetTagSets, candidate.tags || [])
  159. }));
  160. // 过滤掉得分为0的作品并按分数降序排序
  161. const sortedResults = scoredCandidates
  162. .filter(item => item.score > 0)
  163. .sort((a, b) => b.score - a);
  164. // 随机选择结果
  165. const finalResults = randomizeResults(sortedResults, limit);
  166. // 移除分数字段(如果不需要返回)
  167. return finalResults.map(({ score, ...rest }) => rest);
  168. } catch (error) {
  169. console.error('获取相似作品失败:', error);
  170. throw error; // 抛出错误让调用方处理
  171. }
  172. };
  173. /**
  174. * 简单算法, 同artsite网站
  175. * @param {*} artId
  176. * @param {*} param1
  177. * @returns
  178. */
  179. const getSimilarArtsSimple = async (artId, {
  180. limit = 200,
  181. fields = null
  182. } = {}) => {
  183. try {
  184. // 获取目标作品(只查询需要的字段)
  185. const projection = fields || '';
  186. const targetArt = await models.Art.findById(artId, projection).lean();
  187. if (!targetArt) {
  188. throw new Error('作品不存在');
  189. }
  190. const targetTags = targetArt.tags || [];
  191. const filteredTags = targetTags.filter(tag => !mainTagSet.has(tag)) || targetTags;
  192. // 构建查询条件
  193. const query = {
  194. _id: { $ne: artId },
  195. tags: { $in: filteredTags },
  196. status: 9000,
  197. };
  198. const docs = await models.Art.find(query)
  199. .select(projection)
  200. .populate('user', 'username')
  201. .limit(limit)
  202. .sort({ publishTime: 'desc' })
  203. .lean();
  204. return docs;
  205. } catch (error) {
  206. console.error('获取相似作品失败:', error);
  207. throw error; // 抛出错误让调用方处理
  208. }
  209. };
  210. module.exports = {
  211. getSimilarArts,
  212. getSimilarArtsSimple,
  213. };