소스 검색

升级similar算法

guoziyun 10 달 전
부모
커밋
339a0e88de
5개의 변경된 파일, 289줄 추가 및 25줄 삭제
  1. 32 22
      routes/v2/detail.js
  2. 254 0
      routes/v2/similarArts.js
  3. 1 1
      views/v2/app.ejs
  4. 1 1
      views/v2/detail.ejs
  5. 1 1
      views/v2/footer.ejs

+ 32 - 22
routes/v2/detail.js

@@ -9,6 +9,8 @@ const categories = require('../../config/category');
 const { tags } = require('../../config/tag');
 const { coloringList } = require('./config');
 
+const { getSimilarArts, getSimilarArtsSimple } = require('./similarArts');
+
 const CACHE_PREFIX = "art_v2";
 // const CACHE_EXPIRES = 60; // 60s刷新一次
 const CACHE_EXPIRES = 600;
@@ -37,24 +39,31 @@ router.get('/:id', function (req, res, next) {
       common.organizeDetail(doc);
 
       // 关联推荐
-      let mytags = [...doc.tags];
-      let cates = categories.map(e => e.id);
-      let alltags = tags.map(e => e.tag);
-      mytags = mytags.filter(e => !cates.includes(e) && alltags.includes(e));
-      if (mytags.length == 0) mytags = [...doc.tags];
-
-      let query = {
-        page: req.query.page,
-        length: req.query.length,
-        search: req.query.search,
-        orderBy: 'publishTime',
-        order: 'desc',
-        base: { open: true, status: 9000 },
-        filters: { tags: mytags },
-      }
+      // let mytags = [...doc.tags];
+      // let cates = categories.map(e => e.id);
+      // let alltags = tags.map(e => e.tag);
+      // mytags = mytags.filter(e => !cates.includes(e) && alltags.includes(e));
+      // if (mytags.length == 0) mytags = [...doc.tags];
+
+      // let query = {
+      //   page: req.query.page,
+      //   length: req.query.length,
+      //   search: req.query.search,
+      //   orderBy: 'publishTime',
+      //   order: 'desc',
+      //   base: { open: true, status: 9000 },
+      //   filters: { tags: mytags },
+      // }
+
+      // let result = await getListBuilder(query, models.Art, [{ path: 'user', select: 'username' }]);
+      // common.organizeData(result.data);
+
+      // 用新的算法
+      console.time('getSimilarArts');
+      let result = await  getSimilarArts(id, {limit: 30,  candidateLimit: 1000, fields : artSelect });
+      console.timeEnd('getSimilarArts')
+      common.organizeData(result);
 
-      let result = await getListBuilder(query, models.Art, [{ path: 'user', select: 'username' }]);
-      common.organizeData(result.data);
 
       // 填色页合集推荐
       const recmCollections = recommendColoringPages(doc, coloringList);
@@ -85,13 +94,14 @@ router.get('/:id', function (req, res, next) {
         title: `${doc.seoTitle}`,
         description: `${doc.seoDescription}`,
         detail: doc,
-        data: result.data,
+        // data: result.data,
         page: result.page,
         pageId: doc._id,
-        length: result.length,
-        recordsFiltered: result.recordsFiltered,
-        recordsTotal: result.recordsTotal,
-        relates: result.data,
+        // length: result.length,
+        // recordsFiltered: result.recordsFiltered,
+        // recordsTotal: result.recordsTotal,
+        // relates: result.data,
+        relates: result,
         uri: `/coloring-page/${doc._id}`,
         imageUrl: doc.thumb,
         pageUri: common.replaceUriParams,

+ 254 - 0
routes/v2/similarArts.js

@@ -0,0 +1,254 @@
+const models = require('../../models');
+
+// Main categories. `mainTagSet` mirrors the array as a Set for membership checks.
+const mainTags = ['latest', 'data_good', 'animal', 'people', 'mandala', 'scenery', 'life', 'plant', 'food', 'fantasy', 'culture', 'famous', 'places', 'building', 'special_date'];
+const mainTagSet = new Set(mainTags);
+
+// Minor (secondary) categories. `minorTagSet` mirrors the array as a Set for membership checks.
+const minorTags = [
+  'cat', 'landscape', 'dog', 'countryside', 'forest', 'bird', 'river', 'mountains', 'snow', 'winter', 
+  'house', 'village', 'heart', 'Christmas', 'garden', 'butterfly', 'fashion', 'summer', 'farm', 'boy',
+  'sea', 'car', 'horse', 'lake', 'autumn', 'tree', 'wild', 'woman', 'room', 'park', 'ocean', 'meadow', 
+  'rabbit', 'family',  'home', 'mountain', 'halloween', 'bridge', 'friends', 'city', 'baby', 'sunset',
+  'simple', 'boat', 'window', 'man', 'trees', 'fruit', 'rose', 'vacation', 'evening', 'castle', 'snowman',
+  'street', 'tiger', 'grass', 'lady', 'child', 'vintage', 'holiday', 'pasture', 'deer', 'sweet', 'night',
+  'beach', 'travel', 'dress', 'fish', 'couple', 'view', 'fairy', 'yard', 'sky', 'toys', 'wildflowers',
+  'love', 'pumpkin', 'water', 'transportation', 'birds', 'rocks', 'downtown', 'ice', 'beer', 'spring',
+  'road', 'wolf', 'unicorn', 'lion', 'stone', 'magic', 'cake', 'book', 'furniture', 'children', 'fox', 
+  'cartoon', 'duck', 'owl', 'sport', 'angel', 'cub', 'decorations', 'pond', 'sheep', 'fence', 'interior',
+  'coastline', 'beauty', 'chicken', 'train', 'mermaid', 'history', 'moon', 'picnic', 'bedroom', 'blooming',
+  'sunflower', 'lovers', 'stairs', 'walk', 'jungle', 'livingroom', 'thanksgiving', 'kingdom', 'domestic',
+  'mother', 'lotus', 'dragon', 'panda', 'pet', 'cottage', 'tea', 'coast'
+];
+const minorTagSet = new Set(minorTags);
+
+/**
+ * Partition a tag list into three buckets: main-category tags,
+ * minor-category tags, and all remaining tags. Membership is decided by the
+ * module-level `mainTagSet` and `minorTagSet`; the main set is checked first.
+ * @param {Array} tags Tag array (defaults to an empty array)
+ * @returns {Object} `{ main, minor, other }`, each a Set of tags
+ */
+const splitTagsToSets = (tags = []) => {
+  const buckets = { main: new Set(), minor: new Set(), other: new Set() };
+
+  // Resolve the bucket name for one tag; main takes precedence over minor.
+  const bucketNameFor = (tag) => {
+    if (mainTagSet.has(tag)) return 'main';
+    return minorTagSet.has(tag) ? 'minor' : 'other';
+  };
+
+  tags.forEach((tag) => {
+    buckets[bucketNameFor(tag)].add(tag);
+  });
+
+  return buckets;
+};
+
+/**
+ * Compute the tag-match score of a candidate against the target's tag sets.
+ * Each candidate tag contributes at most one weight, checked in this order:
+ * minor-category match = 4, other-tag match = 2, main-category match = 1.
+ * Repeated candidate tags are counted each time they occur.
+ * @param {Object} targetTagSets Target tag sets `{ main, minor, other }` (Sets)
+ * @param {Array} candidateTags Tag array of the candidate work
+ * @returns {Number} Match score
+ */
+const calculateScore = (targetTagSets, candidateTags) => {
+  // Weight of a single candidate tag; 0 when it matches none of the target sets.
+  const weightOf = (tag) => {
+    if (targetTagSets.minor.has(tag)) return 4;
+    if (targetTagSets.other.has(tag)) return 2;
+    if (targetTagSets.main.has(tag)) return 1;
+    return 0;
+  };
+
+  let total = 0;
+  candidateTags.forEach((tag) => {
+    total += weightOf(tag);
+  });
+  return total;
+};
+
+/**
+ * Pick up to `limit` items, preferring higher scores and randomizing the
+ * order within each score tier.
+ *
+ * When `results` has at most `limit` items, the input array is returned
+ * unchanged (same reference, same order). Otherwise items are grouped by
+ * `score`, groups are visited from highest to lowest score, each group is
+ * shuffled, and items are taken until `limit` is reached. The input array is
+ * never mutated.
+ * @param {Array} results Scored items; each item is expected to have a numeric `score`
+ * @param {Number} limit Maximum number of items to return
+ * @returns {Array} Selected items
+ */
+const randomizeResults = (results, limit) => {
+  if (results.length <= limit) return results;
+
+  // Group by score. A Map keeps the numeric keys as numbers (plain-object
+  // keys would be stringified).
+  const scoreGroups = new Map();
+  for (const item of results) {
+    const group = scoreGroups.get(item.score);
+    if (group) {
+      group.push(item);
+    } else {
+      scoreGroups.set(item.score, [item]);
+    }
+  }
+
+  // Visit score tiers from highest to lowest.
+  const sortedScores = [...scoreGroups.keys()].sort((a, b) => b - a);
+
+  // Fisher–Yates shuffle on a copy. `sort(() => Math.random() - 0.5)` is not
+  // used because an inconsistent comparator gives a biased,
+  // implementation-dependent permutation.
+  const shuffle = (items) => {
+    const copy = [...items];
+    for (let i = copy.length - 1; i > 0; i--) {
+      const j = Math.floor(Math.random() * (i + 1));
+      [copy[i], copy[j]] = [copy[j], copy[i]];
+    }
+    return copy;
+  };
+
+  const finalResults = [];
+  let remaining = limit;
+
+  for (const score of sortedScores) {
+    if (remaining <= 0) break;
+
+    const shuffled = shuffle(scoreGroups.get(score));
+    // Take the whole tier, or only as many as are still needed.
+    const take = Math.min(remaining, shuffled.length);
+    finalResults.push(...shuffled.slice(0, take));
+    remaining -= take;
+  }
+
+  return finalResults;
+};
+
+/**
+ * Fetch works similar to the given work, ranked by weighted tag overlap.
+ *
+ * Loads the target work, queries up to `candidateLimit` published works
+ * (`status: 9000`) sharing at least one tag with it, scores each candidate
+ * with `calculateScore`, and returns up to `limit` of them via
+ * `randomizeResults` (higher scores first, random within a score tier).
+ * If the target has no tags, it falls back to a plain listing of other
+ * published works.
+ * @param {String} artId ID of the target work
+ * @param {Object} options Options
+ * @param {Number} options.limit Maximum number of works to return, default 100
+ * @param {Number} options.candidateLimit Maximum size of the candidate set, default 2000
+ * @param {Date} options.dateThreshold Optional lower bound applied to `createdAt`
+ * @param {String} options.fields Projection for returned documents; all fields when omitted
+ * @returns {Promise<Array>} Similar works (lean documents, without the internal `score`)
+ * @throws {Error} When the target work does not exist; query errors are logged and rethrown
+ */
+const getSimilarArts = async (artId, {
+  limit = 100,
+  candidateLimit = 2000,
+  dateThreshold = null,
+  fields = null
+} = {}) => {
+  try {
+    // NOTE(review): the same projection is applied to the target and to the
+    // candidates, and both need `tags` for scoring — confirm callers' `fields`
+    // always include `tags`.
+    const projection = fields || '';
+    const targetArt = await models.Art.findById(artId, projection).lean();
+
+    if (!targetArt) {
+      throw new Error('作品不存在');
+    }
+
+    const targetTags = targetArt.tags || [];
+
+    // No tags on the target: nothing to match on, so return other published works.
+    if (targetTags.length === 0) {
+      // Same `status` filter and `user` populate as the main path, so the
+      // fallback returns documents of the same kind and shape.
+      const fallbackQuery = { _id: { $ne: artId }, status: 9000 };
+      if (dateThreshold) {
+        fallbackQuery.createdAt = { $gte: dateThreshold };
+      }
+
+      // Fixed: the model must be reached through `models.Art`; a bare `Art`
+      // is not defined in this module. `await` keeps failures inside this
+      // try/catch so they are logged like every other error.
+      return await models.Art.find(fallbackQuery)
+        .select(projection)
+        .populate('user', 'username')
+        .sort({ $natural: -1 }) // reverse natural order; not a true random sample
+        .limit(limit)
+        .lean();
+    }
+
+    // De-duplicated tag list for the `$in` query.
+    const allTags = [...new Set(targetTags)];
+    // Pre-split the target tags into Sets once; reused for every candidate.
+    const targetTagSets = splitTagsToSets(targetTags);
+
+    const query = {
+      _id: { $ne: artId },
+      tags: { $in: allTags },
+      status: 9000,
+    };
+
+    // Optional time filter to shrink the candidate set.
+    if (dateThreshold) {
+      query.createdAt = { $gte: dateThreshold };
+    }
+
+    const candidates = await models.Art.find(query)
+      .select(projection)
+      .populate('user', 'username')
+      .limit(candidateLimit)
+      .lean();
+
+    // Score every candidate, drop non-matches, and order by score descending.
+    // Fixed: the comparator must subtract `a.score`; subtracting the object
+    // `a` yields NaN and leaves the order undefined.
+    const sortedResults = candidates
+      .map(candidate => ({
+        ...candidate,
+        score: calculateScore(targetTagSets, candidate.tags || [])
+      }))
+      .filter(item => item.score > 0)
+      .sort((a, b) => b.score - a.score);
+
+    const finalResults = randomizeResults(sortedResults, limit);
+
+    // Strip the internal score before returning.
+    return finalResults.map(({ score, ...rest }) => rest);
+
+  } catch (error) {
+    console.error('获取相似作品失败:', error);
+    throw error; // rethrow so the caller can handle it
+  }
+};
+
+
+/**
+ * Simple variant of the similar-works lookup (same approach as the artsite
+ * website): returns the most recently published works that share at least
+ * one tag with the target, without any scoring.
+ *
+ * Main-category tags are ignored when the target also has other tags; if the
+ * target has only main-category tags, those are used instead.
+ * @param {String} artId ID of the target work
+ * @param {Object} options Options
+ * @param {Number} options.limit Maximum number of works to return, default 200
+ * @param {String} options.fields Projection for returned documents; all fields when omitted
+ * @returns {Promise<Array>} Matching works (lean documents), newest `publishTime` first
+ * @throws {Error} When the target work does not exist; query errors are logged and rethrown
+ */
+const getSimilarArtsSimple = async (artId, {
+  limit = 200,
+  fields = null
+} = {}) => {
+  try {
+    const projection = fields || '';
+    const targetArt = await models.Art.findById(artId, projection).lean();
+
+    if (!targetArt) {
+      throw new Error('作品不存在');
+    }
+
+    const targetTags = targetArt.tags || [];
+
+    // Fixed: `filter()` always returns an array, and an empty array is truthy,
+    // so `filter(...) || targetTags` never fell back. Check the length instead.
+    const nonMainTags = targetTags.filter(tag => !mainTagSet.has(tag));
+    const filteredTags = nonMainTags.length > 0 ? nonMainTags : targetTags;
+
+    const query = {
+      _id: { $ne: artId },
+      tags: { $in: filteredTags },
+      status: 9000,
+    };
+
+    const docs = await models.Art.find(query)
+      .select(projection)
+      .populate('user', 'username')
+      .limit(limit)
+      .sort({ publishTime: 'desc' })
+      .lean();
+
+    return docs;
+
+  } catch (error) {
+    console.error('获取相似作品失败:', error);
+    throw error; // rethrow so the caller can handle it
+  }
+};
+ 
+
+// Public API: the scored similarity lookup and the simple tag-overlap variant.
+module.exports = {
+  getSimilarArts,
+  getSimilarArtsSimple,
+};

+ 1 - 1
views/v2/app.ejs

@@ -797,7 +797,7 @@
         <div class="container">
             <!-- APP 图标 -->
             <div class="app-icon">
-                <img src="/assets/icon/logo_640x640.webp" alt="Art Color App Logo">
+                <img src="/assets/icon/icon.webp" alt="Art Color App Logo">
             </div>
             <h1 class="app-title">🎨 Art Color - Color by Number 🎨</h1>
             <div class="app-subtitle">

+ 1 - 1
views/v2/detail.ejs

@@ -97,7 +97,7 @@
     }
 
     .poster img {
-      max-width: 100%;
+      max-width: 80%;
       height: auto;
       border-radius: 8px;
       display: block;

+ 1 - 1
views/v2/footer.ejs

@@ -4,7 +4,7 @@
             <div>
                 <a href="/" style="color: white;">
                     <div class="footer-logo">
-                        <img src="/assets/icon/logo3.webp" alt="Art Color Logo">
+                        <img src="/assets/icon/logo4.webp" alt="Art Color Logo">
                         <span>Art Color</span>
                     </div>
                 </a>