| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254 |
- const models = require('../../models');
// Primary (top-level) category tags.
const mainTags = [
  'latest', 'data_good', 'animal', 'people', 'mandala',
  'scenery', 'life', 'plant', 'food', 'fantasy',
  'culture', 'famous', 'places', 'building', 'special_date',
];
// Set form of the primary tags, used for membership checks via Set.has.
const mainTagSet = new Set(mainTags);

// Secondary (fine-grained) category tags.
// Entries are case-sensitive as written (e.g. 'Christmas' is capitalised).
const minorTags = [
  'cat', 'landscape', 'dog', 'countryside', 'forest', 'bird', 'river', 'mountains', 'snow', 'winter',
  'house', 'village', 'heart', 'Christmas', 'garden', 'butterfly', 'fashion', 'summer', 'farm', 'boy',
  'sea', 'car', 'horse', 'lake', 'autumn', 'tree', 'wild', 'woman', 'room', 'park', 'ocean', 'meadow',
  'rabbit', 'family', 'home', 'mountain', 'halloween', 'bridge', 'friends', 'city', 'baby', 'sunset',
  'simple', 'boat', 'window', 'man', 'trees', 'fruit', 'rose', 'vacation', 'evening', 'castle', 'snowman',
  'street', 'tiger', 'grass', 'lady', 'child', 'vintage', 'holiday', 'pasture', 'deer', 'sweet', 'night',
  'beach', 'travel', 'dress', 'fish', 'couple', 'view', 'fairy', 'yard', 'sky', 'toys', 'wildflowers',
  'love', 'pumpkin', 'water', 'transportation', 'birds', 'rocks', 'downtown', 'ice', 'beer', 'spring',
  'road', 'wolf', 'unicorn', 'lion', 'stone', 'magic', 'cake', 'book', 'furniture', 'children', 'fox',
  'cartoon', 'duck', 'owl', 'sport', 'angel', 'cub', 'decorations', 'pond', 'sheep', 'fence', 'interior',
  'coastline', 'beauty', 'chicken', 'train', 'mermaid', 'history', 'moon', 'picnic', 'bedroom', 'blooming',
  'sunflower', 'lovers', 'stairs', 'walk', 'jungle', 'livingroom', 'thanksgiving', 'kingdom', 'domestic',
  'mother', 'lotus', 'dragon', 'panda', 'pet', 'cottage', 'tea', 'coast',
];
// Set form of the secondary tags, used for membership checks via Set.has.
const minorTagSet = new Set(minorTags);
/**
 * Partition a tag list into primary, secondary and uncategorised groups.
 *
 * Each tag lands in exactly one group: `main` when it is in `mainTagSet`,
 * otherwise `minor` when it is in `minorTagSet`, otherwise `other`.
 * The groups are Sets, so repeated tags collapse to a single entry.
 *
 * @param {Array} tags Tag array; treated as empty when omitted.
 * @returns {{main: Set, minor: Set, other: Set}} The three tag groups.
 */
const splitTagsToSets = (tags = []) => {
  const groups = {
    main: new Set(),
    minor: new Set(),
    other: new Set(),
  };

  tags.forEach((tag) => {
    if (mainTagSet.has(tag)) {
      groups.main.add(tag);
      return;
    }
    if (minorTagSet.has(tag)) {
      groups.minor.add(tag);
      return;
    }
    groups.other.add(tag);
  });

  return groups;
};
/**
 * Compute the tag-match score of a candidate work against a target work.
 *
 * Weights: a candidate tag found in the target's `minor` set scores 4,
 * one found in the target's `other` set scores 2, and one found in the
 * target's `main` set scores 1. Tags matching none of the sets score 0.
 *
 * Each distinct candidate tag is counted once. The target side is already
 * de-duplicated (it is a group of Sets), so repeated tags on the candidate
 * must not inflate the score either.
 *
 * @param {{main: Set, minor: Set, other: Set}} targetTagSets Tag groups of the target work.
 * @param {Array} candidateTags Tags of the candidate work; null/undefined is treated as empty.
 * @returns {Number} The match score (0 when nothing matches).
 */
const calculateScore = (targetTagSets, candidateTags) => {
  const { main: targetMain, minor: targetMinor, other: targetOther } = targetTagSets;
  const MINOR_WEIGHT = 4;
  const OTHER_WEIGHT = 2;
  const MAIN_WEIGHT = 1;

  let score = 0;
  // `new Set(null)` and `new Set(undefined)` both yield an empty Set, so a
  // missing tag list simply scores 0.
  for (const tag of new Set(candidateTags)) {
    if (targetMinor.has(tag)) {
      score += MINOR_WEIGHT;
    } else if (targetOther.has(tag)) {
      score += OTHER_WEIGHT;
    } else if (targetMain.has(tag)) {
      score += MAIN_WEIGHT;
    }
  }

  return score;
};
/**
 * Pick at most `limit` items, preferring higher scores and choosing randomly
 * among items that share the same score.
 *
 * Items are grouped by their `score` property. Groups are visited from the
 * highest score to the lowest; each group contributes a random selection of
 * its members until `limit` items have been collected.
 *
 * When `results` already holds `limit` items or fewer, the input array itself
 * is returned unchanged (same reference, original order).
 *
 * @param {Array} results Scored items, each carrying a numeric `score`.
 * @param {Number} limit Maximum number of items to return.
 * @returns {Array} The selected items, ordered by descending score.
 */
const randomizeResults = (results, limit) => {
  if (results.length <= limit) return results;

  // Draw `count` random members of `items` without replacement using a
  // partial Fisher–Yates shuffle on a copy. A random sort comparator is
  // inconsistent and produces a biased, engine-dependent order.
  const drawRandom = (items, count) => {
    const pool = [...items];
    for (let i = 0; i < count; i++) {
      const j = i + Math.floor(Math.random() * (pool.length - i));
      [pool[i], pool[j]] = [pool[j], pool[i]];
    }
    return pool.slice(0, count);
  };

  // Bucket the items by score.
  const groupsByScore = new Map();
  for (const item of results) {
    const bucket = groupsByScore.get(item.score);
    if (bucket) {
      bucket.push(item);
    } else {
      groupsByScore.set(item.score, [item]);
    }
  }

  // Visit the buckets from the highest score to the lowest.
  const scoresDescending = [...groupsByScore.keys()].sort((a, b) => b - a);

  const finalResults = [];
  let remaining = limit;

  for (const score of scoresDescending) {
    if (remaining <= 0) break;

    const group = groupsByScore.get(score);
    const take = Math.min(remaining, group.length);
    finalResults.push(...drawRandom(group, take));
    remaining -= take;
  }

  return finalResults;
};
/**
 * Find works similar to a target work, ranked by tag overlap.
 *
 * Flow:
 *  1. Load the target work by id.
 *  2. If the target has no tags, return up to `limit` other works in reverse
 *     natural order (optionally restricted by `dateThreshold`).
 *  3. Otherwise fetch up to `candidateLimit` works with `status` 9000 that
 *     share at least one tag with the target, score each with
 *     `calculateScore`, drop zero scores, sort by descending score and let
 *     `randomizeResults` pick `limit` of them. The temporary `score` field is
 *     removed before returning.
 *
 * @param {String} artId Id of the target work.
 * @param {Object} [options] Options.
 * @param {Number} [options.limit=100] Maximum number of works to return.
 * @param {Number} [options.candidateLimit=2000] Maximum number of candidates to fetch and score.
 * @param {Date} [options.dateThreshold=null] When set, only works with `createdAt >= dateThreshold` are considered.
 * @param {String} [options.fields=null] Projection passed to the queries; empty means all fields.
 * @returns {Promise<Array>} The similar works.
 * @throws {Error} When the target work does not exist; query errors are logged and rethrown.
 */
const getSimilarArts = async (artId, {
  limit = 100,
  candidateLimit = 2000,
  dateThreshold = null,
  fields = null
} = {}) => {
  try {
    // NOTE(review): the same projection is used for the target and the
    // candidates, and both are read through `.tags`. A caller-supplied
    // `fields` presumably has to include `tags`, otherwise the target looks
    // untagged and candidates score 0 — confirm against callers.
    const projection = fields || '';
    const targetArt = await models.Art.findById(artId, projection).lean();

    if (!targetArt) {
      throw new Error('作品不存在');
    }

    const targetTags = targetArt.tags || [];

    // No tags to compare against: fall back to a plain listing.
    if (targetTags.length === 0) {
      // NOTE(review): unlike the tagged path below, this fallback neither
      // filters on `status` nor populates `user` — confirm this is intended.
      const fallbackQuery = { _id: { $ne: artId } };
      if (dateThreshold) {
        fallbackQuery.createdAt = { $gte: dateThreshold };
      }

      // `$natural: -1` is reverse natural (storage) order, not a random
      // sample. `return await` keeps a rejected query inside this try block
      // so it is logged like every other failure.
      return await models.Art.find(fallbackQuery)
        .select(projection)
        .limit(limit)
        .sort({ $natural: -1 })
        .lean();
    }

    // De-duplicated tag list for the `$in` filter.
    const allTags = [...new Set(targetTags)];
    // Pre-split the target tags once so that scoring each candidate is cheap.
    const targetTagSets = splitTagsToSets(targetTags);

    const query = {
      _id: { $ne: artId },
      tags: { $in: allTags },
      status: 9000,
    };

    // Optional time filter to narrow the candidate set.
    if (dateThreshold) {
      query.createdAt = { $gte: dateThreshold };
    }

    const candidates = await models.Art.find(query)
      .select(projection)
      .populate('user', 'username')
      .limit(candidateLimit)
      .lean();

    // Score every candidate, drop non-matches, highest score first.
    const sortedResults = candidates
      .map((candidate) => ({
        ...candidate,
        score: calculateScore(targetTagSets, candidate.tags || []),
      }))
      .filter((item) => item.score > 0)
      .sort((a, b) => b.score - a.score);

    const finalResults = randomizeResults(sortedResults, limit);

    // Strip the internal score before handing the documents back.
    return finalResults.map(({ score, ...rest }) => rest);

  } catch (error) {
    console.error('获取相似作品失败:', error);
    throw error; // let the caller decide how to handle the failure
  }
};
/**
 * Simplified similar-works lookup: newest works sharing a tag with the target.
 *
 * Per the original author's note this mirrors the approach used on the
 * "artsite" website. Primary-category tags are ignored when the target also
 * carries other tags; if the target only has primary-category tags, all of
 * its tags are used so the lookup does not degenerate to an empty `$in` list.
 *
 * @param {String} artId Id of the target work.
 * @param {Object} [options] Options.
 * @param {Number} [options.limit=200] Maximum number of works to return.
 * @param {String} [options.fields=null] Projection passed to the queries; empty means all fields.
 * @returns {Promise<Array>} Works with `status` 9000 sharing a tag, sorted by `publishTime` descending.
 * @throws {Error} When the target work does not exist; query errors are logged and rethrown.
 */
const getSimilarArtsSimple = async (artId, {
  limit = 200,
  fields = null
} = {}) => {
  try {
    // NOTE(review): the target is loaded with the caller's projection, so a
    // supplied `fields` presumably has to include `tags` — confirm against callers.
    const projection = fields || '';
    const targetArt = await models.Art.findById(artId, projection).lean();

    if (!targetArt) {
      throw new Error('作品不存在');
    }

    const targetTags = targetArt.tags || [];
    // `filter` always returns an array, and an empty array is truthy, so the
    // fallback has to test the length explicitly.
    const nonMainTags = targetTags.filter((tag) => !mainTagSet.has(tag));
    const filteredTags = nonMainTags.length > 0 ? nonMainTags : targetTags;

    const query = {
      _id: { $ne: artId },
      tags: { $in: filteredTags },
      status: 9000,
    };

    const docs = await models.Art.find(query)
      .select(projection)
      .populate('user', 'username')
      .limit(limit)
      .sort({ publishTime: 'desc' })
      .lean();

    return docs;

  } catch (error) {
    console.error('获取相似作品失败:', error);
    throw error; // let the caller decide how to handle the failure
  }
};
-
- module.exports = {
- getSimilarArts,
- getSimilarArtsSimple,
- };
|