const fs = require("fs-extra");
const path = require("path");
const axios = require("axios");
const { imageSizeFromFile } = require("image-size/fromFile");

// Name of the JSON file the collected metadata is written to (relative path).
const OUTPUT_FILE = "image_metadata.json";

// Pause after each Adobe Stock request so requests are not sent too quickly.
const ADOBE_REQUEST_DELAY_MS = 2000;

// Timeout passed to axios for each Adobe Stock page request.
const ADOBE_REQUEST_TIMEOUT_MS = 10000;

// Maximum number of tags kept per image.
const MAX_TAGS = 5;

// User-Agent header sent with Adobe Stock page requests.
const USER_AGENT =
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36";

// Captures the first `"keywords": [...]` array literal in a page. `[^\]]*`
// stops at the first closing bracket and, unlike `.`, also spans newlines.
const KEYWORDS_REGEX = /"keywords":\s*(\[[^\]]*\])/;

// Filename prefix -> source name and ID-extraction pattern, checked in order.
const SOURCE_PATTERNS = [
  { source: "AdobeStock", prefix: "AdobeStock_", idRegex: /AdobeStock_(\d+)/ },
  {
    source: "Shutterstock",
    prefix: "shutterstock_",
    idRegex: /shutterstock_(\d+)/,
  },
];

/**
 * Resolves after the given number of milliseconds.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>}
 */
const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

/**
 * Builds Adobe Stock metadata from a fetched page.
 *
 * Strips a two-letter region segment (e.g. `/hk/`) from the final URL and
 * extracts up to MAX_TAGS unique, trimmed, non-empty keywords from the
 * `"keywords": [...]` array embedded in the page body.
 *
 * @param {string} imageId - Image ID, used only in the error log message.
 * @param {*} html - Response body; anything that is not a string yields no tags.
 * @param {string} finalUrl - URL the request ended up at.
 * @returns {Promise<{from: string, tags: string[]}>} Cleaned URL and tags.
 */
async function processAdobeStock(imageId, html, finalUrl) {
  // 1. Remove the region code from the URL (e.g. /hk/).
  const cleanUrl = finalUrl.replace(
    /stock\.adobe\.com\/[a-z]{2}\//i,
    "stock.adobe.com/",
  );

  // 2. Extract the keywords array. The body is only searched when it is a
  //    string, so a pre-parsed (object) body cannot raise a TypeError here.
  let rawTags = [];
  const match = typeof html === "string" ? html.match(KEYWORDS_REGEX) : null;
  if (match) {
    try {
      rawTags = JSON.parse(match[1]);
    } catch (e) {
      console.error(`解析 Adobe Keywords JSON 失败: ${imageId}`);
    }
  }

  // 3. Clean up and keep the first MAX_TAGS entries. Non-string entries are
  //    dropped first so one odd value cannot make `.trim()` throw and lose
  //    every tag for the image.
  const cleaned = rawTags
    .filter((t) => typeof t === "string")
    .map((t) => t.trim())
    .filter((t) => t);
  const tags = [...new Set(cleaned)].slice(0, MAX_TAGS);

  return { from: cleanUrl, tags };
}

/**
 * Determines the stock source and numeric image ID from a filename.
 *
 * @param {string} filename - Bare filename (no directory).
 * @returns {{imageId: string, source: string} | null} Null when the prefix is
 *   unknown or no numeric ID follows it.
 */
function detectSource(filename) {
  const entry = SOURCE_PATTERNS.find(({ prefix }) =>
    filename.startsWith(prefix),
  );
  if (!entry) return null;

  const imageId = filename.match(entry.idRegex)?.[1];
  return imageId ? { imageId, source: entry.source } : null;
}

/**
 * Fetches the Adobe Stock page for an image and extracts its metadata.
 *
 * Any failure (network error, timeout, parsing error) is logged as a warning
 * and replaced by default metadata, so this function does not reject.
 *
 * @param {string} imageId - Adobe Stock image ID.
 * @returns {Promise<{from: string, tags: string[]}>}
 */
async function fetchAdobeMeta(imageId) {
  const targetUrl = `https://stock.adobe.com/${imageId}`;

  try {
    const response = await axios.get(targetUrl, {
      headers: { "User-Agent": USER_AGENT },
      timeout: ADOBE_REQUEST_TIMEOUT_MS,
    });

    // Optional chaining: if the underlying request/response objects are not
    // exposed, fall back to the requested URL instead of throwing and
    // discarding the tags that were fetched.
    const finalUrl = response.request?.res?.responseUrl || targetUrl;

    const meta = await processAdobeStock(imageId, response.data, finalUrl);
    console.log(`✅ 成功: ${imageId} | Tags: [${meta.tags.join(", ")}]`);
    return meta;
  } catch (parseErr) {
    console.warn(
      `⚠️ Adobe 解析失败,使用默认值: ${imageId} | ${parseErr.message}`,
    );
    return { from: targetUrl, tags: [] };
  }
}

/**
 * Main entry point: scans a directory for JPEG files, collects metadata for
 * each recognised stock image and writes the result list to OUTPUT_FILE.
 *
 * Files whose names do not match a known source pattern are skipped with a
 * warning. A failure on one file is logged and does not stop the run. Errors
 * outside the per-file loop (reading the directory, writing the JSON) are
 * logged rather than thrown.
 *
 * @param {string} imageDir - Directory containing the images; a falsy value
 *   makes the function return immediately without doing anything.
 * @returns {Promise<void>}
 */
async function parseImages(imageDir) {
  if (!imageDir) return;

  try {
    const files = await fs.readdir(imageDir);
    const results = [];

    // Accept both .jpeg and .jpg (case-insensitive).
    const imageFiles = files.filter((f) => /\.(jpe?g)$/i.test(f));
    console.log(`找到 ${imageFiles.length} 张图片,准备开始解析...`);

    for (const filename of imageFiles) {
      // 1. Determine the source from the filename.
      const detected = detectSource(filename);
      if (!detected) {
        console.warn(`跳过未知格式文件: ${filename}`);
        continue;
      }
      const { imageId, source } = detected;

      const filePath = path.join(imageDir, filename);
      console.log(`正在处理 [${source}] ID: ${imageId} ...`);

      try {
        // 2. Read the local image dimensions (needed for every source).
        const { width, height } = await imageSizeFromFile(filePath);
        const base = { id: imageId, source, filename, localPath: filePath };

        // 3. Source-specific handling.
        if (source === "Shutterstock") {
          // Shutterstock entries are built statically; no request is sent,
          // so no delay is needed either.
          const from = `https://www.shutterstock.com/image-photo/${imageId}`;
          results.push({ ...base, from, tags: [], width, height });
          console.log(`✅ 成功 (静态构造): ${imageId} | URL: ${from}`);
          continue;
        }

        // Adobe Stock still needs a network request to obtain the tags.
        const meta = await fetchAdobeMeta(imageId);
        results.push({
          ...base,
          from: meta.from,
          tags: meta.tags,
          width,
          height,
        });

        // Sleep after each Adobe request to avoid sending requests too fast.
        await sleep(ADOBE_REQUEST_DELAY_MS);
      } catch (err) {
        console.error(`❌ 处理 ${filename} 失败: ${err.message}`);
      }
    }

    // 4. Write the JSON output.
    await fs.writeJson(OUTPUT_FILE, results, { spaces: 2 });
    console.log(`\n🎉 全部完成!元数据已保存至 ${OUTPUT_FILE}`);
  } catch (err) {
    console.error("程序运行出错:", err);
  }
}

// Public API.
module.exports = { parseImages };

// Script test entry point: runs only when this file is executed directly.
if (require.main === module) {
  // Use the first command-line argument, or fall back to the default path.
  const testDir =
    process.argv[2] ||
    "/Users/guoziyun/content/picture-jigasw/shuttshock/test";
  parseImages(testDir);
}