diff --git a/.gitignore b/.gitignore index cda87158..a4a3292b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ /node_modules /dist /docs/.vitepress/cache/* -/docs/.vitepress/dist \ No newline at end of file +/docs/.vitepress/dist +.env \ No newline at end of file diff --git a/package-lock.json b/package-lock.json index eb79d959..de2f334b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -10,7 +10,11 @@ "license": "ISC", "devDependencies": { "@types/node": "^22.13.10", + "algoliasearch": "^5.21.0", + "dotenv": "^16.4.7", "fast-glob": "^3.3.3", + "gray-matter": "^4.0.3", + "marked": "^15.0.7", "vitepress": "^1.6.3", "vue": "^3.5.13" } @@ -1537,6 +1541,16 @@ "node": ">= 14.0.0" } }, + "node_modules/argparse": { + "version": "1.0.10", + "resolved": "https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz", + "integrity": "sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg==", + "dev": true, + "license": "MIT", + "dependencies": { + "sprintf-js": "~1.0.2" + } + }, "node_modules/birpc": { "version": "0.2.19", "resolved": "https://registry.npmjs.org/birpc/-/birpc-0.2.19.tgz", @@ -1651,6 +1665,19 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/dotenv": { + "version": "16.4.7", + "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.7.tgz", + "integrity": "sha512-47qPchRCykZC03FhkYAhrvwU4xDBFIj1QPqaarj6mdM/hgUzfPHcpkHJOn3mJAufFeeAxAzeGsr5X0M4k6fLZQ==", + "dev": true, + "license": "BSD-2-Clause", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://dotenvx.com" + } + }, "node_modules/emoji-regex-xs": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/emoji-regex-xs/-/emoji-regex-xs-1.0.0.tgz", @@ -1710,6 +1737,20 @@ "@esbuild/win32-x64": "0.21.5" } }, + "node_modules/esprima": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/esprima/-/esprima-4.0.1.tgz", + "integrity": 
"sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==", + "dev": true, + "license": "BSD-2-Clause", + "bin": { + "esparse": "bin/esparse.js", + "esvalidate": "bin/esvalidate.js" + }, + "engines": { + "node": ">=4" + } + }, "node_modules/estree-walker": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/estree-walker/-/estree-walker-2.0.2.tgz", @@ -1717,6 +1758,19 @@ "dev": true, "license": "MIT" }, + "node_modules/extend-shallow": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/extend-shallow/-/extend-shallow-2.0.1.tgz", + "integrity": "sha512-zCnTtlxNoAiDc3gqY2aYAWFx7XWWiasuF2K8Me5WbN8otHKTUKBwjPtNpRs/rbUZm7KxWAaNj7P1a/p52GbVug==", + "dev": true, + "license": "MIT", + "dependencies": { + "is-extendable": "^0.1.0" + }, + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/fast-glob": { "version": "3.3.3", "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.3.tgz", @@ -1795,6 +1849,22 @@ "node": ">= 6" } }, + "node_modules/gray-matter": { + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/gray-matter/-/gray-matter-4.0.3.tgz", + "integrity": "sha512-5v6yZd4JK3eMI3FqqCouswVqwugaA9r4dNZB1wwcmrD02QkV5H0y7XBQW8QwQqEaZY1pM9aqORSORhJRdNK44Q==", + "dev": true, + "license": "MIT", + "dependencies": { + "js-yaml": "^3.13.1", + "kind-of": "^6.0.2", + "section-matter": "^1.0.0", + "strip-bom-string": "^1.0.0" + }, + "engines": { + "node": ">=6.0" + } + }, "node_modules/hast-util-to-html": { "version": "9.0.5", "resolved": "https://registry.npmjs.org/hast-util-to-html/-/hast-util-to-html-9.0.5.tgz", @@ -1851,6 +1921,16 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/is-extendable": { + "version": "0.1.1", + "resolved": "https://registry.npmjs.org/is-extendable/-/is-extendable-0.1.1.tgz", + "integrity": "sha512-5BMULNob1vgFX6EjQw5izWDxrecWK9AM72rugNr0TFldMOi0fj6Jk+zeKIt0xGj4cEfQIJth4w3OKWOJ4f+AFw==", + "dev": true, + "license": "MIT", + "engines": { + 
"node": ">=0.10.0" + } + }, "node_modules/is-extglob": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/is-extglob/-/is-extglob-2.1.1.tgz", @@ -1897,6 +1977,30 @@ "url": "https://github.com/sponsors/mesqueeb" } }, + "node_modules/js-yaml": { + "version": "3.14.1", + "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.14.1.tgz", + "integrity": "sha512-okMH7OXXJ7YrN9Ok3/SXrnu4iX9yOk+25nqX4imS2npuvTYDmo/QEZoqwZkYaIDk3jVvBOTOIEgEhaLOynBS9g==", + "dev": true, + "license": "MIT", + "dependencies": { + "argparse": "^1.0.7", + "esprima": "^4.0.0" + }, + "bin": { + "js-yaml": "bin/js-yaml.js" + } + }, + "node_modules/kind-of": { + "version": "6.0.3", + "resolved": "https://registry.npmjs.org/kind-of/-/kind-of-6.0.3.tgz", + "integrity": "sha512-dcS1ul+9tmeD95T+x28/ehLgd9mENa3LsvDTtzm3vyBEO7RPptvAD+t44WVXaUjTBRcrpFeFlC8WCruUR456hw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/magic-string": { "version": "0.30.17", "resolved": "https://registry.npmjs.org/magic-string/-/magic-string-0.30.17.tgz", @@ -1914,6 +2018,19 @@ "dev": true, "license": "MIT" }, + "node_modules/marked": { + "version": "15.0.7", + "resolved": "https://registry.npmjs.org/marked/-/marked-15.0.7.tgz", + "integrity": "sha512-dgLIeKGLx5FwziAnsk4ONoGwHwGPJzselimvlVskE9XLN4Orv9u2VA3GWw/lYUqjfA0rUT/6fqKwfZJapP9BEg==", + "dev": true, + "license": "MIT", + "bin": { + "marked": "bin/marked.js" + }, + "engines": { + "node": ">= 18" + } + }, "node_modules/mdast-util-to-hast": { "version": "13.2.0", "resolved": "https://registry.npmjs.org/mdast-util-to-hast/-/mdast-util-to-hast-13.2.0.tgz", @@ -2314,6 +2431,20 @@ "license": "MIT", "peer": true }, + "node_modules/section-matter": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/section-matter/-/section-matter-1.0.0.tgz", + "integrity": "sha512-vfD3pmTzGpufjScBh50YHKzEu2lxBWhVEHsNGoEXmCmn2hKGfeNLYMzCJpe8cD7gqX7TJluOVpBkAequ6dgMmA==", + "dev": true, + "license": "MIT", + 
"dependencies": { + "extend-shallow": "^2.0.1", + "kind-of": "^6.0.0" + }, + "engines": { + "node": ">=4" + } + }, "node_modules/shiki": { "version": "2.5.0", "resolved": "https://registry.npmjs.org/shiki/-/shiki-2.5.0.tgz", @@ -2362,6 +2493,13 @@ "node": ">=0.10.0" } }, + "node_modules/sprintf-js": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz", + "integrity": "sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g==", + "dev": true, + "license": "BSD-3-Clause" + }, "node_modules/stringify-entities": { "version": "4.0.4", "resolved": "https://registry.npmjs.org/stringify-entities/-/stringify-entities-4.0.4.tgz", @@ -2377,6 +2515,16 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/strip-bom-string": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/strip-bom-string/-/strip-bom-string-1.0.0.tgz", + "integrity": "sha512-uCC2VHvQRYu+lMh4My/sFNmF2klFymLX1wHJeXnbEJERpV/ZsVuonzerjfrGpIGF7LBVa1O7i9kjiWvJiFck8g==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/superjson": { "version": "2.2.2", "resolved": "https://registry.npmjs.org/superjson/-/superjson-2.2.2.tgz", diff --git a/package.json b/package.json index 81252db2..963b7874 100644 --- a/package.json +++ b/package.json @@ -5,6 +5,7 @@ "scripts": { "docs:dev": "vitepress dev docs", "docs:build": "vitepress build docs", + "docs:algolia": "node scripts/algolia-upload.mjs", "docs:preview": "vitepress preview docs" }, "author": "boybook", @@ -12,8 +13,12 @@ "description": "", "devDependencies": { "@types/node": "^22.13.10", + "algoliasearch": "^5.21.0", + "dotenv": "^16.4.7", "fast-glob": "^3.3.3", + "gray-matter": "^4.0.3", "vitepress": "^1.6.3", - "vue": "^3.5.13" + "vue": "^3.5.13", + "marked": "^15.0.7" } } diff --git a/scripts/algolia-upload.mjs b/scripts/algolia-upload.mjs new file mode 100644 index 00000000..8cc25afe --- /dev/null +++ 
// scripts/algolia-upload.mjs
//
// Builds Algolia search records from the Markdown files under ../docs and
// uploads them to the Algolia index. Flow: push index settings, split every
// page into heading-based sections, turn each section into one record (or
// several chunk records when it is too large), then upload in batches.
//
// Credentials are read from the environment (optionally via a .env file):
//   ALGOLIA_APP_ID, ALGOLIA_ADMIN_KEY

import { readFile, readdir } from 'node:fs/promises';
import path, { join, resolve } from 'node:path';
import { fileURLToPath } from 'node:url';
import { algoliasearch } from 'algoliasearch';
import matter from 'gray-matter';
import { marked } from 'marked';

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const docsDir = resolve(__dirname, '../docs');

// Load environment variables before the credentials below are read.
const dotenv = await import('dotenv');
dotenv.config();

// Configuration
const ALGOLIA_APP_ID = process.env.ALGOLIA_APP_ID;
const ALGOLIA_API_KEY = process.env.ALGOLIA_ADMIN_KEY;
const INDEX_NAME = 'netease-modsdk';
const SITE_URL = 'https://modsdk.easecation.net';
// Bytes; kept conservatively below the 10000-byte record limit.
const MAX_RECORD_SIZE = 8000;
// Byte budget for the `content` of one chunk; the other half of the record
// budget is left for the remaining fields and JSON escaping overhead.
const MAX_CHUNK_BYTES = MAX_RECORD_SIZE / 2;
const CODE_FENCE = '```';
// Headings whose text contains this marker (and the content below them, up to
// the next heading) are not indexed. Files whose name contains it are skipped.
const INDEX_MARKER = '索引';

// Ranking weight per top-level docs directory; unknown directories get 0.
const DIRECTORY_PRIORITY = new Map([
  ['mcdocs', 3], // highest priority
  ['mcguide', 2], // medium priority
  ['mconline', 1], // lower priority
]);

/**
 * Recursively collects all Markdown files below `dir`.
 *
 * @param {string} dir - Absolute directory to scan.
 * @param {string} [base] - Route prefix accumulated while recursing.
 * @returns {Promise<Array<{filePath: string, path: string}>>} One entry per
 *   page: the file location and its site route (always starting with `/`).
 */
async function getAllMarkdownFiles(dir, base = '') {
  const files = [];
  const entries = await readdir(dir, { withFileTypes: true });

  for (const entry of entries) {
    const fullPath = join(dir, entry.name);

    if (entry.isDirectory()) {
      const subFiles = await getAllMarkdownFiles(fullPath, join(base, entry.name));
      files.push(...subFiles);
      continue;
    }
    if (!entry.name.endsWith('.md')) {
      continue;
    }
    // Ignore special files: names starting with `_` or containing the index marker.
    if (entry.name.startsWith('_') || entry.name.includes(INDEX_MARKER)) {
      console.log(entry.name);
      continue;
    }

    // An index.md maps to its directory route; any other file to its own name.
    const relativeRoute =
      entry.name === 'index.md' ? base : join(base, entry.name.replace(/\.md$/, ''));
    // Ensure a leading slash and normalise Windows path separators.
    const routePath = `/${relativeRoute}`.replace(/\\/g, '/');

    files.push({ filePath: fullPath, path: routePath });
  }

  return files;
}

/**
 * Renders one non-heading marked token into the text that is accumulated as
 * section content.
 *
 * @param {object} token - A token produced by `marked.lexer`.
 * @returns {string} Text to append to the current section.
 */
function renderToken(token) {
  if (token.type === 'paragraph') {
    return `${token.text}\n\n`;
  }
  if (token.type === 'code') {
    // Code blocks without an info string have no `lang`; avoid emitting "undefined".
    return `${CODE_FENCE}${token.lang ?? ''}\n${token.text}\n${CODE_FENCE}\n\n`;
  }
  return `${token.raw}\n`;
}

/**
 * Splits Markdown into sections at H1 and H2 headings using the marked lexer.
 *
 * - Content before the first H1/H2 becomes a level-0 section with an empty anchor.
 * - H3 and deeper headings do not open a new section; their text and the
 *   content below them stay in the enclosing section.
 * - A heading (of any depth) whose text contains the index marker suppresses
 *   everything up to the next heading.
 * - Every section is emitted exactly once, and only if it has non-blank content.
 *
 * @param {string} content - Markdown body (front matter already removed).
 * @returns {Array<{h1Title: ?string, h2Title: ?string, content: string,
 *   anchor: string, level: number}>} Sections in document order.
 */
function splitByHeadings(content) {
  const tokens = marked.lexer(content);
  const sections = [];
  let currentH1 = null;
  let skipSection = false;
  let currentSection = {
    h1Title: null,
    h2Title: null,
    content: '',
    anchor: '',
    level: 0,
  };

  const flushSection = () => {
    if (currentSection.content.trim()) {
      sections.push(currentSection);
    }
  };

  for (const token of tokens) {
    if (token.type !== 'heading') {
      if (!skipSection) {
        currentSection.content += renderToken(token);
      }
      continue;
    }

    skipSection = token.text.includes(INDEX_MARKER);
    if (skipSection) {
      if (token.depth === 1) {
        console.log(`跳过索引部分: ${token.text}`);
      }
      continue;
    }

    if (token.depth > 2) {
      // Sub-headings stay inside the enclosing H1/H2 section.
      currentSection.content += `${token.raw}\n`;
      continue;
    }

    // A new H1/H2 closes the section collected so far and opens a new one.
    flushSection();
    if (token.depth === 1) {
      currentH1 = token.text;
    }
    currentSection = {
      h1Title: currentH1,
      h2Title: token.depth === 2 ? token.text : null,
      content: `${token.raw}\n`,
      anchor: generateAnchor(token.text),
      level: token.depth,
    };
  }

  flushSection();
  return sections;
}

/**
 * Builds a `#anchor` fragment from heading text: lower-cased, whitespace runs
 * replaced by `-`, and everything except word characters, CJK ideographs in
 * U+4E00..U+9FA5 and `-` removed.
 *
 * NOTE(review): intended to match the anchors VitePress generates; confirm
 * against the slugify implementation the site actually uses.
 *
 * @param {string} text - Heading text.
 * @returns {string} Anchor including the leading `#`.
 */
function generateAnchor(text) {
  const slug = text
    .toLowerCase()
    .replace(/\s+/g, '-')
    .replace(/[^\w\u4e00-\u9fa5-]/g, '');
  return `#${slug}`;
}

/**
 * Removes fenced code blocks from section content.
 *
 * @param {string} content - Section text.
 * @returns {string} Text without fenced code blocks.
 */
function cleanContent(content) {
  return content.replace(/```[\s\S]*?```/g, '');
}

/**
 * Returns the ranking priority for a route based on its first directory:
 * mcdocs = 3, mcguide = 2, mconline = 1, anything else = 0.
 *
 * @param {string} routePath - Site route such as `/mcdocs/foo/bar`.
 * @returns {number} Priority; higher values rank first.
 */
function getDirectoryPriority(routePath) {
  const [firstDir] = routePath.split('/').filter(Boolean);
  return DIRECTORY_PRIORITY.get(firstDir) ?? 0;
}

/**
 * Splits text into chunks of at most `maxBytes` UTF-8 bytes without dropping
 * any content. Paragraphs (separated by blank lines) are packed greedily; a
 * single paragraph larger than the budget is cut on code-point boundaries.
 *
 * @param {string} text - Text to split.
 * @param {number} maxBytes - Maximum UTF-8 size of one chunk.
 * @returns {string[]} Non-empty chunks in original order.
 */
function splitIntoChunks(text, maxBytes) {
  const chunks = [];
  let current = '';

  for (const paragraph of text.split(/\n\s*\n/)) {
    if (!paragraph.trim()) {
      continue;
    }

    const candidate = current ? `${current}\n\n${paragraph}` : paragraph;
    if (Buffer.byteLength(candidate, 'utf8') <= maxBytes) {
      current = candidate;
      continue;
    }

    // The paragraph does not fit into the running chunk: close that chunk first.
    if (current) {
      chunks.push(current);
      current = '';
    }

    if (Buffer.byteLength(paragraph, 'utf8') <= maxBytes) {
      current = paragraph;
      continue;
    }

    // Hard split: iterate by code point so multi-byte characters stay intact.
    let piece = '';
    let pieceBytes = 0;
    for (const char of paragraph) {
      const charBytes = Buffer.byteLength(char, 'utf8');
      if (piece && pieceBytes + charBytes > maxBytes) {
        chunks.push(piece);
        piece = '';
        pieceBytes = 0;
      }
      piece += char;
      pieceBytes += charBytes;
    }
    // The remainder may still be merged with the following paragraphs.
    current = piece;
  }

  if (current) {
    chunks.push(current);
  }
  return chunks;
}

/**
 * Returns `baseId`, or `baseId-1`, `baseId-2`, ... if it was already handed
 * out, and remembers the result. Repeated heading text on a page would
 * otherwise yield identical objectIDs and the later record would overwrite
 * the earlier one in the index.
 *
 * @param {string} baseId - Preferred object ID.
 * @param {Set<string>} usedIds - IDs handed out so far (mutated).
 * @returns {string} An ID not previously returned for this set.
 */
function uniqueObjectId(baseId, usedIds) {
  let candidate = baseId;
  let suffix = 0;
  while (usedIds.has(candidate)) {
    suffix += 1;
    candidate = `${baseId}-${suffix}`;
  }
  usedIds.add(candidate);
  return candidate;
}

/**
 * Converts one section into Algolia records: a single record when it fits
 * into MAX_RECORD_SIZE, otherwise one record per content chunk.
 *
 * @param {{path: string}} page - Page the section belongs to.
 * @param {string} pageTitle - Title used for `hierarchy.lvl0`.
 * @param {number} priority - Ranking priority of the page.
 * @param {object} section - Section produced by `splitByHeadings`.
 * @param {Set<string>} usedObjectIds - IDs already assigned (mutated).
 * @returns {object[]} Records for this section.
 */
function buildSectionRecords(page, pageTitle, priority, section, usedObjectIds) {
  const { h1Title, h2Title, content: sectionContent, anchor, level } = section;
  const cleanedContent = cleanContent(sectionContent);

  const hierarchy = {
    lvl0: pageTitle,
    lvl1: h1Title || pageTitle,
  };
  if (h2Title) {
    hierarchy.lvl2 = h2Title;
  }

  const objectID = uniqueObjectId(`${page.path}${anchor}`, usedObjectIds);
  const record = {
    objectID,
    url: `${SITE_URL}${page.path}${anchor}`,
    type: level ? `lvl${level}` : 'content', // which heading level this record represents
    hierarchy,
    content: cleanedContent,
    // Numeric copy of the content length so custom ranking can sort on it.
    contentLength: cleanedContent.length,
    _tags: ['zh-CN'],
    lang: 'zh-CN',
    priority,
  };

  if (Buffer.byteLength(JSON.stringify(record), 'utf8') <= MAX_RECORD_SIZE) {
    return [record];
  }

  console.log(`部分内容过大,需要进一步分割: ${objectID}`);

  return splitIntoChunks(cleanedContent, MAX_CHUNK_BYTES).map((chunk, chunkIndex) => ({
    ...record,
    objectID: `${objectID}-chunk-${chunkIndex}`,
    content: chunk,
    contentLength: chunk.length,
    _tags: [...record._tags, 'chunked'],
  }));
}

/**
 * Reads every Markdown page under the docs directory and produces the Algolia
 * records for all of its sections. A page that cannot be processed is logged
 * and skipped; the remaining pages are still indexed.
 *
 * @returns {Promise<object[]>} All generated records.
 */
async function generateAlgoliaRecords() {
  const pages = await getAllMarkdownFiles(docsDir);
  const records = [];
  const usedObjectIds = new Set();

  for (const page of pages) {
    try {
      const rawContent = await readFile(page.filePath, 'utf8');
      const { data: frontmatter, content } = matter(rawContent);
      const pageTitle = frontmatter.title || page.path.split('/').pop() || page.path;
      const priority = getDirectoryPriority(page.path);

      for (const section of splitByHeadings(content)) {
        records.push(
          ...buildSectionRecords(page, pageTitle, priority, section, usedObjectIds),
        );
      }
    } catch (error) {
      console.warn(`无法处理文件 ${page.filePath}: ${error.message}`);
    }
  }

  return records;
}

/**
 * Checks that both Algolia credentials are present and logs an error if not.
 *
 * @returns {boolean} True when the app ID and the admin key are both set.
 */
function hasAlgoliaCredentials() {
  if (ALGOLIA_APP_ID && ALGOLIA_API_KEY) {
    return true;
  }
  console.error('❌ 缺少 Algolia 凭据。请检查环境变量 ALGOLIA_APP_ID 和 ALGOLIA_ADMIN_KEY');
  return false;
}

/**
 * Uploads records to Algolia in batches of 50. When a batch request fails,
 * its records are retried one by one so the offending records can be
 * identified and the rest still get uploaded.
 *
 * @param {object[]} records - Records to upload; each needs an `objectID`.
 * @returns {Promise<{successCount: number, failureCount: number}>} Totals;
 *   without credentials nothing is uploaded and every record counts as failed.
 */
async function uploadToAlgolia(records) {
  if (!hasAlgoliaCredentials()) {
    return { successCount: 0, failureCount: records.length };
  }

  const client = algoliasearch(ALGOLIA_APP_ID, ALGOLIA_API_KEY);
  const batchSize = 50; // records per request, to keep requests small
  const totalBatches = Math.ceil(records.length / batchSize);
  let successCount = 0;
  let failureCount = 0;

  try {
    console.log(`开始上传 ${records.length} 条记录到 Algolia...`);

    for (let i = 0; i < records.length; i += batchSize) {
      const batch = records.slice(i, i + batchSize);
      const batchNumber = Math.floor(i / batchSize) + 1;

      try {
        const operations = batch.map((record) => ({
          action: 'updateObject',
          indexName: INDEX_NAME,
          body: record,
        }));

        await client.multipleBatch({ requests: operations });
        successCount += batch.length;
        console.log(`✅ 批次 ${batchNumber}/${totalBatches} 已上传 (${successCount}/${records.length})`);
      } catch (batchError) {
        console.error(`❌ 批次 ${batchNumber} 上传失败:`, batchError.message);

        // Retry record by record to find out which ones are rejected.
        for (const record of batch) {
          try {
            await client.addOrUpdateObject({
              indexName: INDEX_NAME,
              objectID: record.objectID,
              body: record,
            });
            successCount += 1;
          } catch (recordError) {
            failureCount += 1;
            console.error(`❌ 记录上传失败 (objectID: ${record.objectID}):`, recordError.message);
            console.error(`记录大小: ${Buffer.byteLength(JSON.stringify(record), 'utf8')} 字节`);
          }
        }
      }
    }

    console.log(`✅ 完成上传 - 成功: ${successCount}, 失败: ${failureCount}`);
  } catch (error) {
    console.error('❌ 上传失败:', error);
    if (error.message) console.error('错误信息:', error.message);
    if (error.status) console.error('状态码:', error.status);
    // Everything that was not confirmed as uploaded counts as failed.
    failureCount = records.length - successCount;
  }

  return { successCount, failureCount };
}

/**
 * Pushes the index settings (faceting, highlighting, searchable attributes and
 * custom ranking) to the Algolia index.
 *
 * @returns {Promise<boolean>} True when the settings request succeeded.
 */
async function settingAlgolia() {
  if (!hasAlgoliaCredentials()) {
    return false;
  }

  const client = algoliasearch(ALGOLIA_APP_ID, ALGOLIA_API_KEY);

  try {
    await client.setSettings({
      indexName: INDEX_NAME,
      indexSettings: {
        attributesForFaceting: ['lang', 'type'],
        attributesToHighlight: ['hierarchy.lvl0', 'hierarchy.lvl1', 'hierarchy.lvl2', 'content'],
        attributesToSnippet: ['content:20'],
        attributesToRetrieve: ['hierarchy', 'content', 'type', 'url', 'lang', 'priority'],
        searchableAttributes: [
          'hierarchy.lvl0',
          'hierarchy.lvl1',
          'hierarchy.lvl2',
          'content',
        ],
        customRanking: [
          'desc(priority)', // first by priority, highest first
          // Then shorter content first. Custom ranking reads record attributes,
          // so the length is stored on each record as `contentLength`.
          'asc(contentLength)',
        ],
      },
      forwardToReplicas: true,
    });
    console.log('✅ 索引设置已更新');
    return true;
  } catch (error) {
    console.error('❌ 设置失败:', error);
    return false;
  }
}

// Entry point. A non-zero exit code is set whenever something failed so that
// automation does not treat a run that only logged errors as successful.
try {
  console.log('设置全局配置...');
  const settingsOk = await settingAlgolia();

  console.log('开始生成 Algolia 索引数据...');
  const records = await generateAlgoliaRecords();
  console.log(`生成了 ${records.length} 条记录`);

  let uploadOk = true;
  if (records.length > 0) {
    const { failureCount } = await uploadToAlgolia(records);
    uploadOk = failureCount === 0;
  } else {
    console.warn('⚠️ 没有找到文档,跳过上传');
  }

  if (!settingsOk || !uploadOk) {
    process.exitCode = 1;
  }
} catch (error) {
  console.error('❌ 处理失败:', error);
  process.exitCode = 1;
}