import { readFile, readdir } from 'node:fs/promises';
import { resolve, join } from 'node:path';
import matter from 'gray-matter';
import { algoliasearch } from 'algoliasearch';
import { marked } from 'marked';
import { fileURLToPath } from 'url';
import path from 'path';

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const docsDir = resolve(__dirname, '../docs');

// Load environment variables (must happen before process.env is read below).
await import('dotenv').then(r => r.config());

// Configuration
const ALGOLIA_APP_ID = process.env.ALGOLIA_APP_ID;
const ALGOLIA_API_KEY = process.env.ALGOLIA_ADMIN_KEY;
const INDEX_NAME = 'netease-modsdk';
const MAX_RECORD_SIZE = 8000; // bytes; deliberately conservative, below the 10000 limit
const MAX_CHUNK_CONTENT_SIZE = MAX_RECORD_SIZE / 2; // content budget per chunk; leaves room for the other fields
const INDEX_MARKER = '索引'; // files and headings containing this text are excluded from indexing
const SITE_ORIGIN = 'https://modsdk.easecation.net';

/**
 * Recursively collects all Markdown files below `dir`.
 *
 * Files whose name starts with `_` or contains the index marker are skipped
 * (their name is logged). `index.md` maps to its directory route.
 *
 * @param {string} dir - Absolute directory to scan.
 * @param {string} [base=''] - Route prefix accumulated while recursing.
 * @returns {Promise<Array<{filePath: string, path: string}>>} File location and
 *   site route (always starting with `/`, always using forward slashes).
 */
async function getAllMarkdownFiles(dir, base = '') {
  const files = [];
  const entries = await readdir(dir, { withFileTypes: true });

  for (const entry of entries) {
    const fullPath = join(dir, entry.name);

    if (entry.isDirectory()) {
      const subFiles = await getAllMarkdownFiles(fullPath, join(base, entry.name));
      files.push(...subFiles);
      continue;
    }

    if (!entry.name.endsWith('.md')) {
      continue;
    }

    // Ignore special files: `_`-prefixed ones and index listings.
    if (entry.name.startsWith('_') || entry.name.includes(INDEX_MARKER)) {
      console.log(entry.name);
      continue;
    }

    // index.md is served at the directory route itself.
    const relativePath = entry.name === 'index.md'
      ? base
      : join(base, entry.name.replace(/\.md$/, ''));

    // Ensure a leading slash and normalise Windows path separators.
    const routePath = `/${relativePath}`.replace(/\\/g, '/');

    files.push({ filePath: fullPath, path: routePath });
  }

  return files;
}

/**
 * Creates an empty section descriptor, optionally overriding fields.
 *
 * @param {object} [overrides] - Fields to override.
 * @returns {{h1Title: ?string, h2Title: ?string, content: string, anchor: string, level: number}}
 */
function createSection(overrides = {}) {
  return { h1Title: null, h2Title: null, content: '', anchor: '', level: 0, ...overrides };
}

/**
 * Converts a non-heading marked token back to the text stored in a section.
 *
 * Code is always re-emitted as a fenced block so that `cleanContent` can strip
 * it later; a missing `lang` is rendered as an empty info string instead of
 * the literal text "undefined".
 *
 * @param {object} token - A token produced by `marked.lexer`.
 * @returns {string} Text to append to the section content.
 */
function renderToken(token) {
  if (token.type === 'paragraph') {
    return `${token.text}\n\n`;
  }
  if (token.type === 'code') {
    const fence = '```';
    return `${fence}${token.lang ?? ''}\n${token.text}\n${fence}\n\n`;
  }
  return `${token.raw}\n`;
}

/**
 * Splits Markdown content into sections at H1 and H2 headings using the
 * marked lexer.
 *
 * - Content before the first heading becomes one level-0 section.
 * - H3+ headings do not start a new section; their text and body stay inside
 *   the enclosing H1/H2 section.
 * - An H1/H2 whose title contains the index marker is skipped together with
 *   everything up to the next H1/H2. An H3+ index heading is skipped up to the
 *   next heading of any depth.
 *
 * Each section is emitted exactly once.
 *
 * @param {string} content - Markdown body (without front matter).
 * @returns {Array<{h1Title: ?string, h2Title: ?string, content: string, anchor: string, level: number}>}
 */
function splitByHeadings(content) {
  const tokens = marked.lexer(content);
  const sections = [];
  let currentH1 = null;
  let currentSection = createSection();
  let skipSection = false; // inside a skipped H1/H2 index section
  let skipSubsection = false; // inside a skipped H3+ index subsection

  const flush = () => {
    if (currentSection.content.trim()) {
      sections.push(currentSection);
    }
  };

  for (const token of tokens) {
    if (token.type !== 'heading') {
      if (!skipSection && !skipSubsection) {
        currentSection.content += renderToken(token);
      }
      continue;
    }

    const isIndexHeading = token.text.includes(INDEX_MARKER);

    if (token.depth > 2) {
      // Deeper headings belong to the enclosing section.
      skipSubsection = isIndexHeading;
      if (!skipSection && !skipSubsection) {
        currentSection.content += `${token.raw}\n`;
      }
      continue;
    }

    // A new H1/H2 closes the section collected so far.
    flush();
    skipSubsection = false;
    skipSection = isIndexHeading;

    if (skipSection) {
      console.log(`跳过索引部分: ${token.text}`);
      // Reset so nothing from the skipped part can be flushed later.
      currentSection = createSection();
      continue;
    }

    if (token.depth === 1) {
      currentH1 = token.text;
    }

    currentSection = createSection({
      h1Title: currentH1,
      h2Title: token.depth === 2 ? token.text : null,
      content: `${token.raw}\n`,
      anchor: generateAnchor(token.text),
      level: token.depth,
    });
  }

  // Emit the trailing section (empty if we ended inside a skipped part).
  flush();

  return sections;
}

/**
 * Builds the anchor (including the leading `#`) for a heading; intended to be
 * compatible with the anchors VitePress generates.
 *
 * Lower-cases the text, turns whitespace runs into `-`, and drops every
 * character that is not a word character, a CJK ideograph (U+4E00–U+9FA5) or `-`.
 *
 * @param {string} text - Heading text.
 * @returns {string} Anchor such as `#my-heading`.
 */
function generateAnchor(text) {
  const slug = text
    .toLowerCase()
    .replace(/\s+/g, '-')
    .replace(/[^\w\u4e00-\u9fa5-]/g, '');
  return `#${slug}`;
}

/**
 * Cleans section content by removing fenced code blocks.
 *
 * @param {string} content - Section content.
 * @returns {string} Content without fenced code blocks.
 */
function cleanContent(content) {
  return content.replace(/```[\s\S]*?```/g, '');
}

/**
 * Returns the ranking priority derived from the first directory of a route.
 *
 * mcdocs → 3 (highest), mcguide → 2, mconline → 1, anything else → 0.
 *
 * @param {string} routePath - Site route such as `/mcdocs/foo/bar`.
 * @returns {number} Priority; higher ranks first.
 */
function getDirectoryPriority(routePath) {
  const [firstDir] = routePath.split('/').filter(Boolean);
  switch (firstDir) {
    case 'mcdocs':
      return 3;
    case 'mcguide':
      return 2;
    case 'mconline':
      return 1;
    default:
      return 0;
  }
}

/**
 * Hard-splits text into pieces of at most `maxBytes` UTF-8 bytes.
 * Iterates by code point so surrogate pairs are never cut in half.
 *
 * @param {string} text - Text to split.
 * @param {number} maxBytes - Maximum UTF-8 size of each piece.
 * @returns {string[]} Pieces in original order; nothing is dropped.
 */
function splitOversizedText(text, maxBytes) {
  const pieces = [];
  let piece = '';
  let pieceBytes = 0;

  for (const char of text) {
    const charBytes = Buffer.byteLength(char, 'utf8');
    if (piece && pieceBytes + charBytes > maxBytes) {
      pieces.push(piece);
      piece = '';
      pieceBytes = 0;
    }
    piece += char;
    pieceBytes += charBytes;
  }

  if (piece) {
    pieces.push(piece);
  }
  return pieces;
}

/**
 * Groups the paragraphs of `content` into chunks of at most `maxBytes` UTF-8
 * bytes. Paragraphs are kept whole where possible; a paragraph that is too
 * large on its own is hard-split instead of being truncated.
 *
 * @param {string} content - Text to chunk.
 * @param {number} maxBytes - Maximum UTF-8 size of each chunk.
 * @returns {string[]} Chunks in original order.
 */
function chunkContent(content, maxBytes) {
  const chunks = [];
  let currentChunk = '';

  const flush = () => {
    if (currentChunk) {
      chunks.push(currentChunk);
      currentChunk = '';
    }
  };

  for (const paragraph of content.split(/\n\s*\n/)) {
    if (!paragraph.trim()) {
      continue;
    }

    if (Buffer.byteLength(paragraph, 'utf8') > maxBytes) {
      flush();
      chunks.push(...splitOversizedText(paragraph, maxBytes));
      continue;
    }

    const candidate = currentChunk ? `${currentChunk}\n\n${paragraph}` : paragraph;
    if (Buffer.byteLength(candidate, 'utf8') > maxBytes) {
      // Current chunk is full: store it and start a new one with this paragraph.
      flush();
      currentChunk = paragraph;
    } else {
      currentChunk = candidate;
    }
  }

  flush();
  return chunks;
}

/**
 * Builds the Algolia record(s) for one section of a page.
 *
 * Returns a single record when its serialized size fits in MAX_RECORD_SIZE;
 * otherwise the content is split into several `-chunk-N` records tagged
 * `chunked`.
 *
 * @param {{path: string}} page - Page descriptor from `getAllMarkdownFiles`.
 * @param {string} pageTitle - Title used for hierarchy level 0.
 * @param {number} priority - Ranking priority of the page.
 * @param {object} section - Section produced by `splitByHeadings`.
 * @returns {object[]} Records ready for upload.
 */
function buildSectionRecords(page, pageTitle, priority, section) {
  const { h1Title, h2Title, content: sectionContent, anchor, level } = section;
  const cleanedContent = cleanContent(sectionContent);

  const hierarchy = {
    lvl0: pageTitle,
    lvl1: h1Title || pageTitle,
  };
  if (h2Title) {
    hierarchy.lvl2 = h2Title;
  }

  const objectID = `${page.path}${anchor}`;
  const record = {
    objectID,
    url: `${SITE_ORIGIN}${page.path}${anchor}`,
    type: level ? `lvl${level}` : 'content', // which heading level this record represents
    hierarchy,
    content: cleanedContent,
    contentLength: cleanedContent.length, // numeric attribute used by customRanking
    _tags: ['zh-CN'],
    lang: 'zh-CN',
    priority,
  };

  const recordSize = Buffer.byteLength(JSON.stringify(record), 'utf8');
  if (recordSize <= MAX_RECORD_SIZE) {
    return [record];
  }

  console.log(`部分内容过大,需要进一步分割: ${objectID}`);

  return chunkContent(cleanedContent, MAX_CHUNK_CONTENT_SIZE).map((chunk, chunkIndex) => ({
    ...record,
    objectID: `${objectID}-chunk-${chunkIndex}`,
    content: chunk,
    contentLength: chunk.length,
    _tags: [...record._tags, 'chunked'],
  }));
}

/**
 * Generates the Algolia index records for every Markdown page under the docs
 * directory. A file that cannot be processed is reported with a warning and
 * skipped; the remaining files are still indexed.
 *
 * @returns {Promise<object[]>} All records.
 */
async function generateAlgoliaRecords() {
  const pages = await getAllMarkdownFiles(docsDir);
  const records = [];

  for (const page of pages) {
    try {
      const rawContent = await readFile(page.filePath, 'utf8');
      const { data: frontmatter, content } = matter(rawContent);

      const pageTitle = frontmatter.title || page.path.split('/').pop() || page.path;
      const priority = getDirectoryPriority(page.path);

      for (const section of splitByHeadings(content)) {
        records.push(...buildSectionRecords(page, pageTitle, priority, section));
      }
    } catch (error) {
      console.warn(`无法处理文件 ${page.filePath}: ${error.message}`);
    }
  }

  return records;
}

/**
 * Creates the Algolia client from the configured credentials.
 *
 * @returns {?object} The client, or `null` (after logging an error) when the
 *   credentials are missing.
 */
function createAlgoliaClient() {
  if (!ALGOLIA_APP_ID || !ALGOLIA_API_KEY) {
    console.error('❌ 缺少 Algolia 凭据。请检查环境变量 ALGOLIA_APP_ID 和 ALGOLIA_ADMIN_KEY');
    return null;
  }
  return algoliasearch(ALGOLIA_APP_ID, ALGOLIA_API_KEY);
}

/**
 * Uploads records to Algolia in batches of 50. When a batch fails, its records
 * are retried one by one so the offending record can be identified.
 *
 * @param {object[]} records - Records to upload.
 * @returns {Promise<void>}
 */
async function uploadToAlgolia(records) {
  const client = createAlgoliaClient();
  if (!client) {
    return;
  }

  try {
    console.log(`开始上传 ${records.length} 条记录到 Algolia...`);

    // Upload in batches to keep each request small.
    const batchSize = 50;
    const totalBatches = Math.ceil(records.length / batchSize);
    let successCount = 0;
    let failureCount = 0;

    for (let i = 0; i < records.length; i += batchSize) {
      const batch = records.slice(i, i + batchSize);
      const batchNumber = Math.floor(i / batchSize) + 1;

      try {
        const operations = batch.map(record => ({
          action: 'updateObject',
          indexName: INDEX_NAME,
          body: record,
        }));

        await client.multipleBatch({ requests: operations });
        successCount += batch.length;
        console.log(`✅ 批次 ${batchNumber}/${totalBatches} 已上传 (${successCount}/${records.length})`);
      } catch (batchError) {
        console.error(`❌ 批次 ${batchNumber} 上传失败:`, batchError.message);

        // Retry record by record to find the problematic one(s).
        for (const record of batch) {
          try {
            await client.addOrUpdateObject({
              indexName: INDEX_NAME,
              objectID: record.objectID,
              body: record,
            });
            successCount += 1;
          } catch (recordError) {
            failureCount += 1;
            console.error(`❌ 记录上传失败 (objectID: ${record.objectID}):`, recordError.message);
            console.error(`记录大小: ${Buffer.byteLength(JSON.stringify(record), 'utf8')} 字节`);
          }
        }
      }
    }

    console.log(`✅ 完成上传 - 成功: ${successCount}, 失败: ${failureCount}`);
  } catch (error) {
    console.error('❌ 上传失败:', error);
    if (error.message) console.error('错误信息:', error.message);
    if (error.status) console.error('状态码:', error.status);
  }
}

/**
 * Applies the index settings (faceting, highlighting, searchable attributes
 * and custom ranking) to the Algolia index. Failures are logged, not thrown.
 *
 * @returns {Promise<void>}
 */
async function settingAlgolia() {
  const client = createAlgoliaClient();
  if (!client) {
    return;
  }

  try {
    await client.setSettings({
      indexName: INDEX_NAME,
      indexSettings: {
        attributesForFaceting: ['lang', 'type'],
        attributesToHighlight: ['hierarchy.lvl0', 'hierarchy.lvl1', 'hierarchy.lvl2', 'content'],
        attributesToSnippet: ['content:20'],
        attributesToRetrieve: ['hierarchy', 'content', 'type', 'url', 'lang', 'priority'],
        searchableAttributes: [
          'hierarchy.lvl0',
          'hierarchy.lvl1',
          'hierarchy.lvl2',
          'content',
        ],
        customRanking: [
          'desc(priority)', // first by priority, higher values first
          'asc(contentLength)', // then by content length, shorter content first
        ],
      },
      forwardToReplicas: true,
    });

    console.log('✅ 索引设置已更新');
  } catch (error) {
    console.error('❌ 设置失败:', error);
  }
}

// Run: configure the index, generate the records, then upload them.
try {
  console.log('设置全局配置...');
  await settingAlgolia();

  console.log('开始生成 Algolia 索引数据...');
  const records = await generateAlgoliaRecords();
  console.log(`生成了 ${records.length} 条记录`);

  if (records.length > 0) {
    await uploadToAlgolia(records);
  } else {
    console.warn('⚠️ 没有找到文档,跳过上传');
  }
} catch (error) {
  console.error('❌ 处理失败:', error);
}