Sage100/tools/generate-llms.js
2026-01-20 11:04:04 +03:00

181 lines
4.9 KiB
JavaScript

#!/usr/bin/env node
import fs from 'fs';
import path from 'path';
import { pathToFileURL } from 'url';
/**
 * Patterns used to sanitise text.
 *
 * - `comments`, `templateLiterals`, `strings`: source-level noise that
 *   `cleanContent` blanks out.
 * - `jsxExpressions`: single-line `{...}` spans that `cleanText` strips
 *   from extracted text.
 * - `htmlEntities`: the named HTML entities that `cleanText` turns back
 *   into their literal characters. Each pattern matches the *escaped*
 *   form (e.g. `&amp;`); matching the bare character would make the
 *   replacement a no-op.
 */
const CLEAN_CONTENT_REGEX = {
  // Block comments, or line comments up to end of line (`m` makes `$` per-line).
  comments: /\/\*[\s\S]*?\*\/|\/\/.*$/gm,
  templateLiterals: /`[\s\S]*?`/g,
  strings: /'[^']*'|"[^"]*"/g,
  jsxExpressions: /\{.*?\}/g,
  htmlEntities: {
    quot: /&quot;/g,
    amp: /&amp;/g,
    lt: /&lt;/g,
    gt: /&gt;/g,
    apos: /&apos;/g,
  },
};
/**
 * Patterns used to pull routing and Helmet metadata out of JSX source.
 *
 * - `route`: a whole `<Route ...>` opening tag. `{...}` attribute values
 *   are consumed as a unit so that the `>` inside `element={<Page />}`
 *   does not end the match early (a plain `[^>]*>` would truncate the
 *   tag before the closing `}` and `element` could then never match).
 *   Braces nested inside an attribute value are not supported.
 * - `path`: the quoted value of a `path` attribute (capture group 1).
 * - `element`: the component name inside `element={<Name ... />}`
 *   (capture group 1).
 * - `helmet`: the inner content of a `<Helmet>` element (capture group 1).
 * - `helmetTest`: presence check for a `<Helmet>...</Helmet>` pair.
 * - `title`: trimmed text of a `<title>` element (capture group 1).
 * - `description`: `content` of `<meta name="description" ...>`
 *   (capture group 1); `name` must precede `content`.
 */
const EXTRACTION_REGEX = {
  route: /<Route\s+(?:[^>{]|\{[^}]*\})*>/g,
  path: /path=["']([^"']+)["']/,
  element: /element=\{<(\w+)[^}]*\/?\s*>\}/,
  helmet: /<Helmet[^>]*?>([\s\S]*?)<\/Helmet>/i,
  helmetTest: /<Helmet[\s\S]*?<\/Helmet>/i,
  title: /<title[^>]*?>\s*(.*?)\s*<\/title>/i,
  description: /<meta\s+name=["']description["']\s+content=["'](.*?)["']/i,
};
/**
 * Neutralise source-level noise in a file's text.
 *
 * Comments are removed outright; template literals and quoted strings are
 * each replaced by an empty string literal (`""`), in that order.
 *
 * @param {string} content - Raw file contents.
 * @returns {string} The content with comments removed and literals blanked.
 */
function cleanContent(content) {
  const { comments, templateLiterals, strings } = CLEAN_CONTENT_REGEX;
  const withoutComments = content.replace(comments, '');
  const withoutTemplates = withoutComments.replace(templateLiterals, '""');
  return withoutTemplates.replace(strings, '""');
}
/**
 * Tidy a piece of text extracted from JSX: drop `{...}` expressions,
 * decode the named HTML entities listed in
 * `CLEAN_CONTENT_REGEX.htmlEntities`, and trim surrounding whitespace.
 *
 * @param {string|null|undefined} text - Extracted text, possibly missing.
 * @returns {string|null|undefined} The cleaned text, or the input
 *   unchanged when it is falsy (`undefined`, `null`, `''`).
 */
function cleanText(text) {
  if (!text) return text;

  const { jsxExpressions, htmlEntities } = CLEAN_CONTENT_REGEX;
  return text
    .replace(jsxExpressions, '')
    .replace(htmlEntities.quot, '"')
    .replace(htmlEntities.lt, '<')
    .replace(htmlEntities.gt, '>')
    .replace(htmlEntities.apos, "'")
    // The ampersand pattern runs last: handling it earlier would let an
    // escaped entity such as `&amp;lt;` be unescaped twice.
    .replace(htmlEntities.amp, '&')
    .trim();
}
/**
 * Build a component-name → URL-path map from the `<Route>` tags in a file.
 *
 * For every tag matched by `EXTRACTION_REGEX.route` that has an
 * `element={<Component ... />}` attribute:
 *   - a route carrying the `index` attribute maps to `/`;
 *   - otherwise the `path` attribute is used, with a leading `/` added
 *     when missing;
 *   - routes with neither (e.g. pathless layout routes) are skipped so
 *     the map never holds an `undefined` path.
 *
 * This is best-effort: a missing or unreadable file yields an empty map
 * (a warning is logged in the unreadable case) rather than an exception.
 *
 * @param {string} appJsxPath - Path of the file containing the routes.
 * @returns {Map<string, string>} Component name → route path.
 */
function extractRoutes(appJsxPath) {
  const routes = new Map();
  if (!fs.existsSync(appJsxPath)) return routes;

  try {
    const content = fs.readFileSync(appJsxPath, 'utf8');

    for (const [routeTag] of content.matchAll(EXTRACTION_REGEX.route)) {
      const componentName = routeTag.match(EXTRACTION_REGEX.element)?.[1];
      if (!componentName) continue;

      // Look for `index` as an attribute name only: drop `{...}` values
      // (closing brace optional, in case the tag was cut short) and quoted
      // values first, so that e.g. path="/index-of-docs" or a component
      // called IndexPage is not mistaken for an index route.
      const attributesOnly = routeTag
        .replace(/\{[^}]*\}?/g, '')
        .replace(/(["'])[^"']*\1/g, '');
      const isIndex = /\bindex\b/.test(attributesOnly);

      const declaredPath = routeTag.match(EXTRACTION_REGEX.path)?.[1];

      let routePath;
      if (isIndex) {
        routePath = '/';
      } else if (declaredPath) {
        routePath = declaredPath.startsWith('/') ? declaredPath : `/${declaredPath}`;
      }

      if (routePath !== undefined) {
        routes.set(componentName, routePath);
      }
    }

    return routes;
  } catch (error) {
    console.warn(`⚠️ Could not extract routes from ${appJsxPath}: ${error.message}`);
    return new Map();
  }
}
/**
 * List the React source files that sit directly inside a directory.
 *
 * Subdirectories are skipped (they cannot be read as page files and are
 * not descended into), as are entries whose extension is not one of
 * `.js`, `.jsx`, `.ts`, `.tsx`.
 *
 * @param {string} dir - Directory to scan.
 * @returns {string[]} Paths formed by joining `dir` with each kept entry.
 * @throws {Error} Whatever `fs.readdirSync` throws if `dir` is unreadable.
 */
function findReactFiles(dir) {
  const reactExtensions = new Set(['.js', '.jsx', '.ts', '.tsx']);

  return fs
    .readdirSync(dir, { withFileTypes: true })
    // `!isDirectory()` rather than `isFile()` so symlinked files are kept.
    .filter((entry) => !entry.isDirectory()
      && reactExtensions.has(path.extname(entry.name).toLowerCase()))
    .map((entry) => path.join(dir, entry.name));
}
/**
 * Extract page metadata (URL, title, description) from a file's
 * `<Helmet>` block.
 *
 * The presence check runs on `cleanContent(content)`, so a `<Helmet>`
 * that only appears inside a comment or string literal is ignored; the
 * actual extraction then runs on the raw content.
 *
 * The URL is taken from `routes` when it is a `Map` holding a path for
 * the file's base name (without extension); otherwise
 * `generateFallbackUrl` derives one from the file name. Any non-Map
 * value for `routes` (such as an empty array) simply selects the fallback.
 *
 * @param {string} content - Raw file contents.
 * @param {string} filePath - Path of the file; only its base name is used.
 * @param {Map<string, string>|Array} routes - Component name → route path.
 * @returns {{url: string, title: string, description: string}|null}
 *   Page metadata, or `null` when no Helmet block is found.
 */
function extractHelmetData(content, filePath, routes) {
  const cleanedContent = cleanContent(content);
  if (!EXTRACTION_REGEX.helmetTest.test(cleanedContent)) {
    return null;
  }

  const helmetMatch = content.match(EXTRACTION_REGEX.helmet);
  if (!helmetMatch) return null;

  const helmetContent = helmetMatch[1];
  const titleMatch = helmetContent.match(EXTRACTION_REGEX.title);
  const descMatch = helmetContent.match(EXTRACTION_REGEX.description);
  const title = cleanText(titleMatch?.[1]);
  const description = cleanText(descMatch?.[1]);

  const fileName = path.basename(filePath, path.extname(filePath));
  // A Map exposes `size`, not `length`, so checking `routes.length` would
  // never use the extracted routes. `??` also covers a component that is
  // present in the map without a usable path.
  const mappedUrl = routes instanceof Map ? routes.get(fileName) : undefined;
  const url = mappedUrl ?? generateFallbackUrl(fileName);

  return {
    url,
    title: title || 'Untitled Page',
    description: description || 'No description available',
  };
}
/**
 * Derive a URL path from a component file name when no route is known.
 *
 * A trailing `Page` suffix is removed and the rest is lower-cased; the
 * name `app` (after that normalisation) maps to the site root.
 *
 * @param {string} fileName - File base name without extension.
 * @returns {string} `/` for `app`, otherwise `/<normalised name>`.
 */
function generateFallbackUrl(fileName) {
  const slug = fileName.replace(/Page$/, '').toLowerCase();
  if (slug === 'app') {
    return '/';
  }
  return `/${slug}`;
}
/**
 * Render the page list as the body of `llms.txt`.
 *
 * Pages are ordered by title using `localeCompare` and emitted as
 * Markdown list items under a `## Pages` heading. The input array is
 * left untouched: sorting happens on a copy.
 *
 * @param {Array<{url: string, title: string, description: string}>} pages
 * @returns {string} The heading followed by one `- [title](url): description`
 *   line per page.
 */
function generateLlmsTxt(pages) {
  // Array.prototype.sort works in place; copy first so the caller's
  // array keeps its order.
  const sortedPages = [...pages].sort((a, b) => a.title.localeCompare(b.title));
  const pageEntries = sortedPages
    .map((page) => `- [${page.title}](${page.url}): ${page.description}`)
    .join('\n');
  return `## Pages\n${pageEntries}`;
}
/**
 * Create a directory, including missing parents, unless something
 * already exists at that path.
 *
 * @param {string} dirPath - Directory to create.
 * @returns {void}
 */
function ensureDirectoryExists(dirPath) {
  const alreadyPresent = fs.existsSync(dirPath);
  if (alreadyPresent) return;

  fs.mkdirSync(dirPath, { recursive: true });
}
/**
 * Read one page file and extract its Helmet metadata.
 *
 * Any failure while reading or extracting is reported on stderr and
 * turned into a `null` result, so one bad file does not abort the run.
 *
 * @param {string} filePath - File to read as UTF-8.
 * @param {Map<string, string>|Array} routes - Passed through to
 *   `extractHelmetData`.
 * @returns {object|null} Whatever `extractHelmetData` returns, or `null`
 *   on error.
 */
function processPageFile(filePath, routes) {
  let pageData = null;

  try {
    const source = fs.readFileSync(filePath, 'utf8');
    pageData = extractHelmetData(source, filePath, routes);
  } catch (error) {
    console.error(`❌ Error processing ${filePath}:`, error.message);
  }

  return pageData;
}
/**
 * Entry point: collect page metadata and write `public/llms.txt` under
 * the current working directory.
 *
 * When `src/pages` exists, every file returned by `findReactFiles` is
 * processed with the routes extracted from `src/App.jsx`. Otherwise
 * `src/App.jsx` itself is processed as the only page. In both cases
 * files that yield no metadata are discarded, and the process exits with
 * status 1 if nothing is left, so `null` entries never reach
 * `generateLlmsTxt`.
 *
 * @returns {void}
 */
function main() {
  const projectRoot = process.cwd();
  const pagesDir = path.join(projectRoot, 'src', 'pages');
  const appJsxPath = path.join(projectRoot, 'src', 'App.jsx');

  let candidates;
  if (fs.existsSync(pagesDir)) {
    const routes = extractRoutes(appJsxPath);
    candidates = findReactFiles(pagesDir).map((filePath) => processPageFile(filePath, routes));
  } else {
    // No pages directory: treat App.jsx as the single page, with no routes.
    candidates = [processPageFile(appJsxPath, [])];
  }

  // processPageFile returns null for unreadable files and files without a
  // Helmet block; drop those before rendering.
  const pages = candidates.filter(Boolean);
  if (pages.length === 0) {
    console.error('❌ No pages with Helmet components found!');
    process.exit(1);
  }

  const llmsTxtContent = generateLlmsTxt(pages);
  const outputPath = path.join(projectRoot, 'public', 'llms.txt');
  ensureDirectoryExists(path.dirname(outputPath));
  fs.writeFileSync(outputPath, llmsTxtContent, 'utf8');
}
// Run main() only when this file is the script Node was started with,
// not when it is imported by another module. `pathToFileURL` builds the
// file URL with the same encoding rules as `import.meta.url`; a
// hand-assembled `file://` string does not match for Windows paths or
// paths containing spaces and other characters that get percent-encoded.
// The guard on `process.argv[1]` covers invocations without a script path.
const isMainModule = Boolean(process.argv[1])
  && import.meta.url === pathToFileURL(process.argv[1]).href;
if (isMainModule) {
  main();
}