#!/usr/bin/env node import fs from 'fs'; import path from 'path'; const CLEAN_CONTENT_REGEX = { comments: /\/\*[\s\S]*?\*\/|\/\/.*$/gm, templateLiterals: /`[\s\S]*?`/g, strings: /'[^']*'|"[^"]*"/g, jsxExpressions: /\{.*?\}/g, htmlEntities: { quot: /"/g, amp: /&/g, lt: /</g, gt: />/g, apos: /'/g } }; const EXTRACTION_REGEX = { route: /]*>/g, path: /path=["']([^"']+)["']/, element: /element=\{<(\w+)[^}]*\/?\s*>\}/, helmet: /]*?>([\s\S]*?)<\/Helmet>/i, helmetTest: //i, title: /]*?>\s*(.*?)\s*<\/title>/i, description: /') .replace(CLEAN_CONTENT_REGEX.htmlEntities.apos, "'") .trim(); } function extractRoutes(appJsxPath) { if (!fs.existsSync(appJsxPath)) return new Map(); try { const content = fs.readFileSync(appJsxPath, 'utf8'); const routes = new Map(); const routeMatches = [...content.matchAll(EXTRACTION_REGEX.route)]; for (const match of routeMatches) { const routeTag = match[0]; const pathMatch = routeTag.match(EXTRACTION_REGEX.path); const elementMatch = routeTag.match(EXTRACTION_REGEX.element); const isIndex = routeTag.includes('index'); if (elementMatch) { const componentName = elementMatch[1]; let routePath; if (isIndex) { routePath = '/'; } else if (pathMatch) { routePath = pathMatch[1].startsWith('/') ? pathMatch[1] : `/${pathMatch[1]}`; } routes.set(componentName, routePath); } } return routes; } catch (error) { return new Map(); } } function findReactFiles(dir) { return fs.readdirSync(dir).map(item => path.join(dir, item)); } function extractHelmetData(content, filePath, routes) { const cleanedContent = cleanContent(content); if (!EXTRACTION_REGEX.helmetTest.test(cleanedContent)) { return null; } const helmetMatch = content.match(EXTRACTION_REGEX.helmet); if (!helmetMatch) return null; const helmetContent = helmetMatch[1]; const titleMatch = helmetContent.match(EXTRACTION_REGEX.title); const descMatch = helmetContent.match(EXTRACTION_REGEX.description); const title = cleanText(titleMatch?.[1]); const description = cleanText(descMatch?.[1]); const fileName = path.basename(filePath, path.extname(filePath)); const url = routes.length && routes.has(fileName) ? routes.get(fileName) : generateFallbackUrl(fileName); return { url, title: title || 'Untitled Page', description: description || 'No description available' }; } function generateFallbackUrl(fileName) { const cleanName = fileName.replace(/Page$/, '').toLowerCase(); return cleanName === 'app' ? '/' : `/${cleanName}`; } function generateLlmsTxt(pages) { const sortedPages = pages.sort((a, b) => a.title.localeCompare(b.title)); const pageEntries = sortedPages.map(page => `- [${page.title}](${page.url}): ${page.description}` ).join('\n'); return `## Pages\n${pageEntries}`; } function ensureDirectoryExists(dirPath) { if (!fs.existsSync(dirPath)) { fs.mkdirSync(dirPath, { recursive: true }); } } function processPageFile(filePath, routes) { try { const content = fs.readFileSync(filePath, 'utf8'); return extractHelmetData(content, filePath, routes); } catch (error) { console.error(`❌ Error processing ${filePath}:`, error.message); return null; } } function main() { const pagesDir = path.join(process.cwd(), 'src', 'pages'); const appJsxPath = path.join(process.cwd(), 'src', 'App.jsx'); let pages = []; if (!fs.existsSync(pagesDir)) { pages.push(processPageFile(appJsxPath, [])); } else { const routes = extractRoutes(appJsxPath); const reactFiles = findReactFiles(pagesDir); pages = reactFiles .map(filePath => processPageFile(filePath, routes)) .filter(Boolean); if (pages.length === 0) { console.error('❌ No pages with Helmet components found!'); process.exit(1); } } const llmsTxtContent = generateLlmsTxt(pages); const outputPath = path.join(process.cwd(), 'public', 'llms.txt'); ensureDirectoryExists(path.dirname(outputPath)); fs.writeFileSync(outputPath, llmsTxtContent, 'utf8'); } const isMainModule = import.meta.url === `file://${process.argv[1]}`; if (isMainModule) { main(); }