Spaces:
Running
Running
/**
 * Unified AI Orchestration Pipeline
 *
 * Flow: User Input → Schema Validation → Domain Engine → Structured JSON →
 * AI Summarization → Self-Reverification → Language Localization → Response
 *
 * Rules:
 * - AI must NEVER calculate business values.
 * - AI must ONLY summarize structured outputs.
 * - AI must NOT fabricate data.
 * - AI must NOT invent government schemes.
 * - AI must NOT modify numeric outputs.
 */
| import { generateAIContent } from './aiHelper.js'; | |
/**
 * Supported response languages, keyed by ISO 639 language code and mapped to
 * the English name of the language.
 *
 * Contains English plus the 22 languages listed in the Eighth Schedule of the
 * Constitution of India (23 entries in total).
 */
export const LANGUAGE_MAP = {
  en: 'English',
  hi: 'Hindi',
  bn: 'Bengali',
  mr: 'Marathi',
  ta: 'Tamil',
  te: 'Telugu',
  pa: 'Punjabi',
  gu: 'Gujarati',
  kn: 'Kannada',
  ml: 'Malayalam',
  or: 'Odia',
  as: 'Assamese',
  ur: 'Urdu',
  sa: 'Sanskrit',
  kok: 'Konkani',
  mni: 'Manipuri',
  brx: 'Bodo',
  sat: 'Santhali',
  mai: 'Maithili',
  doi: 'Dogri',
  ne: 'Nepali',
  ks: 'Kashmiri',
  sd: 'Sindhi',
};
/**
 * Resolve the response language for a request.
 *
 * Sources are consulted in priority order: `req.query.lang`,
 * `req.body.language`, `req.body.lang`, then the Accept-Language header.
 * The first source that names a supported language wins; an unsupported or
 * non-string value does not block a lower-priority source. Explicit values
 * are trimmed and lower-cased before lookup.
 *
 * @param {object} req - Express request object
 * @returns {string} A key of LANGUAGE_MAP; 'en' when no source matches
 */
export function extractLanguage(req) {
  // Own-property check: a plain `LANGUAGE_MAP[code]` lookup would also accept
  // inherited names such as "constructor" or "toString".
  const isSupported = (code) =>
    typeof code === 'string' && Object.prototype.hasOwnProperty.call(LANGUAGE_MAP, code);

  const explicitChoices = [req.query?.lang, req.body?.language, req.body?.lang];
  for (const choice of explicitChoices) {
    if (typeof choice !== 'string') continue;
    const code = choice.trim().toLowerCase();
    if (isSupported(code)) return code;
  }

  // The header is only parsed when no explicit choice was usable.
  const headerCode = parseAcceptLanguage(req.headers?.['accept-language']);
  return isSupported(headerCode) ? headerCode : 'en';
}
/**
 * Pick the most preferred supported language from an Accept-Language header.
 *
 * Entries are ranked by their `q` weight (default 1), with header order as the
 * tie-breaker. Entries with `q=0` are skipped because that weight marks a
 * language as not acceptable. Only the primary subtag is compared, so "hi-IN"
 * matches "hi". A malformed weight is treated as 1.
 *
 * @param {string} [header] - Raw Accept-Language header value
 * @returns {string|null} A key of LANGUAGE_MAP, or null when nothing matches
 */
function parseAcceptLanguage(header) {
  if (typeof header !== 'string' || header.trim() === '') return null;

  const ranked = header
    .split(',')
    .map((entry, position) => {
      const [range, ...params] = entry.split(';').map((piece) => piece.trim());
      const weightParam = params.find((param) => /^q\s*=/i.test(param));
      const parsedWeight = weightParam ? Number.parseFloat(weightParam.split('=')[1]) : 1;
      return {
        code: range.toLowerCase().split('-')[0],
        weight: Number.isNaN(parsedWeight) ? 1 : parsedWeight,
        position,
      };
    })
    .filter(({ weight }) => weight > 0)
    .sort((a, b) => b.weight - a.weight || a.position - b.position);

  const supported = ranked.find(({ code }) =>
    Object.prototype.hasOwnProperty.call(LANGUAGE_MAP, code),
  );
  return supported ? supported.code : null;
}
/**
 * Look up the English display name for a language code.
 *
 * @param {string} code - Language code (a key of LANGUAGE_MAP)
 * @returns {string} The mapped name, or 'English' for any unknown code
 */
export function getLanguageName(code) {
  // Own-property check so inherited names ("constructor", "toString", ...)
  // fall back to 'English' instead of returning a function.
  const isKnown = Object.prototype.hasOwnProperty.call(LANGUAGE_MAP, code);
  return isKnown ? LANGUAGE_MAP[code] : 'English';
}
/**
 * Core orchestration pipeline: turn domain-engine output into a localized,
 * numerically verified summary.
 *
 * The AI is asked for a summary at most twice (the second time with an extra
 * strictness notice and a lower default temperature). A response is accepted
 * only if it is non-empty text and passes verifyNumericConsistency. If both
 * attempts are rejected, or the AI call throws, the template-based fallback
 * summary is returned instead.
 *
 * @param {object} opts
 * @param {object} opts.structuredData - Domain engine output (numbers, data)
 * @param {string} opts.domainContext - What this data is about (e.g. "crop recommendation")
 * @param {string} [opts.languageCode='en'] - Target language code
 * @param {string} [opts.userQuery=''] - Original user question (optional)
 * @param {object} [opts.aiOptions={}] - Options spread over the per-attempt
 *   defaults (temperature, maxTokens), so they take precedence on both attempts
 * @returns {Promise<object>} { summary, structuredData, language, languageCode, verified }
 */
export async function orchestrate({
  structuredData,
  domainContext,
  languageCode = 'en',
  userQuery = '',
  aiOptions = {},
}) {
  const langName = getLanguageName(languageCode);
  const dataStr = JSON.stringify(structuredData, null, 2);
  const basePrompt = buildSummaryPrompt({ dataStr, domainContext, langName, userQuery });

  // First attempt uses the plain prompt; the retry appends a strictness notice.
  const attempts = [
    { prompt: basePrompt, temperature: 0.3 },
    {
      prompt:
        basePrompt +
        '\n\nIMPORTANT: Your previous response contained numeric inaccuracies. ' +
        'You MUST use the EXACT numbers from the JSON data. Do NOT round, estimate, or change any value.',
      temperature: 0.1,
    },
  ];

  let summary = null;
  try {
    for (const { prompt, temperature } of attempts) {
      const candidate = await generateAIContent(prompt, {
        temperature,
        maxTokens: 2048,
        ...aiOptions,
      });
      // Reject empty / non-text responses even when there are no numbers to check.
      const isUsableText = typeof candidate === 'string' && candidate.trim() !== '';
      if (isUsableText && verifyNumericConsistency(structuredData, candidate)) {
        summary = candidate;
        break;
      }
    }
  } catch (err) {
    // AI unavailable — fall through to the structured fallback, but leave a trace.
    console.warn(
      `[orchestrate] AI summarization failed; using template fallback: ${err?.message ?? err}`,
    );
  }

  if (summary === null) {
    summary = buildFallbackSummary(structuredData, domainContext, langName);
  }

  return {
    summary,
    structuredData,
    language: langName,
    languageCode,
    // Always true: the summary either passed verification or is the fallback
    // rendered directly from structuredData.
    verified: true,
  };
}
/**
 * Build the summarization-only prompt sent to the AI (no calculation allowed).
 *
 * @param {object} parts
 * @param {string} parts.dataStr - Structured data already serialized as JSON text
 * @param {string} parts.domainContext - Short description of what the data is about
 * @param {string} parts.langName - Name of the language the AI must answer in
 * @param {string} [parts.userQuery] - Original user question; when empty the
 *   "USER QUESTION" line is left blank
 * @returns {string} The complete prompt text
 */
function buildSummaryPrompt({ dataStr, domainContext, langName, userQuery }) {
  return `You are an agricultural AI assistant for Indian farmers.
Your task: Summarize the following STRUCTURED DATA into a clear, simple, farmer-friendly explanation.
DOMAIN: ${domainContext}
${userQuery ? `USER QUESTION: ${userQuery}` : ''}
STRUCTURED DATA (source of truth — do NOT modify any values):
${dataStr}
STRICT RULES:
1. Use ONLY the data provided. Do NOT invent or fabricate any information.
2. Do NOT change, round, or estimate any numeric values — use them EXACTLY as given.
3. Do NOT invent government schemes, subsidies, or programs not mentioned in the data.
4. Use simple, easy-to-understand language suitable for farmers with limited literacy.
5. Avoid technical jargon — explain terms simply.
6. Keep the summary concise and actionable.
7. Respond STRICTLY in ${langName} language.
8. If ${langName} is not English, translate ALL content including technical terms.
9. Use agricultural terminology familiar to local farmers.`;
}
/**
 * Minimum fraction of the extracted numbers that must appear in a summary for
 * it to count as numerically consistent.
 */
const NUMERIC_VERIFICATION_THRESHOLD = 0.5;

/**
 * Verify that key numeric values from structuredData appear in the summary.
 *
 * Each number (as rendered by `String(num)`) must occur as a standalone
 * numeric token: it may not be part of a longer digit run or of a decimal
 * such as "2.5" / "5.5". The summary is checked both as written and with
 * digit-group commas removed, so 1500 matches "1,500" and 150000 matches
 * "1,50,000".
 *
 * @param {*} structuredData - Data passed to extractNumbers
 * @param {string} summary - AI-generated text to check
 * @returns {boolean} true when there is nothing to verify, or when at least
 *   NUMERIC_VERIFICATION_THRESHOLD of the numbers are found; false otherwise,
 *   including when summary is not a string
 */
function verifyNumericConsistency(structuredData, summary) {
  const numbers = extractNumbers(structuredData);
  if (numbers.length === 0) return true; // nothing to verify
  if (typeof summary !== 'string') return false;

  // Second view of the text with thousands / lakh separators stripped.
  const ungrouped = summary.replace(/(?<=\d),(?=\d)/g, '');

  const matchCount = numbers.filter((num) => {
    const escaped = String(num).replace(/[.*+?^${}()|[\]\\-]/g, '\\$&');
    // Not preceded by a digit or "<digit>.", not followed by a digit or ".<digit>".
    const token = new RegExp(`(?<!\\d)(?<!\\d\\.)${escaped}(?!\\d)(?!\\.\\d)`);
    return token.test(summary) || token.test(ungrouped);
  }).length;

  return matchCount >= Math.ceil(numbers.length * NUMERIC_VERIFICATION_THRESHOLD);
}
/**
 * Collect finite numeric values from a nested structure, depth-first.
 *
 * Traversal is bounded: nodes deeper than level 3 are ignored, only the first
 * 20 items of an array and the first 30 values of an object are visited, and
 * collection stops once 20 numbers have been found.
 *
 * @param {*} obj - Value to scan (number, array, object, or anything else)
 * @param {number} [depth=0] - Level assigned to `obj`; nested values are one level deeper
 * @returns {number[]} Up to 20 finite numbers in traversal order
 */
function extractNumbers(obj, depth = 0) {
  const MAX_DEPTH = 3;
  const MAX_NUMBERS = 20;
  const MAX_ARRAY_ITEMS = 20;
  const MAX_OBJECT_VALUES = 30;

  const found = [];

  const visit = (node, level) => {
    if (level > MAX_DEPTH || node == null || found.length >= MAX_NUMBERS) return;

    if (typeof node === 'number') {
      // NaN and ±Infinity are skipped.
      if (Number.isFinite(node)) found.push(node);
      return;
    }
    if (typeof node !== 'object') return;

    const children = Array.isArray(node)
      ? node.slice(0, MAX_ARRAY_ITEMS)
      : Object.values(node).slice(0, MAX_OBJECT_VALUES);
    for (const child of children) {
      visit(child, level + 1);
    }
  };

  visit(obj, depth);
  return found;
}
/**
 * Build a safe template-based fallback summary for when the AI is unavailable
 * or its output fails verification.
 *
 * Renders a header line followed by one bullet per top-level entry of
 * structuredData (first 15 entries only). Keys are made readable by splitting
 * camelCase and replacing underscores with spaces. This is the last-resort
 * path, so it tolerates missing data and values that cannot be serialized.
 *
 * @param {object|null|undefined} structuredData - Domain engine output
 * @param {string} domainContext - Short description used in the header
 * @param {string} langName - Language name shown in the header
 * @returns {string} Plain-text summary
 */
function buildFallbackSummary(structuredData, domainContext, langName) {
  const formatValue = (val) => {
    if (typeof val !== 'object') return String(val);
    try {
      return JSON.stringify(val);
    } catch {
      // Cyclic or otherwise unserializable value: degrade instead of throwing.
      return String(val);
    }
  };

  const lines = Object.entries(structuredData ?? {})
    .slice(0, 15)
    .map(([key, val]) => {
      const label = key.replace(/([A-Z])/g, ' $1').replace(/_/g, ' ').trim();
      return `• ${label}: ${formatValue(val)}`;
    });

  return `[${domainContext}] (${langName})\n\n${lines.join('\n')}`;
}
// Default export bundling the module's public API; the same members are also
// available as named exports.
export default {
  orchestrate,
  extractLanguage,
  getLanguageName,
  LANGUAGE_MAP,
};