Spaces:
Paused
Paused
| import { Router } from 'express'; | |
| import { SragRepository } from './sragRepository.js'; | |
| import { SragQueryRequest } from '@widget-tdc/mcp-types'; | |
| import { v4 as uuidv4 } from 'uuid'; | |
| export const sragRouter = Router(); | |
| const sragRepo = new SragRepository(); | |
| // Enhanced query classification with ML-like features | |
| function classifyQueryType(query: string): { type: 'analytical' | 'semantic'; confidence: number; features: string[] } { | |
| const lowerQuery = query.toLowerCase(); | |
| // Analytical indicators with weights | |
| const analyticalPatterns = [ | |
| { pattern: /\b(sum|count|average|total|maximum|minimum|max|min|avg)\b/g, weight: 3 }, | |
| { pattern: /\b(group by|order by|where|having|join|union)\b/g, weight: 2 }, | |
| { pattern: /\b(compare|comparison|difference|delta|change|vs|versus)\b/g, weight: 2 }, | |
| { pattern: /\b(calculate|compute|aggregate|breakdown)\b/g, weight: 2 }, | |
| { pattern: /\b(last|previous|current|this|quarter|month|year|period)\b/g, weight: 1 }, | |
| { pattern: /\b(cost|spend|revenue|profit|budget|expense)\b/g, weight: 1.5 }, | |
| ]; | |
| // Semantic indicators with weights | |
| const semanticPatterns = [ | |
| { pattern: /\b(explain|describe|what is|how does|why)\b/g, weight: 3 }, | |
| { pattern: /\b(meaning|definition|concept|overview|summary)\b/g, weight: 2 }, | |
| { pattern: /\b(impact|effect|influence|consequence)\b/g, weight: 2 }, | |
| { pattern: /\b(find|locate|search|discover|explore)\b/g, weight: 1.5 }, | |
| { pattern: /\b(understand|clarify|elaborate|detail)\b/g, weight: 1.5 }, | |
| ]; | |
| let analyticalScore = 0; | |
| let semanticScore = 0; | |
| const features: string[] = []; | |
| // Calculate analytical score | |
| analyticalPatterns.forEach(({ pattern, weight }) => { | |
| const matches = lowerQuery.match(pattern); | |
| if (matches) { | |
| analyticalScore += matches.length * weight; | |
| features.push(`analytical: ${pattern.source} (${matches.length})`); | |
| } | |
| }); | |
| // Calculate semantic score | |
| semanticPatterns.forEach(({ pattern, weight }) => { | |
| const matches = lowerQuery.match(pattern); | |
| if (matches) { | |
| semanticScore += matches.length * weight; | |
| features.push(`semantic: ${pattern.source} (${matches.length})`); | |
| } | |
| }); | |
| // Length-based heuristic (longer queries tend to be semantic) | |
| const wordCount = query.split(/\s+/).length; | |
| if (wordCount > 10) { | |
| semanticScore += 1; | |
| features.push(`length: ${wordCount} words`); | |
| } | |
| // Question mark heuristic | |
| if (query.includes('?')) { | |
| semanticScore += 2; | |
| features.push('question_mark'); | |
| } | |
| // Calculate confidence and determine type | |
| const totalScore = analyticalScore + semanticScore; | |
| const confidence = totalScore > 0 ? Math.max(analyticalScore, semanticScore) / totalScore : 0.5; | |
| const type = analyticalScore > semanticScore ? 'analytical' : 'semantic'; | |
| return { type, confidence, features }; | |
| } | |
| // Query endpoint - determines if query is analytical or semantic | |
| sragRouter.post('/query', async (req, res) => { | |
| try { | |
| const request: SragQueryRequest = req.body; | |
| if (!request.orgId || !request.naturalLanguageQuery) { | |
| return res.status(400).json({ | |
| error: 'Missing required fields: orgId, naturalLanguageQuery', | |
| }); | |
| } | |
| // Enhanced ML-based query classification with confidence scoring | |
| const classification = classifyQueryType(request.naturalLanguageQuery); | |
| const isAnalytical = classification.type === 'analytical'; | |
| const traceId = uuidv4(); | |
| if (isAnalytical) { | |
| // For analytical queries, query structured facts | |
| const facts = await sragRepo.queryFacts(request.orgId); | |
| res.json({ | |
| type: 'analytical', | |
| result: facts, | |
| sqlQuery: 'SELECT * FROM structured_facts WHERE org_id = ?', | |
| metadata: { | |
| traceId, | |
| docIds: facts.map(f => f.doc_id).filter(Boolean), | |
| classification: classification, | |
| }, | |
| }); | |
| } else { | |
| // For semantic queries, search documents | |
| const keywords = request.naturalLanguageQuery.split(' ').filter((w: string) => w.length > 3); | |
| const documents = keywords.length > 0 | |
| ? await sragRepo.searchDocuments(request.orgId, keywords[0]) | |
| : []; | |
| res.json({ | |
| type: 'semantic', | |
| result: documents, | |
| sqlQuery: null, | |
| metadata: { | |
| traceId, | |
| docIds: documents.map(d => d.id), | |
| classification: classification, | |
| }, | |
| }); | |
| } | |
| } catch (error: any) { | |
| console.error('SRAG query error:', error); | |
| res.status(500).json({ | |
| success: false, | |
| error: error.message, | |
| }); | |
| } | |
| }); | |
| // Ingest document | |
| sragRouter.post('/ingest/document', async (req, res) => { | |
| try { | |
| const input = req.body; | |
| const docId = await sragRepo.ingestDocument(input); | |
| res.json({ | |
| success: true, | |
| docId, | |
| }); | |
| } catch (error: any) { | |
| console.error('Document ingest error:', error); | |
| res.status(500).json({ | |
| success: false, | |
| error: error.message, | |
| }); | |
| } | |
| }); | |
| // Ingest structured fact | |
| sragRouter.post('/ingest/fact', async (req, res) => { | |
| try { | |
| const input = req.body; | |
| const factId = await sragRepo.ingestFact(input); | |
| res.json({ | |
| success: true, | |
| factId, | |
| }); | |
| } catch (error: any) { | |
| console.error('Fact ingest error:', error); | |
| res.status(500).json({ | |
| success: false, | |
| error: error.message, | |
| }); | |
| } | |
| }); | |