// Page metadata captured with this file: Spaces: Paused · File size: 5,369 Bytes · revision 529090e
import { Router } from 'express';
import { SragRepository } from './sragRepository.js';
import { SragQueryRequest } from '@widget-tdc/mcp-types';
import { v4 as uuidv4 } from 'uuid';
// Express router that the SRAG route handlers in this module are registered on.
export const sragRouter = Router();
// Single SragRepository instance shared by the handlers in this module.
const sragRepo = new SragRepository();
// Enhanced query classification with ML-like features
// (in practice: a weighted keyword heuristic, no trained model involved).

/** Keyword groups that suggest an analytical query, with the score added per match. */
const ANALYTICAL_PATTERNS: ReadonlyArray<{ pattern: RegExp; weight: number }> = [
  { pattern: /\b(sum|count|average|total|maximum|minimum|max|min|avg)\b/g, weight: 3 },
  { pattern: /\b(group by|order by|where|having|join|union)\b/g, weight: 2 },
  { pattern: /\b(compare|comparison|difference|delta|change|vs|versus)\b/g, weight: 2 },
  { pattern: /\b(calculate|compute|aggregate|breakdown)\b/g, weight: 2 },
  { pattern: /\b(last|previous|current|this|quarter|month|year|period)\b/g, weight: 1 },
  { pattern: /\b(cost|spend|revenue|profit|budget|expense)\b/g, weight: 1.5 },
];

/** Keyword groups that suggest a semantic query, with the score added per match. */
const SEMANTIC_PATTERNS: ReadonlyArray<{ pattern: RegExp; weight: number }> = [
  { pattern: /\b(explain|describe|what is|how does|why)\b/g, weight: 3 },
  { pattern: /\b(meaning|definition|concept|overview|summary)\b/g, weight: 2 },
  { pattern: /\b(impact|effect|influence|consequence)\b/g, weight: 2 },
  { pattern: /\b(find|locate|search|discover|explore)\b/g, weight: 1.5 },
  { pattern: /\b(understand|clarify|elaborate|detail)\b/g, weight: 1.5 },
];

/**
 * Sums the weighted match counts of `patterns` against `text`.
 *
 * @param text - Lower-cased query text to scan.
 * @param patterns - Weighted regexes; each must carry the `g` flag so every occurrence is counted.
 * @param label - Prefix used in the emitted feature strings ("analytical" or "semantic").
 * @returns The accumulated score and one feature string per pattern that matched.
 */
function scoreQueryPatterns(
  text: string,
  patterns: ReadonlyArray<{ pattern: RegExp; weight: number }>,
  label: string,
): { score: number; features: string[] } {
  let score = 0;
  const features: string[] = [];
  for (const { pattern, weight } of patterns) {
    // String.prototype.match with a global regex always scans from index 0,
    // so reusing the module-level regex objects across calls is safe.
    const matches = text.match(pattern);
    if (matches) {
      score += matches.length * weight;
      features.push(`${label}: ${pattern.source} (${matches.length})`);
    }
  }
  return { score, features };
}

/**
 * Classifies a natural-language query as analytical or semantic using weighted keyword matches
 * plus two extra heuristics (query length and presence of a question mark).
 *
 * @param query - Raw query text; matching is case-insensitive.
 * @returns `type`: the higher-scoring category (ties and zero scores resolve to `'semantic'`);
 *   `confidence`: winning score divided by the total score, or 0.5 when nothing scored;
 *   `features`: human-readable list of the signals that contributed, in evaluation order.
 */
function classifyQueryType(query: string): { type: 'analytical' | 'semantic'; confidence: number; features: string[] } {
  const lowerQuery = query.toLowerCase();

  const analytical = scoreQueryPatterns(lowerQuery, ANALYTICAL_PATTERNS, 'analytical');
  const semantic = scoreQueryPatterns(lowerQuery, SEMANTIC_PATTERNS, 'semantic');

  const analyticalScore = analytical.score;
  let semanticScore = semantic.score;
  const features: string[] = [...analytical.features, ...semantic.features];

  // Length-based heuristic (longer queries tend to be semantic).
  // Trim first: splitting untrimmed text yields empty tokens for leading/trailing
  // whitespace, and an empty string would otherwise count as one word.
  const trimmedQuery = query.trim();
  const wordCount = trimmedQuery === '' ? 0 : trimmedQuery.split(/\s+/).length;
  if (wordCount > 10) {
    semanticScore += 1;
    features.push(`length: ${wordCount} words`);
  }

  // Question mark heuristic
  if (query.includes('?')) {
    semanticScore += 2;
    features.push('question_mark');
  }

  // Confidence is the winner's share of the total; 0.5 signals "no evidence either way".
  const totalScore = analyticalScore + semanticScore;
  const confidence = totalScore > 0 ? Math.max(analyticalScore, semanticScore) / totalScore : 0.5;
  const type = analyticalScore > semanticScore ? 'analytical' : 'semantic';
  return { type, confidence, features };
}
// Query endpoint - determines if query is analytical or semantic
/**
 * POST /query
 *
 * Body: `{ orgId, naturalLanguageQuery }`.
 * - 400 when either field is missing/empty or `naturalLanguageQuery` is not a string.
 * - Analytical queries respond with the structured facts returned by `sragRepo.queryFacts(orgId)`.
 * - Semantic queries respond with `sragRepo.searchDocuments(orgId, keyword)`, where `keyword` is the
 *   first word longer than three characters; an empty result is returned when no such word exists.
 * - 500 with `{ success: false, error }` when anything throws.
 *
 * Every successful response carries `metadata.traceId` (fresh UUID), `metadata.docIds` and the
 * classification details.
 */
sragRouter.post('/query', async (req, res) => {
  try {
    // req.body can be undefined when the request carried no parsed body;
    // fall back to an empty object so that case is reported as a 400, not a TypeError.
    const request: Partial<SragQueryRequest> = req.body ?? {};
    const { orgId, naturalLanguageQuery } = request;

    // The query must be a string: string methods are called on it below.
    if (!orgId || typeof naturalLanguageQuery !== 'string' || !naturalLanguageQuery) {
      res.status(400).json({
        error: 'Missing required fields: orgId, naturalLanguageQuery',
      });
      return;
    }

    // Enhanced ML-based query classification with confidence scoring
    const classification = classifyQueryType(naturalLanguageQuery);
    const isAnalytical = classification.type === 'analytical';
    const traceId = uuidv4();

    if (isAnalytical) {
      // For analytical queries, query structured facts
      const facts = await sragRepo.queryFacts(orgId);
      res.json({
        type: 'analytical',
        result: facts,
        sqlQuery: 'SELECT * FROM structured_facts WHERE org_id = ?',
        metadata: {
          traceId,
          // Drop facts that have no associated document.
          docIds: facts.map(f => f.doc_id).filter(Boolean),
          classification: classification,
        },
      });
    } else {
      // For semantic queries, search documents.
      // Split on any whitespace and strip surrounding punctuation so a keyword such as
      // "impact?" is searched as "impact".
      const keywords = naturalLanguageQuery
        .split(/\s+/)
        .map((w: string) => w.replace(/^[.,;:!?"'()\[\]{}]+|[.,;:!?"'()\[\]{}]+$/g, ''))
        .filter((w: string) => w.length > 3);
      const documents = keywords.length > 0
        ? await sragRepo.searchDocuments(orgId, keywords[0])
        : [];
      res.json({
        type: 'semantic',
        result: documents,
        sqlQuery: null,
        metadata: {
          traceId,
          docIds: documents.map(d => d.id),
          classification: classification,
        },
      });
    }
  } catch (error: unknown) {
    console.error('SRAG query error:', error);
    res.status(500).json({
      success: false,
      // Non-Error throwables have no `.message`; stringify them so the field is never dropped.
      error: error instanceof Error ? error.message : String(error),
    });
  }
});
// Ingest document
/**
 * POST /ingest/document
 *
 * Passes the request body to `sragRepo.ingestDocument` and responds with
 * `{ success: true, docId }`.
 * - 400 with `{ success: false, error }` when the body is missing or not an object.
 * - 500 with `{ success: false, error }` when ingestion throws.
 */
sragRouter.post('/ingest/document', async (req, res) => {
  try {
    const input = req.body;
    // A missing or primitive body is a client error; reject it before touching the repository.
    if (input === null || typeof input !== 'object') {
      res.status(400).json({
        success: false,
        error: 'Request body must be a JSON object',
      });
      return;
    }
    const docId = await sragRepo.ingestDocument(input);
    res.json({
      success: true,
      docId,
    });
  } catch (error: unknown) {
    console.error('Document ingest error:', error);
    res.status(500).json({
      success: false,
      // Non-Error throwables have no `.message`; stringify them so the field is never dropped.
      error: error instanceof Error ? error.message : String(error),
    });
  }
});
// Ingest structured fact
/**
 * POST /ingest/fact
 *
 * Passes the request body to `sragRepo.ingestFact` and responds with
 * `{ success: true, factId }`.
 * - 400 with `{ success: false, error }` when the body is missing or not an object.
 * - 500 with `{ success: false, error }` when ingestion throws.
 */
sragRouter.post('/ingest/fact', async (req, res) => {
  try {
    const input = req.body;
    // A missing or primitive body is a client error; reject it before touching the repository.
    if (input === null || typeof input !== 'object') {
      res.status(400).json({
        success: false,
        error: 'Request body must be a JSON object',
      });
      return;
    }
    const factId = await sragRepo.ingestFact(input);
    res.json({
      success: true,
      factId,
    });
  } catch (error: unknown) {
    console.error('Fact ingest error:', error);
    res.status(500).json({
      success: false,
      // Non-Error throwables have no `.message`; stringify them so the field is never dropped.
      error: error instanceof Error ? error.message : String(error),
    });
  }
});
|