File size: 5,369 Bytes
529090e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
import { Router } from 'express';
import { SragRepository } from './sragRepository.js';
import { SragQueryRequest } from '@widget-tdc/mcp-types';
import { v4 as uuidv4 } from 'uuid';

// Express router on which the SRAG endpoints in this module (/query, /ingest/document, /ingest/fact) are registered.
export const sragRouter = Router();
// Single repository instance shared by every route handler in this module.
const sragRepo = new SragRepository();

/**
 * Classifies a natural-language query as `analytical` or `semantic` using
 * weighted keyword heuristics (no model is involved; the scoring is rule-based).
 *
 * Scoring:
 * - every occurrence of an analytical/semantic keyword adds that pattern's weight
 *   to the corresponding score;
 * - queries longer than 10 words add 1 to the semantic score;
 * - a question mark anywhere in the query adds 2 to the semantic score.
 *
 * @param query - Raw query text; keyword matching is case-insensitive.
 * @returns `type` is `analytical` only when the analytical score is strictly
 *   greater than the semantic score (ties, including "no signal at all", resolve
 *   to `semantic`). `confidence` is the winning score's share of the total score,
 *   or 0.5 when nothing matched. `features` lists every signal that contributed,
 *   in evaluation order (analytical patterns, semantic patterns, length, question mark).
 */
function classifyQueryType(query: string): { type: 'analytical' | 'semantic'; confidence: number; features: string[] } {
  const lowerQuery = query.toLowerCase();

  // Analytical indicators with weights
  const analyticalPatterns = [
    { pattern: /\b(sum|count|average|total|maximum|minimum|max|min|avg)\b/g, weight: 3 },
    { pattern: /\b(group by|order by|where|having|join|union)\b/g, weight: 2 },
    { pattern: /\b(compare|comparison|difference|delta|change|vs|versus)\b/g, weight: 2 },
    { pattern: /\b(calculate|compute|aggregate|breakdown)\b/g, weight: 2 },
    { pattern: /\b(last|previous|current|this|quarter|month|year|period)\b/g, weight: 1 },
    { pattern: /\b(cost|spend|revenue|profit|budget|expense)\b/g, weight: 1.5 },
  ];

  // Semantic indicators with weights
  const semanticPatterns = [
    { pattern: /\b(explain|describe|what is|how does|why)\b/g, weight: 3 },
    { pattern: /\b(meaning|definition|concept|overview|summary)\b/g, weight: 2 },
    { pattern: /\b(impact|effect|influence|consequence)\b/g, weight: 2 },
    { pattern: /\b(find|locate|search|discover|explore)\b/g, weight: 1.5 },
    { pattern: /\b(understand|clarify|elaborate|detail)\b/g, weight: 1.5 },
  ];

  const features: string[] = [];

  // Sums `weight * occurrences` over every pattern that matches and records each
  // hit in `features` under the given label.
  const scorePatterns = (
    patterns: ReadonlyArray<{ pattern: RegExp; weight: number }>,
    label: 'analytical' | 'semantic',
  ): number => {
    let score = 0;
    for (const { pattern, weight } of patterns) {
      // String.prototype.match with a global regex returns all matches (or null)
      // and does not depend on the regex's lastIndex, so the literals are safe to reuse.
      const matches = lowerQuery.match(pattern);
      if (matches) {
        score += matches.length * weight;
        features.push(`${label}: ${pattern.source} (${matches.length})`);
      }
    }
    return score;
  };

  const analyticalScore = scorePatterns(analyticalPatterns, 'analytical');
  let semanticScore = scorePatterns(semanticPatterns, 'semantic');

  // Length-based heuristic (longer queries tend to be semantic).
  // Empty tokens produced by leading/trailing whitespace are discarded so that
  // padding does not inflate the word count (and an empty query counts as 0 words).
  const wordCount = query.split(/\s+/).filter((word) => word.length > 0).length;
  if (wordCount > 10) {
    semanticScore += 1;
    features.push(`length: ${wordCount} words`);
  }

  // Question mark heuristic
  if (query.includes('?')) {
    semanticScore += 2;
    features.push('question_mark');
  }

  // Confidence is the winner's share of all evidence; 0.5 means "no evidence either way".
  const totalScore = analyticalScore + semanticScore;
  const confidence = totalScore > 0 ? Math.max(analyticalScore, semanticScore) / totalScore : 0.5;

  // Strict comparison: ties fall through to 'semantic'.
  const type = analyticalScore > semanticScore ? 'analytical' : 'semantic';

  return { type, confidence, features };
}

// Query endpoint - classifies the natural-language query as analytical or semantic
// and answers it from structured facts or from document search respectively.
//
// Request body: expects `orgId` and a non-empty string `naturalLanguageQuery`.
// Responses:
//   200 { type, result, sqlQuery, metadata: { traceId, docIds, classification } }
//   400 { error }            when a required field is missing or not a string
//   500 { success, error }   when classification or a repository call throws
sragRouter.post('/query', async (req, res) => {
  try {
    // The body is untrusted client input and may be absent entirely if no body
    // parser populated it, so treat every field as optional until validated.
    const request: Partial<SragQueryRequest> = req.body ?? {};
    const { orgId, naturalLanguageQuery } = request;

    // The type check matters: a truthy non-string (number, array, object) would
    // otherwise reach the classifier and fail there with a 500 instead of a 400.
    if (!orgId || typeof naturalLanguageQuery !== 'string' || naturalLanguageQuery.length === 0) {
      res.status(400).json({
        error: 'Missing required fields: orgId, naturalLanguageQuery',
      });
      return;
    }

    // Heuristic keyword-based classification with confidence scoring
    const classification = classifyQueryType(naturalLanguageQuery);

    // Fresh identifier per request, returned to the caller in the response metadata.
    const traceId = uuidv4();

    if (classification.type === 'analytical') {
      // For analytical queries, query structured facts
      const facts = await sragRepo.queryFacts(orgId);

      res.json({
        type: 'analytical',
        result: facts,
        // Fixed descriptive string reported to the client; it is not executed here.
        // NOTE(review): presumably mirrors what queryFacts runs - confirm against the repository.
        sqlQuery: 'SELECT * FROM structured_facts WHERE org_id = ?',
        metadata: {
          traceId,
          // Facts without a doc_id are dropped from the reference list.
          docIds: facts.map(f => f.doc_id).filter(Boolean),
          classification,
        },
      });
      return;
    }

    // For semantic queries, search documents.
    // Split on any whitespace run (spaces, tabs, newlines) and keep words longer
    // than 3 characters. Only the first such word is used as the search term.
    // NOTE(review): punctuation stays attached to words (e.g. "revenue?") - confirm
    // that searchDocuments tolerates this.
    const [firstKeyword] = naturalLanguageQuery.split(/\s+/).filter((w: string) => w.length > 3);
    const documents = firstKeyword !== undefined
      ? await sragRepo.searchDocuments(orgId, firstKeyword)
      : [];

    res.json({
      type: 'semantic',
      result: documents,
      sqlQuery: null,
      metadata: {
        traceId,
        docIds: documents.map(d => d.id),
        classification,
      },
    });
  } catch (error: unknown) {
    console.error('SRAG query error:', error);
    res.status(500).json({
      success: false,
      // Thrown values are not guaranteed to be Error instances.
      error: error instanceof Error ? error.message : String(error),
    });
  }
});

// Ingest document - hands the request body to sragRepo.ingestDocument and
// returns the identifier it reports.
//
// Responses:
//   200 { success: true, docId }
//   400 { success: false, error }   when the body is missing or not an object
//   500 { success: false, error }   when the repository call throws
sragRouter.post('/ingest/document', async (req, res) => {
  try {
    const input = req.body;

    // Reject an absent or primitive body up front so a malformed request is
    // reported as a client error rather than as a repository failure.
    if (input === null || typeof input !== 'object') {
      res.status(400).json({
        success: false,
        error: 'Request body must be a JSON object',
      });
      return;
    }

    const docId = await sragRepo.ingestDocument(input);

    res.json({
      success: true,
      docId,
    });
  } catch (error: unknown) {
    console.error('Document ingest error:', error);
    res.status(500).json({
      success: false,
      // Thrown values are not guaranteed to be Error instances.
      error: error instanceof Error ? error.message : String(error),
    });
  }
});

// Ingest structured fact - hands the request body to sragRepo.ingestFact and
// returns the identifier it reports.
//
// Responses:
//   200 { success: true, factId }
//   400 { success: false, error }   when the body is missing or not an object
//   500 { success: false, error }   when the repository call throws
sragRouter.post('/ingest/fact', async (req, res) => {
  try {
    const input = req.body;

    // Reject an absent or primitive body up front so a malformed request is
    // reported as a client error rather than as a repository failure.
    if (input === null || typeof input !== 'object') {
      res.status(400).json({
        success: false,
        error: 'Request body must be a JSON object',
      });
      return;
    }

    const factId = await sragRepo.ingestFact(input);

    res.json({
      success: true,
      factId,
    });
  } catch (error: unknown) {
    console.error('Fact ingest error:', error);
    res.status(500).json({
      success: false,
      // Thrown values are not guaranteed to be Error instances.
      error: error instanceof Error ? error.message : String(error),
    });
  }
});