GlazedDon0t commited on
Commit
4b424d6
·
1 Parent(s): 1c08c4a
Files changed (6) hide show
  1. frontend/src/App.tsx +121 -125
  2. src/app.py +205 -72
  3. src/benchmarking.py +229 -224
  4. src/common_utils.py +110 -104
  5. src/factuality_logic.py +12 -23
  6. src/inference_logic.py +321 -207
frontend/src/App.tsx CHANGED
@@ -10,77 +10,74 @@ import {
10
 
11
  function App() {
12
  const[activeTab, setActiveTab] = useState('home');
13
- const [logs, setLogs] = useState<string>('System Ready.\n');
14
- const[isProcessing, setIsProcessing] = useState(false);
15
  const logContainerRef = useRef<HTMLDivElement>(null);
16
 
17
  // Processing Config State
18
  const [modelProvider, setModelProvider] = useState('nrp');
19
- const [apiKey, setApiKey] = useState('');
20
  const[baseUrl, setBaseUrl] = useState('https://ellm.nrp-nautilus.io/v1'); // NRP Default
21
  const[modelName, setModelName] = useState('qwen3'); // Default
22
  const[projectId, setProjectId] = useState('');
23
  const [location, setLocation] = useState('us-central1');
24
- const [includeComments, setIncludeComments] = useState(false);
25
  const[reasoningMethod, setReasoningMethod] = useState('cot');
26
  const [promptTemplate, setPromptTemplate] = useState('standard');
27
- const [customQuery, setCustomQuery] = useState('');
28
- const[maxRetries, setMaxRetries] = useState(1);
29
  const [availablePrompts, setAvailablePrompts] = useState<any[]>([]);
30
-
31
- // Predictive Config
32
- const [predictiveModelType, setPredictiveModelType] = useState('logistic');
33
- const [predictiveResult, setPredictiveResult] = useState<any>(null);
34
 
35
  // Data States
36
- const [queueList, setQueueList] = useState<any[]>([]);
37
- const[selectedQueueItems, setSelectedQueueItems] = useState<Set<string>>(new Set());
38
- const[expandedQueueItems, setExpandedQueueItems] = useState<Set<string>>(new Set());
39
  const[lastQueueIndex, setLastQueueIndex] = useState<number | null>(null);
40
 
41
- const[singleLinkInput, setSingleLinkInput] = useState('');
42
  const [profileList, setProfileList] = useState<any[]>([]);
43
- const [selectedProfile, setSelectedProfile] = useState<any>(null);
44
- const[profilePosts, setProfilePosts] = useState<any[]>([]);
45
- const [communityDatasets, setCommunityDatasets] = useState<any[]>([]);
46
- const [communityAnalysis, setCommunityAnalysis] = useState<any>(null);
47
- const[integrityBoard, setIntegrityBoard] = useState<any[]>([]);
48
 
49
  const[datasetList, setDatasetList] = useState<any[]>([]);
50
- const [selectedItems, setSelectedItems] = useState<Set<string>>(new Set());
51
- const [lastDatasetIndex, setLastDatasetIndex] = useState<number | null>(null);
52
 
53
  const [benchmarks, setBenchmarks] = useState<any>(null);
54
- const[leaderboard, setLeaderboard] = useState<any[]>([]);
55
- const [refreshTrigger, setRefreshTrigger] = useState(0);
56
 
57
  // Tags
58
- const [configuredTags, setConfiguredTags] = useState<any>({});
59
 
60
  // Manual Labeling State
61
- const [manualLink, setManualLink] = useState('');
62
- const[manualCaption, setManualCaption] = useState('');
63
  const [manualTags, setManualTags] = useState('');
64
  const[manualReasoning, setManualReasoning] = useState('');
65
- const [manualScores, setManualScores] = useState({
66
  visual: 5, audio: 5, source: 5, logic: 5, emotion: 5,
67
  va: 5, vc: 5, ac: 5, final: 50
68
  });
69
- const[showRubric, setShowRubric] = useState(false);
70
- const [aiReference, setAiReference] = useState<any>(null);
71
- const [labelBrowserMode, setLabelBrowserMode] = useState<'queue' | 'dataset'>('queue');
72
- const [labelFilter, setLabelFilter] = useState('');
73
 
74
  // Agent Chat State
75
- const[agentInput, setAgentInput] = useState('');
76
- const [agentMessages, setAgentMessages] = useState<any[]>([]);
77
- const [agentThinking, setAgentThinking] = useState(false);
78
- const[agentEndpoint, setAgentEndpoint] = useState('/a2a');
79
- const [agentMethod, setAgentMethod] = useState('agent.process');
80
  const [agentConfig, setAgentConfig] = useState({ use_search: true, use_code: false });
81
 
82
  // Resampling configuration
83
- const[resampleCount, setResampleCount] = useState<number>(1);
84
 
85
  // Drag Selection references
86
  const isDraggingQueueRef = useRef(false);
@@ -89,9 +86,9 @@ function App() {
89
  // Quick Demo State
90
  const[demoLink, setDemoLink] = useState('');
91
  const [demoLogs, setDemoLogs] = useState('');
92
- const [demoIsProcessing, setDemoIsProcessing] = useState(false);
93
  const[demoResult, setDemoResult] = useState<any>(null);
94
- const[showDemoConfig, setShowDemoConfig] = useState(false);
95
  const demoLogContainerRef = useRef<HTMLDivElement>(null);
96
 
97
  useEffect(() => {
@@ -125,7 +122,6 @@ function App() {
125
  setLastQueueIndex(null);
126
  }
127
  if (activeTab === 'profiles') load('/profiles/list', setProfileList);
128
- if (activeTab === 'community') load('/community/list_datasets', setCommunityDatasets);
129
  if (activeTab === 'analytics') load('/analytics/account_integrity', setIntegrityBoard);
130
  if (activeTab === 'dataset' || activeTab === 'manual' || activeTab === 'groundtruth') load('/dataset/list', setDatasetList);
131
  if (activeTab === 'manual') load('/queue/list', setQueueList);
@@ -409,28 +405,6 @@ function App() {
409
  } catch(e: any) { alert("Network error: " + e.toString()); }
410
  };
411
 
412
- const analyzeComments = async (id: string) => {
413
- setCommunityAnalysis({ verdict: "Analyzing..." });
414
- const res = await fetch('/community/analyze', {
415
- method: 'POST', headers: {'Content-Type': 'application/json'},
416
- body: JSON.stringify({ dataset_id: id })
417
- });
418
- setCommunityAnalysis(await res.json());
419
- };
420
-
421
- const runPredictiveTraining = async (useVisual: boolean) => {
422
- setPredictiveResult({ status: 'training' });
423
- try {
424
- const res = await fetch('/benchmarks/train_predictive', {
425
- method: 'POST', headers: {'Content-Type': 'application/json'},
426
- body: JSON.stringify({ use_visual_meta: useVisual, model_type: predictiveModelType })
427
- });
428
- const data = await res.json();
429
- setPredictiveResult(data);
430
- setRefreshTrigger(p => p+1);
431
- } catch (e) { setPredictiveResult({ error: "Failed to train." }); }
432
- };
433
-
434
  const queueUnlabeledPosts = async () => {
435
  const unlabeled = profilePosts.filter(p => !p.is_labeled).map(p => p.link);
436
  if(unlabeled.length === 0) return alert("All posts already labeled!");
@@ -547,6 +521,9 @@ function App() {
547
  fd.append('prompt_template', promptTemplate);
548
  fd.append('custom_query', customQuery);
549
  fd.append('max_reprompts', maxRetries.toString());
 
 
 
550
 
551
  try {
552
  const res = await fetch('/queue/run', { method: 'POST', body: fd });
@@ -597,6 +574,9 @@ function App() {
597
  fd.append('prompt_template', promptTemplate);
598
  fd.append('custom_query', customQuery);
599
  fd.append('max_reprompts', maxRetries.toString());
 
 
 
600
 
601
  setDemoLogs(prev => prev + '[SYSTEM] Sending analysis payload to model server...\n');
602
 
@@ -736,13 +716,11 @@ function App() {
736
  {[
737
  {id:'home', l:'Home & Benchmarks', i:Home},
738
  {id:'agent', l:'Agent Nexus', i:Bot},
739
- {id:'predictive', l:'Predictive Sandbox', i:FlaskConical},
740
  {id:'queue', l:'Ingest Queue', i:List},
741
  {id:'profiles', l:'User Profiles', i:Users},
742
  {id:'manual', l:'Labeling Studio', i:PenTool},
743
  {id:'dataset', l:'Data Manager', i:Archive},
744
  {id:'groundtruth', l:'Ground Truth (Verified)', i:ShieldCheck},
745
- {id:'community', l:'Community Trust', i:MessageSquare},
746
  {id:'analytics', l:'Analytics', i:BarChart2}
747
  ].map(t => (
748
  <button key={t.id} onClick={() => setActiveTab(t.id)}
@@ -817,6 +795,7 @@ function App() {
817
  <div className="space-y-3">
818
  <label className="text-[10px] text-slate-500 uppercase font-bold block border-b border-slate-800 pb-1">Inference Strategy</label>
819
  <select value={reasoningMethod} onChange={e => setReasoningMethod(e.target.value)} className="w-full bg-slate-900 border border-slate-700 rounded p-2 text-xs text-white">
 
820
  <option value="cot">Standard Chain of Thought</option>
821
  <option value="fcot">Fractal Chain of Thought</option>
822
  </select>
@@ -825,6 +804,16 @@ function App() {
825
  <option key={p.id} value={p.id}>{p.name}</option>
826
  )) : <option value="standard">Standard</option>}
827
  </select>
 
 
 
 
 
 
 
 
 
 
828
  </div>
829
  </div>
830
  )}
@@ -1012,11 +1001,11 @@ function App() {
1012
  <th className="p-3">Model</th>
1013
  <th className="p-3">Prompt</th>
1014
  <th className="p-3">Reasoning</th>
 
1015
  <th className="p-3 text-center">FCoT Depth</th>
1016
  <th className="p-3 text-right text-emerald-400">Accuracy</th>
1017
  <th className="p-3 text-right">Comp. MAE</th>
1018
  <th className="p-3 text-right">Tag Acc</th>
1019
- <th className="p-3 text-right">Samples</th>
1020
  <th className="p-3"></th>
1021
  </tr>
1022
  </thead>
@@ -1027,17 +1016,18 @@ function App() {
1027
  <td className="p-3 font-mono text-white">{row.model}</td>
1028
  <td className="p-3">{row.prompt}</td>
1029
  <td className="p-3 uppercase text-[10px]">{row.reasoning}</td>
 
1030
  <td className="p-3 text-center text-slate-400 font-mono">{row.fcot_depth ?? 0}</td>
1031
  <td className="p-3 text-right font-bold text-emerald-400">{row.accuracy}%</td>
1032
  <td className="p-3 text-right font-mono text-amber-400">{row.comp_mae}</td>
1033
  <td className="p-3 text-right">{row.tag_acc}%</td>
1034
- <td className="p-3 text-right text-slate-500">{row.samples}</td>
1035
- <td className="p-3 text-center" title={row.params}>
1036
  <div className="group relative">
1037
  <HelpCircle className="w-4 h-4 text-slate-600 cursor-help"/>
1038
  <div className="absolute right-0 bottom-6 w-64 p-3 bg-black border border-slate-700 rounded shadow-xl hidden group-hover:block z-50 text-[10px] whitespace-pre-wrap text-left">
1039
  <div className="font-bold mb-1 text-slate-400">Config Params</div>
1040
- {row.params}
 
1041
  </div>
1042
  </div>
1043
  </td>
@@ -1050,6 +1040,54 @@ function App() {
1050
  </table>
1051
  </div>
1052
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1053
  </div>
1054
  )}
1055
 
@@ -1124,6 +1162,7 @@ function App() {
1124
  <div className="space-y-1 mt-2">
1125
  <label className="text-[10px] text-slate-500">Reasoning Method</label>
1126
  <select value={reasoningMethod} onChange={e => setReasoningMethod(e.target.value)} className="w-full bg-slate-950 border border-slate-700 rounded p-2 text-xs text-white">
 
1127
  <option value="cot">Standard Chain of Thought</option>
1128
  <option value="fcot">Fractal Chain of Thought</option>
1129
  </select>
@@ -1211,31 +1250,6 @@ function App() {
1211
  </div>
1212
  )}
1213
 
1214
- {/* PREDICTIVE SANDBOX */}
1215
- {activeTab === 'predictive' && (
1216
- <div className="flex h-full gap-6">
1217
- <div className="w-1/3 bg-slate-900/50 border border-slate-800 rounded-xl p-6 flex flex-col gap-6">
1218
- <div>
1219
- <h2 className="text-lg font-bold text-white flex items-center gap-2"><FlaskConical className="w-5 h-5"/> Model Sandbox</h2>
1220
- <p className="text-xs text-slate-400">Train models on the text features of the current Ground Truth dataset.</p>
1221
- </div>
1222
- <button onClick={() => runPredictiveTraining(false)} className="w-full py-3 bg-indigo-600 hover:bg-indigo-500 text-white rounded font-bold text-xs">Train Baseline</button>
1223
- </div>
1224
- <div className="flex-1 bg-slate-900/50 border border-slate-800 rounded-xl p-6 relative overflow-hidden overflow-y-auto">
1225
- {predictiveResult ? (
1226
- predictiveResult.status === 'training' ? (
1227
- <div className="absolute inset-0 flex items-center justify-center text-indigo-400 animate-pulse">Training Model...</div>
1228
- ) : predictiveResult.error ? ( <div className="text-red-400">{predictiveResult.error}</div> ) : (
1229
- <div className="space-y-6">
1230
- <div className="text-xl font-mono text-white">Training Complete ({predictiveResult.type})</div>
1231
- <pre className="text-xs text-slate-400 bg-black p-4 rounded">{JSON.stringify(predictiveResult, null, 2)}</pre>
1232
- </div>
1233
- )
1234
- ) : <div className="flex h-full items-center justify-center text-slate-600">Ready to train.</div>}
1235
- </div>
1236
- </div>
1237
- )}
1238
-
1239
  {/* QUEUE TAB */}
1240
  {activeTab === 'queue' && (
1241
  <div className="flex h-full gap-6">
@@ -1320,6 +1334,7 @@ function App() {
1320
  <div className="space-y-1 mt-2">
1321
  <label className="text-[10px] text-slate-500">Reasoning Method</label>
1322
  <select value={reasoningMethod} onChange={e => setReasoningMethod(e.target.value)} className="w-full bg-slate-950 border border-slate-700 rounded p-2 text-xs text-white">
 
1323
  <option value="cot">Standard Chain of Thought</option>
1324
  <option value="fcot">Fractal Chain of Thought</option>
1325
  </select>
@@ -1333,6 +1348,18 @@ function App() {
1333
  </select>
1334
  </div>
1335
 
 
 
 
 
 
 
 
 
 
 
 
 
1336
  {/* Process Controls */}
1337
  {isProcessing ? (
1338
  <button onClick={stopProcessing} className="w-full py-2 bg-red-600 hover:bg-red-500 text-white rounded font-bold text-xs flex items-center justify-center gap-2 animate-pulse">
@@ -1721,7 +1748,7 @@ function App() {
1721
  <span className="capitalize text-slate-300 font-bold">{k}</span>
1722
  <span className="text-indigo-400 font-mono font-bold">{(manualScores as any)[k]}/10</span>
1723
  </div>
1724
- <input type="range" min="1" max="10" value={(manualScores as any)[k]} onChange={e => setManualScores({...manualScores, [k]: parseInt(e.target.value)})} className="w-full accent-indigo-500"/>
1725
  </div>
1726
  ))}
1727
  </div>
@@ -1813,37 +1840,6 @@ function App() {
1813
  </div>
1814
  )}
1815
 
1816
- {/* COMMUNITY AND ANALYTICS TABS (UNCHANGED) */}
1817
- {activeTab === 'community' && (
1818
- <div className="flex h-full gap-6">
1819
- <div className="w-1/3 bg-slate-900/50 border border-slate-800 rounded-xl overflow-auto">
1820
- <div className="p-3 bg-slate-950 border-b border-slate-800 text-xs font-bold text-slate-400">Comment Datasets</div>
1821
- {communityDatasets.map((d, i) => (
1822
- <div key={i} onClick={() => analyzeComments(d.id)} className="p-4 border-b border-slate-800/50 cursor-pointer hover:bg-white/5">
1823
- <div className="text-xs font-mono text-indigo-400 mb-1">{d.id}</div>
1824
- <div className="text-[10px] text-slate-500">{d.count} comments</div>
1825
- </div>
1826
- ))}
1827
- </div>
1828
- <div className="flex-1 flex flex-col justify-center items-center bg-slate-900/20 border border-slate-800 rounded-xl p-8">
1829
- {communityAnalysis ? (
1830
- <div className="text-center w-full max-w-md">
1831
- <div className="text-xs uppercase text-slate-500 mb-2 tracking-widest">Community Quantization</div>
1832
- <h2 className="text-5xl font-bold text-white mb-2">{communityAnalysis.trust_score?.toFixed(0)}<span className="text-xl text-slate-600">/100</span></h2>
1833
- <div className={`text-lg font-bold mb-8 px-4 py-1 rounded-full inline-block ${communityAnalysis.trust_score < 40 ? 'bg-red-500/10 text-red-400' : 'bg-emerald-500/10 text-emerald-400'}`}>
1834
- {communityAnalysis.verdict}
1835
- </div>
1836
- </div>
1837
- ) : (
1838
- <div className="text-slate-600 flex flex-col items-center">
1839
- <MessageSquare className="w-12 h-12 mb-4 opacity-20"/>
1840
- <span>Select a dataset to analyze community sentiment.</span>
1841
- </div>
1842
- )}
1843
- </div>
1844
- </div>
1845
- )}
1846
-
1847
  {activeTab === 'analytics' && (
1848
  <div className="h-full overflow-auto">
1849
  <div className="flex items-center justify-between mb-4">
 
10
 
11
  function App() {
12
  const[activeTab, setActiveTab] = useState('home');
13
+ const[logs, setLogs] = useState<string>('System Ready.\n');
14
+ const [isProcessing, setIsProcessing] = useState(false);
15
  const logContainerRef = useRef<HTMLDivElement>(null);
16
 
17
  // Processing Config State
18
  const [modelProvider, setModelProvider] = useState('nrp');
19
+ const[apiKey, setApiKey] = useState('');
20
  const[baseUrl, setBaseUrl] = useState('https://ellm.nrp-nautilus.io/v1'); // NRP Default
21
  const[modelName, setModelName] = useState('qwen3'); // Default
22
  const[projectId, setProjectId] = useState('');
23
  const [location, setLocation] = useState('us-central1');
24
+ const[includeComments, setIncludeComments] = useState(false);
25
  const[reasoningMethod, setReasoningMethod] = useState('cot');
26
  const [promptTemplate, setPromptTemplate] = useState('standard');
27
+ const[customQuery, setCustomQuery] = useState('');
28
+ const [maxRetries, setMaxRetries] = useState(1);
29
  const [availablePrompts, setAvailablePrompts] = useState<any[]>([]);
30
+
31
+ const [useSearch, setUseSearch] = useState(false);
32
+ const[useCode, setUseCode] = useState(false);
 
33
 
34
  // Data States
35
+ const[queueList, setQueueList] = useState<any[]>([]);
36
+ const [selectedQueueItems, setSelectedQueueItems] = useState<Set<string>>(new Set());
37
+ const [expandedQueueItems, setExpandedQueueItems] = useState<Set<string>>(new Set());
38
  const[lastQueueIndex, setLastQueueIndex] = useState<number | null>(null);
39
 
40
+ const [singleLinkInput, setSingleLinkInput] = useState('');
41
  const [profileList, setProfileList] = useState<any[]>([]);
42
+ const[selectedProfile, setSelectedProfile] = useState<any>(null);
43
+ const [profilePosts, setProfilePosts] = useState<any[]>([]);
44
+ const [integrityBoard, setIntegrityBoard] = useState<any[]>([]);
 
 
45
 
46
  const[datasetList, setDatasetList] = useState<any[]>([]);
47
+ const[selectedItems, setSelectedItems] = useState<Set<string>>(new Set());
48
+ const[lastDatasetIndex, setLastDatasetIndex] = useState<number | null>(null);
49
 
50
  const [benchmarks, setBenchmarks] = useState<any>(null);
51
+ const [leaderboard, setLeaderboard] = useState<any[]>([]);
52
+ const[refreshTrigger, setRefreshTrigger] = useState(0);
53
 
54
  // Tags
55
+ const[configuredTags, setConfiguredTags] = useState<any>({});
56
 
57
  // Manual Labeling State
58
+ const[manualLink, setManualLink] = useState('');
59
+ const [manualCaption, setManualCaption] = useState('');
60
  const [manualTags, setManualTags] = useState('');
61
  const[manualReasoning, setManualReasoning] = useState('');
62
+ const[manualScores, setManualScores] = useState({
63
  visual: 5, audio: 5, source: 5, logic: 5, emotion: 5,
64
  va: 5, vc: 5, ac: 5, final: 50
65
  });
66
+ const [showRubric, setShowRubric] = useState(false);
67
+ const[aiReference, setAiReference] = useState<any>(null);
68
+ const[labelBrowserMode, setLabelBrowserMode] = useState<'queue' | 'dataset'>('queue');
69
+ const[labelFilter, setLabelFilter] = useState('');
70
 
71
  // Agent Chat State
72
+ const [agentInput, setAgentInput] = useState('');
73
+ const[agentMessages, setAgentMessages] = useState<any[]>([]);
74
+ const[agentThinking, setAgentThinking] = useState(false);
75
+ const [agentEndpoint, setAgentEndpoint] = useState('/a2a');
76
+ const[agentMethod, setAgentMethod] = useState('agent.process');
77
  const [agentConfig, setAgentConfig] = useState({ use_search: true, use_code: false });
78
 
79
  // Resampling configuration
80
+ const [resampleCount, setResampleCount] = useState<number>(1);
81
 
82
  // Drag Selection references
83
  const isDraggingQueueRef = useRef(false);
 
86
  // Quick Demo State
87
  const[demoLink, setDemoLink] = useState('');
88
  const [demoLogs, setDemoLogs] = useState('');
89
+ const[demoIsProcessing, setDemoIsProcessing] = useState(false);
90
  const[demoResult, setDemoResult] = useState<any>(null);
91
+ const [showDemoConfig, setShowDemoConfig] = useState(false);
92
  const demoLogContainerRef = useRef<HTMLDivElement>(null);
93
 
94
  useEffect(() => {
 
122
  setLastQueueIndex(null);
123
  }
124
  if (activeTab === 'profiles') load('/profiles/list', setProfileList);
 
125
  if (activeTab === 'analytics') load('/analytics/account_integrity', setIntegrityBoard);
126
  if (activeTab === 'dataset' || activeTab === 'manual' || activeTab === 'groundtruth') load('/dataset/list', setDatasetList);
127
  if (activeTab === 'manual') load('/queue/list', setQueueList);
 
405
  } catch(e: any) { alert("Network error: " + e.toString()); }
406
  };
407
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
408
  const queueUnlabeledPosts = async () => {
409
  const unlabeled = profilePosts.filter(p => !p.is_labeled).map(p => p.link);
410
  if(unlabeled.length === 0) return alert("All posts already labeled!");
 
521
  fd.append('prompt_template', promptTemplate);
522
  fd.append('custom_query', customQuery);
523
  fd.append('max_reprompts', maxRetries.toString());
524
+
525
+ fd.append('use_search', useSearch.toString());
526
+ fd.append('use_code', useCode.toString());
527
 
528
  try {
529
  const res = await fetch('/queue/run', { method: 'POST', body: fd });
 
574
  fd.append('prompt_template', promptTemplate);
575
  fd.append('custom_query', customQuery);
576
  fd.append('max_reprompts', maxRetries.toString());
577
+
578
+ fd.append('use_search', useSearch.toString());
579
+ fd.append('use_code', useCode.toString());
580
 
581
  setDemoLogs(prev => prev + '[SYSTEM] Sending analysis payload to model server...\n');
582
 
 
716
  {[
717
  {id:'home', l:'Home & Benchmarks', i:Home},
718
  {id:'agent', l:'Agent Nexus', i:Bot},
 
719
  {id:'queue', l:'Ingest Queue', i:List},
720
  {id:'profiles', l:'User Profiles', i:Users},
721
  {id:'manual', l:'Labeling Studio', i:PenTool},
722
  {id:'dataset', l:'Data Manager', i:Archive},
723
  {id:'groundtruth', l:'Ground Truth (Verified)', i:ShieldCheck},
 
724
  {id:'analytics', l:'Analytics', i:BarChart2}
725
  ].map(t => (
726
  <button key={t.id} onClick={() => setActiveTab(t.id)}
 
795
  <div className="space-y-3">
796
  <label className="text-[10px] text-slate-500 uppercase font-bold block border-b border-slate-800 pb-1">Inference Strategy</label>
797
  <select value={reasoningMethod} onChange={e => setReasoningMethod(e.target.value)} className="w-full bg-slate-900 border border-slate-700 rounded p-2 text-xs text-white">
798
+ <option value="none">Direct (No CoT)</option>
799
  <option value="cot">Standard Chain of Thought</option>
800
  <option value="fcot">Fractal Chain of Thought</option>
801
  </select>
 
804
  <option key={p.id} value={p.id}>{p.name}</option>
805
  )) : <option value="standard">Standard</option>}
806
  </select>
807
+
808
+ <label className="text-[10px] text-slate-500 uppercase font-bold block border-b border-slate-800 pb-1 mt-3">Agentic Tools</label>
809
+ <label className="flex items-center gap-2 text-xs text-slate-300 cursor-pointer">
810
+ <input type="checkbox" className="accent-indigo-500" checked={useSearch} onChange={e => setUseSearch(e.target.checked)} />
811
+ Enable Web Search Retrieval
812
+ </label>
813
+ <label className="flex items-center gap-2 text-xs text-slate-300 cursor-pointer">
814
+ <input type="checkbox" className="accent-indigo-500" checked={useCode} onChange={e => setUseCode(e.target.checked)} />
815
+ Enable Code Execution
816
+ </label>
817
  </div>
818
  </div>
819
  )}
 
1001
  <th className="p-3">Model</th>
1002
  <th className="p-3">Prompt</th>
1003
  <th className="p-3">Reasoning</th>
1004
+ <th className="p-3 text-center">Tools</th>
1005
  <th className="p-3 text-center">FCoT Depth</th>
1006
  <th className="p-3 text-right text-emerald-400">Accuracy</th>
1007
  <th className="p-3 text-right">Comp. MAE</th>
1008
  <th className="p-3 text-right">Tag Acc</th>
 
1009
  <th className="p-3"></th>
1010
  </tr>
1011
  </thead>
 
1016
  <td className="p-3 font-mono text-white">{row.model}</td>
1017
  <td className="p-3">{row.prompt}</td>
1018
  <td className="p-3 uppercase text-[10px]">{row.reasoning}</td>
1019
+ <td className="p-3 text-center text-sky-400 font-mono text-[10px]">{row.tools || 'None'}</td>
1020
  <td className="p-3 text-center text-slate-400 font-mono">{row.fcot_depth ?? 0}</td>
1021
  <td className="p-3 text-right font-bold text-emerald-400">{row.accuracy}%</td>
1022
  <td className="p-3 text-right font-mono text-amber-400">{row.comp_mae}</td>
1023
  <td className="p-3 text-right">{row.tag_acc}%</td>
1024
+ <td className="p-3 text-center">
 
1025
  <div className="group relative">
1026
  <HelpCircle className="w-4 h-4 text-slate-600 cursor-help"/>
1027
  <div className="absolute right-0 bottom-6 w-64 p-3 bg-black border border-slate-700 rounded shadow-xl hidden group-hover:block z-50 text-[10px] whitespace-pre-wrap text-left">
1028
  <div className="font-bold mb-1 text-slate-400">Config Params</div>
1029
+ <div>{row.params}</div>
1030
+ <div className="mt-2 pt-2 border-t border-slate-800 text-slate-400 font-bold">Samples: {row.samples}</div>
1031
  </div>
1032
  </div>
1033
  </td>
 
1040
  </table>
1041
  </div>
1042
  </div>
1043
+
1044
+ {/* Detailed Vector Accuracies */}
1045
+ <div className="bg-slate-900/50 border border-slate-800 rounded-xl p-6 mt-6 mb-8">
1046
+ <h3 className="text-sm font-bold text-white uppercase mb-4 flex items-center gap-2">
1047
+ <BarChart2 className="w-4 h-4 text-sky-400"/> Detailed Vector Error Analysis (MAE)
1048
+ </h3>
1049
+ <div className="overflow-x-auto">
1050
+ <table className="w-full text-left text-xs text-slate-400">
1051
+ <thead className="bg-slate-950 text-slate-500 uppercase">
1052
+ <tr>
1053
+ <th className="p-3">Model</th>
1054
+ <th className="p-3">Prompt</th>
1055
+ <th className="p-3">Reasoning</th>
1056
+ <th className="p-3">Tools / Techniques</th>
1057
+ <th className="p-3 text-right">Vis</th>
1058
+ <th className="p-3 text-right">Aud</th>
1059
+ <th className="p-3 text-right">Src</th>
1060
+ <th className="p-3 text-right">Log</th>
1061
+ <th className="p-3 text-right">Emo</th>
1062
+ <th className="p-3 text-right">V-A</th>
1063
+ <th className="p-3 text-right">V-C</th>
1064
+ <th className="p-3 text-right">A-C</th>
1065
+ </tr>
1066
+ </thead>
1067
+ <tbody className="divide-y divide-slate-800">
1068
+ {leaderboard && leaderboard.map((row, i) => (
1069
+ <tr key={i} className="hover:bg-white/5">
1070
+ <td className="p-3 font-mono text-white">{row.model}</td>
1071
+ <td className="p-3">{row.prompt}</td>
1072
+ <td className="p-3 uppercase text-[10px]">{row.reasoning}</td>
1073
+ <td className="p-3 text-sky-400 font-mono text-[10px]">{row.tools || 'None'}</td>
1074
+ <td className="p-3 text-right font-mono">{row.err_visual_score ?? '-'}</td>
1075
+ <td className="p-3 text-right font-mono">{row.err_audio_score ?? '-'}</td>
1076
+ <td className="p-3 text-right font-mono">{row.err_source_score ?? '-'}</td>
1077
+ <td className="p-3 text-right font-mono">{row.err_logic_score ?? '-'}</td>
1078
+ <td className="p-3 text-right font-mono">{row.err_emotion_score ?? '-'}</td>
1079
+ <td className="p-3 text-right font-mono">{row.err_align_video_audio ?? '-'}</td>
1080
+ <td className="p-3 text-right font-mono">{row.err_align_video_caption ?? '-'}</td>
1081
+ <td className="p-3 text-right font-mono">{row.err_align_audio_caption ?? '-'}</td>
1082
+ </tr>
1083
+ ))}
1084
+ {(!leaderboard || leaderboard.length === 0) && (
1085
+ <tr><td colSpan={12} className="p-4 text-center text-slate-600">No detailed benchmark data available.</td></tr>
1086
+ )}
1087
+ </tbody>
1088
+ </table>
1089
+ </div>
1090
+ </div>
1091
  </div>
1092
  )}
1093
 
 
1162
  <div className="space-y-1 mt-2">
1163
  <label className="text-[10px] text-slate-500">Reasoning Method</label>
1164
  <select value={reasoningMethod} onChange={e => setReasoningMethod(e.target.value)} className="w-full bg-slate-950 border border-slate-700 rounded p-2 text-xs text-white">
1165
+ <option value="none">Direct (No CoT)</option>
1166
  <option value="cot">Standard Chain of Thought</option>
1167
  <option value="fcot">Fractal Chain of Thought</option>
1168
  </select>
 
1250
  </div>
1251
  )}
1252
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1253
  {/* QUEUE TAB */}
1254
  {activeTab === 'queue' && (
1255
  <div className="flex h-full gap-6">
 
1334
  <div className="space-y-1 mt-2">
1335
  <label className="text-[10px] text-slate-500">Reasoning Method</label>
1336
  <select value={reasoningMethod} onChange={e => setReasoningMethod(e.target.value)} className="w-full bg-slate-950 border border-slate-700 rounded p-2 text-xs text-white">
1337
+ <option value="none">Direct (No CoT)</option>
1338
  <option value="cot">Standard Chain of Thought</option>
1339
  <option value="fcot">Fractal Chain of Thought</option>
1340
  </select>
 
1348
  </select>
1349
  </div>
1350
 
1351
+ <div className="space-y-2 mt-2">
1352
+ <label className="text-[10px] text-slate-500 uppercase font-bold block border-b border-slate-800 pb-1">Agentic Tools</label>
1353
+ <label className="flex items-center gap-2 text-xs text-slate-300 cursor-pointer">
1354
+ <input type="checkbox" className="accent-indigo-500" checked={useSearch} onChange={e => setUseSearch(e.target.checked)} />
1355
+ Enable Web Search Retrieval
1356
+ </label>
1357
+ <label className="flex items-center gap-2 text-xs text-slate-300 cursor-pointer">
1358
+ <input type="checkbox" className="accent-indigo-500" checked={useCode} onChange={e => setUseCode(e.target.checked)} />
1359
+ Enable Code Execution
1360
+ </label>
1361
+ </div>
1362
+
1363
  {/* Process Controls */}
1364
  {isProcessing ? (
1365
  <button onClick={stopProcessing} className="w-full py-2 bg-red-600 hover:bg-red-500 text-white rounded font-bold text-xs flex items-center justify-center gap-2 animate-pulse">
 
1748
  <span className="capitalize text-slate-300 font-bold">{k}</span>
1749
  <span className="text-indigo-400 font-mono font-bold">{(manualScores as any)[k]}/10</span>
1750
  </div>
1751
+ <input type="range" min="1" max="10" value={(manualScores as any)[k]} onChange={e => setManualScores({...manualScores,[k]: parseInt(e.target.value)})} className="w-full accent-indigo-500"/>
1752
  </div>
1753
  ))}
1754
  </div>
 
1840
  </div>
1841
  )}
1842
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1843
  {activeTab === 'analytics' && (
1844
  <div className="h-full overflow-auto">
1845
  <div className="flex items-center justify-between mb-4">
src/app.py CHANGED
@@ -1,7 +1,7 @@
1
  import os
2
  import sys
3
 
4
- # --- FIX: Ensure 'src' is in sys.path so sibling imports work ---
5
  current_dir = os.path.dirname(os.path.abspath(__file__))
6
  if current_dir not in sys.path:
7
  sys.path.append(current_dir)
@@ -30,11 +30,12 @@ import agent_logic
30
  import common_utils
31
 
32
  from toon_parser import parse_veracity_toon
33
- from labeling_logic import PROMPT_VARIANTS, LABELING_PROMPT_TEMPLATE, FCOT_MACRO_PROMPT
34
  import benchmarking
35
 
36
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
37
  logger = logging.getLogger(__name__)
 
38
  LITE_MODE = os.getenv("LITE_MODE", "true").lower() == "true"
39
 
40
  app = FastAPI()
@@ -78,19 +79,19 @@ except Exception as e:
78
  agent_mount_status = f"error_{str(e)}"
79
 
80
  # --- Static Files & Frontend ---
81
- STATIC_DIR = "/app/static"
82
- if not os.path.isdir(STATIC_DIR):
83
- if os.path.isdir("/usr/share/vchat/static"):
84
- STATIC_DIR = "/usr/share/vchat/static"
85
- elif os.path.isdir("frontend/dist"):
86
- STATIC_DIR = "frontend/dist"
87
- else:
88
- STATIC_DIR = "static"
89
- os.makedirs(STATIC_DIR, exist_ok=True)
90
 
91
  app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
92
 
93
- # --- FIX: Explicitly mount assets for Vite support ---
94
  assets_path = os.path.join(STATIC_DIR, "assets")
95
  if os.path.exists(assets_path):
96
  app.mount("/assets", StaticFiles(directory=assets_path), name="assets")
@@ -227,10 +228,6 @@ async def get_benchmark_stats():
227
  async def get_benchmark_leaderboard():
228
  return benchmarking.generate_leaderboard()
229
 
230
- @app.post("/benchmarks/train_predictive")
231
- async def run_predictive_training(config: dict = Body(...)):
232
- return benchmarking.train_predictive_sandbox(config)
233
-
234
  @app.get("/config/prompts")
235
  async def list_prompts():
236
  return [{"id": k, "name": v['description']} for k, v in PROMPT_VARIANTS.items()]
@@ -260,7 +257,7 @@ async def list_all_tags():
260
  t = t.strip()
261
  if t: tags_count[t] = tags_count.get(t, 0) + 1
262
  sorted_tags = sorted(tags_count.items(), key=lambda x: x[1], reverse=True)
263
- return [{"name": k, "count": v} for k, v in sorted_tags]
264
 
265
  @app.post("/extension/ingest")
266
  async def extension_ingest_link(request: Request):
@@ -304,7 +301,7 @@ async def promote_to_ground_truth(request: Request):
304
  try:
305
  data = await request.json()
306
  target_ids = data.get("ids",[])
307
- if not target_ids and data.get("id"): target_ids = [data.get("id")]
308
 
309
  if not target_ids: return JSONResponse({"status": "error", "message": "No IDs provided"}, status_code=400)
310
 
@@ -362,7 +359,7 @@ async def delete_ground_truth(request: Request):
362
  try:
363
  data = await request.json()
364
  target_ids = data.get("ids",[])
365
- if not target_ids and data.get("id"): target_ids = [data.get("id")]
366
  if not target_ids: raise HTTPException(status_code=400)
367
 
368
  target_ids =[str(t) for t in target_ids]
@@ -619,6 +616,29 @@ async def save_manual_label(request: Request):
619
  writer = csv.DictWriter(f, fieldnames=GROUND_TRUTH_FIELDS, extrasaction='ignore')
620
  writer.writeheader()
621
  writer.writerows(rows)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
622
 
623
  update_queue_status(link, "Processed")
624
  return {"status": "success", "id": tweet_id}
@@ -626,29 +646,6 @@ async def save_manual_label(request: Request):
626
  logger.error(f"Save Manual Error: {e}")
627
  return JSONResponse({"status": "error", "message": str(e)}, status_code=500)
628
 
629
- @app.get("/community/list_datasets")
630
- async def list_community_datasets():
631
- path = Path("data/comments")
632
- files =[]
633
- if path.exists():
634
- for f in path.glob("*.csv"):
635
- files.append({"id": f.stem, "count": sum(1 for _ in open(f, encoding='utf-8'))-1})
636
- return files
637
-
638
- @app.post("/community/analyze")
639
- async def analyze_community(dataset_id: str = Body(..., embed=True)):
640
- path = Path(f"data/comments/{dataset_id}.csv")
641
- if not path.exists(): raise HTTPException(status_code=404)
642
- comments = list(common_utils.robust_read_csv(path))
643
- if not comments: return {"score": 0, "verdict": "No Data"}
644
- s_keys =["fake", "lie", "staged", "bs", "propaganda", "ai", "deepfake"]
645
- t_keys =["true", "real", "confirmed", "fact", "source", "proof"]
646
- s_count = sum(1 for c in comments if any(k in c['text'].lower() for k in s_keys))
647
- t_count = sum(1 for c in comments if any(k in c['text'].lower() for k in t_keys))
648
- score = max(0, min(100, 50 + (t_count * 2) - (s_count * 5)))
649
- verdict = "Community Skepticism" if score < 30 else "Community Verification" if score > 70 else "Neutral/Mixed"
650
- return {"dataset_id": dataset_id, "trust_score": score, "verdict": verdict, "details": {"skeptical_comments": s_count, "trusting_comments": t_count}}
651
-
652
  @app.get("/dataset/list")
653
  async def get_dataset_list():
654
  dataset =[]
@@ -673,26 +670,77 @@ async def get_dataset_list():
673
  async def get_account_integrity():
674
  id_map = {}
675
  prof_dir = Path("data/profiles")
 
 
 
676
  if prof_dir.exists():
677
  for d in prof_dir.iterdir():
678
- for row in common_utils.robust_read_csv(d/"history.csv"):
679
- tid = common_utils.extract_tweet_id(row.get('link',''))
680
- if tid: id_map[tid] = d.name
 
 
 
 
 
 
681
 
682
  scores_map = {}
683
  for fname in ["data/dataset.csv", "data/manual_dataset.csv"]:
684
- for row in common_utils.robust_read_csv(Path(fname)):
 
 
685
  tid = row.get('id')
 
686
  sc = row.get('final_veracity_score', '0')
 
 
687
  try: val = float(re.sub(r'[^\d.]', '', str(sc)))
688
- except: val = 0
689
 
690
- auth = id_map.get(tid, "Unknown")
691
- if auth != "Unknown":
692
- if auth not in scores_map: scores_map[auth] =[]
693
- scores_map[auth].append(val)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
694
 
695
- return sorted([{"username": k, "avg_veracity": round(sum(v)/len(v),1), "posts_labeled": len(v)} for k,v in scores_map.items()], key=lambda x: x['avg_veracity'], reverse=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
696
 
697
  @app.post("/queue/add")
698
  async def add_queue_item(link: str = Body(..., embed=True)):
@@ -838,19 +886,6 @@ async def analyze_user_context(request: Request):
838
  return {"status": "success", "report": rep}
839
  except Exception as e: return JSONResponse({"error": str(e)}, status_code=500)
840
 
841
- @app.get("/download-dataset")
842
- async def download_dataset():
843
- file_path = Path("data/dataset.csv")
844
- if file_path.exists():
845
- return FileResponse(path=file_path, filename="dataset.csv", media_type='text/csv')
846
- return Response("Dataset not found.", status_code=404)
847
-
848
- @app.get("/model-architecture", response_class=PlainTextResponse)
849
- async def get_model_architecture():
850
- if LITE_MODE: return "Running in LITE mode."
851
- if inference_logic.base_model: return str(inference_logic.base_model)
852
- return "Model not loaded."
853
-
854
  @app.get("/", response_class=HTMLResponse)
855
  async def read_root(request: Request):
856
  return templates.TemplateResponse("index.html", {"request": request})
@@ -896,14 +931,15 @@ async def run_queue_processing(
896
  vertex_project_id: str = Form(""), vertex_location: str = Form(""), vertex_model_name: str = Form(""), vertex_api_key: str = Form(""),
897
  nrp_api_key: str = Form(""), nrp_model_name: str = Form(""), nrp_base_url: str = Form("https://ellm.nrp-nautilus.io/v1"),
898
  include_comments: bool = Form(False), reasoning_method: str = Form("cot"), prompt_template: str = Form("standard"),
899
- custom_query: str = Form(""), max_reprompts: int = Form(1)
 
900
  ):
901
  global STOP_QUEUE_SIGNAL
902
  STOP_QUEUE_SIGNAL = False
903
 
904
- gemini_config = {"api_key": gemini_api_key, "model_name": gemini_model_name, "max_retries": max_reprompts}
905
- vertex_config = {"project_id": vertex_project_id, "location": vertex_location, "model_name": vertex_model_name, "api_key": vertex_api_key, "max_retries": max_reprompts, "use_search": True}
906
- nrp_config = {"api_key": nrp_api_key, "model_name": nrp_model_name, "base_url": nrp_base_url, "max_retries": max_reprompts}
907
 
908
  sel_p = PROMPT_VARIANTS.get(prompt_template, PROMPT_VARIANTS['standard'])
909
  system_persona_txt = sel_p['instruction']
@@ -922,7 +958,9 @@ async def run_queue_processing(
922
  config_params_dict = {
923
  "reprompts": max_reprompts,
924
  "include_comments": include_comments,
925
- "agent_active": False
 
 
926
  }
927
  config_params_str = json.dumps(config_params_dict)
928
 
@@ -953,7 +991,7 @@ async def run_queue_processing(
953
  gt_data = row
954
  break
955
 
956
- yield f"data: [START] {link} (Type: {task_type})\n\n"
957
  tid = common_utils.extract_tweet_id(link) or hashlib.md5(link.encode()).hexdigest()[:10]
958
  assets = await common_utils.prepare_video_assets(link, tid)
959
 
@@ -1029,4 +1067,99 @@ async def run_queue_processing(
1029
  yield f"data: Emotional Manipul. : AI {s_float(vec_ai.get('emotional_manipulation_score'))} | GT {s_float(gt_data.get('emotional_manipulation_score'))}\n"
1030
  yield f"data: Video-Audio Align : AI {s_float(mod_ai.get('video_audio_score'))} | GT {s_float(gt_data.get('video_audio_score'))}\n"
1031
  yield f"data: Video-Caption Align: AI {s_float(mod_ai.get('video_caption_score'))} | GT {s_float(gt_data.get('video_caption_score'))}\n"
1032
- yield f"data: Audio-Caption Align: AI {s_float(mod_ai.get('audio_caption_score'))} | GT {s_float(gt_
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import sys
3
 
4
+ # Ensure 'src' is in sys.path so sibling imports work
5
  current_dir = os.path.dirname(os.path.abspath(__file__))
6
  if current_dir not in sys.path:
7
  sys.path.append(current_dir)
 
30
  import common_utils
31
 
32
  from toon_parser import parse_veracity_toon
33
+ from labeling_logic import PROMPT_VARIANTS, LABELING_PROMPT_TEMPLATE, LABELING_PROMPT_TEMPLATE_NO_COT, FCOT_MACRO_PROMPT
34
  import benchmarking
35
 
36
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
37
  logger = logging.getLogger(__name__)
38
+
39
  LITE_MODE = os.getenv("LITE_MODE", "true").lower() == "true"
40
 
41
  app = FastAPI()
 
79
  agent_mount_status = f"error_{str(e)}"
80
 
81
  # --- Static Files & Frontend ---
82
+ STATIC_DIR = "static"
83
+ if os.path.isdir("/app/static"):
84
+ STATIC_DIR = "/app/static"
85
+ elif os.path.isdir("/usr/share/vchat/static"):
86
+ STATIC_DIR = "/usr/share/vchat/static"
87
+ elif os.path.isdir("frontend/dist"):
88
+ STATIC_DIR = "frontend/dist"
89
+ elif not os.path.isdir(STATIC_DIR):
90
+ os.makedirs(STATIC_DIR, exist_ok=True)
91
 
92
  app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
93
 
94
+ # Explicitly mount assets for Vite support
95
  assets_path = os.path.join(STATIC_DIR, "assets")
96
  if os.path.exists(assets_path):
97
  app.mount("/assets", StaticFiles(directory=assets_path), name="assets")
 
228
  async def get_benchmark_leaderboard():
229
  return benchmarking.generate_leaderboard()
230
 
 
 
 
 
231
  @app.get("/config/prompts")
232
  async def list_prompts():
233
  return [{"id": k, "name": v['description']} for k, v in PROMPT_VARIANTS.items()]
 
257
  t = t.strip()
258
  if t: tags_count[t] = tags_count.get(t, 0) + 1
259
  sorted_tags = sorted(tags_count.items(), key=lambda x: x[1], reverse=True)
260
+ return[{"name": k, "count": v} for k, v in sorted_tags]
261
 
262
  @app.post("/extension/ingest")
263
  async def extension_ingest_link(request: Request):
 
301
  try:
302
  data = await request.json()
303
  target_ids = data.get("ids",[])
304
+ if not target_ids and data.get("id"): target_ids =[data.get("id")]
305
 
306
  if not target_ids: return JSONResponse({"status": "error", "message": "No IDs provided"}, status_code=400)
307
 
 
359
  try:
360
  data = await request.json()
361
  target_ids = data.get("ids",[])
362
+ if not target_ids and data.get("id"): target_ids =[data.get("id")]
363
  if not target_ids: raise HTTPException(status_code=400)
364
 
365
  target_ids =[str(t) for t in target_ids]
 
616
  writer = csv.DictWriter(f, fieldnames=GROUND_TRUTH_FIELDS, extrasaction='ignore')
617
  writer.writeheader()
618
  writer.writerows(rows)
619
+
620
+ # Add to User Profiles Catalog
621
+ author = common_utils.extract_twitter_username(link)
622
+ if author:
623
+ prof_dir = Path(f"data/profiles/{author}")
624
+ prof_dir.mkdir(parents=True, exist_ok=True)
625
+ hist_path = prof_dir / "history.csv"
626
+ hist_exists = hist_path.exists()
627
+ existing_links = set()
628
+ if hist_exists:
629
+ for r in common_utils.robust_read_csv(hist_path):
630
+ existing_links.add(r.get('link'))
631
+ if link not in existing_links:
632
+ with open(hist_path, 'a', newline='', encoding='utf-8') as hf:
633
+ fieldnames =["link", "timestamp", "text", "is_reply", "metric_replies", "metric_reposts", "metric_likes", "metric_views", "ingested_at"]
634
+ hwriter = csv.DictWriter(hf, fieldnames=fieldnames, extrasaction='ignore')
635
+ if not hist_exists: hwriter.writeheader()
636
+ hwriter.writerow({
637
+ "link": link,
638
+ "timestamp": row["timestamp"],
639
+ "text": row["caption"],
640
+ "ingested_at": row["timestamp"]
641
+ })
642
 
643
  update_queue_status(link, "Processed")
644
  return {"status": "success", "id": tweet_id}
 
646
  logger.error(f"Save Manual Error: {e}")
647
  return JSONResponse({"status": "error", "message": str(e)}, status_code=500)
648
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
649
  @app.get("/dataset/list")
650
  async def get_dataset_list():
651
  dataset =[]
 
670
  async def get_account_integrity():
671
  id_map = {}
672
  prof_dir = Path("data/profiles")
673
+ prof_dir.mkdir(parents=True, exist_ok=True)
674
+
675
+ existing_links_per_user = {}
676
  if prof_dir.exists():
677
  for d in prof_dir.iterdir():
678
+ if d.is_dir():
679
+ hist_file = d / "history.csv"
680
+ existing_links_per_user[d.name] = set()
681
+ if hist_file.exists():
682
+ for row in common_utils.robust_read_csv(hist_file):
683
+ link = row.get('link', '')
684
+ tid = common_utils.extract_tweet_id(link)
685
+ if tid: id_map[tid] = d.name
686
+ existing_links_per_user[d.name].add(link)
687
 
688
  scores_map = {}
689
  for fname in ["data/dataset.csv", "data/manual_dataset.csv"]:
690
+ path = Path(fname)
691
+ if not path.exists(): continue
692
+ for row in common_utils.robust_read_csv(path):
693
  tid = row.get('id')
694
+ link = row.get('link', '')
695
  sc = row.get('final_veracity_score', '0')
696
+ ts = row.get('timestamp', '')
697
+ caption = row.get('caption', '')
698
  try: val = float(re.sub(r'[^\d.]', '', str(sc)))
699
+ except: val = -1
700
 
701
+ # Require scores to be between 0 and 100
702
+ if 0 <= val <= 100:
703
+ auth = common_utils.extract_twitter_username(link) or id_map.get(tid, "Unknown")
704
+ if auth and auth != "Unknown":
705
+ if auth not in scores_map: scores_map[auth] = []
706
+ scores_map[auth].append({'val': val, 'ts': ts})
707
+
708
+ # Auto-add missing accounts/links to the Profile catalog
709
+ if auth not in existing_links_per_user:
710
+ existing_links_per_user[auth] = set()
711
+ Path(f"data/profiles/{auth}").mkdir(parents=True, exist_ok=True)
712
+
713
+ if link not in existing_links_per_user[auth]:
714
+ existing_links_per_user[auth].add(link)
715
+ hist_path = Path(f"data/profiles/{auth}/history.csv")
716
+ hist_exists = hist_path.exists()
717
+ with open(hist_path, 'a', newline='', encoding='utf-8') as hf:
718
+ fieldnames =["link", "timestamp", "text", "is_reply", "metric_replies", "metric_reposts", "metric_likes", "metric_views", "ingested_at"]
719
+ hwriter = csv.DictWriter(hf, fieldnames=fieldnames, extrasaction='ignore')
720
+ if not hist_exists: hwriter.writeheader()
721
+ hwriter.writerow({
722
+ "link": link,
723
+ "timestamp": ts,
724
+ "text": caption,
725
+ "ingested_at": ts
726
+ })
727
 
728
+ results =[]
729
+ for k, v in scores_map.items():
730
+ v_sorted = sorted(v, key=lambda x: x['ts'], reverse=True)
731
+ decay_factor = 0.9
732
+ total_weight = 0
733
+ weighted_sum = 0
734
+
735
+ for i, item in enumerate(v_sorted):
736
+ weight = decay_factor ** i
737
+ weighted_sum += item['val'] * weight
738
+ total_weight += weight
739
+
740
+ avg_veracity = round(weighted_sum / total_weight, 1) if total_weight > 0 else 0
741
+ results.append({"username": k, "avg_veracity": avg_veracity, "posts_labeled": len(v)})
742
+
743
+ return sorted(results, key=lambda x: x['avg_veracity'], reverse=True)
744
 
745
  @app.post("/queue/add")
746
  async def add_queue_item(link: str = Body(..., embed=True)):
 
886
  return {"status": "success", "report": rep}
887
  except Exception as e: return JSONResponse({"error": str(e)}, status_code=500)
888
 
 
 
 
 
 
 
 
 
 
 
 
 
 
889
  @app.get("/", response_class=HTMLResponse)
890
  async def read_root(request: Request):
891
  return templates.TemplateResponse("index.html", {"request": request})
 
931
  vertex_project_id: str = Form(""), vertex_location: str = Form(""), vertex_model_name: str = Form(""), vertex_api_key: str = Form(""),
932
  nrp_api_key: str = Form(""), nrp_model_name: str = Form(""), nrp_base_url: str = Form("https://ellm.nrp-nautilus.io/v1"),
933
  include_comments: bool = Form(False), reasoning_method: str = Form("cot"), prompt_template: str = Form("standard"),
934
+ custom_query: str = Form(""), max_reprompts: int = Form(1),
935
+ use_search: bool = Form(False), use_code: bool = Form(False)
936
  ):
937
  global STOP_QUEUE_SIGNAL
938
  STOP_QUEUE_SIGNAL = False
939
 
940
+ gemini_config = {"api_key": gemini_api_key, "model_name": gemini_model_name, "max_retries": max_reprompts, "use_search": use_search, "use_code": use_code}
941
+ vertex_config = {"project_id": vertex_project_id, "location": vertex_location, "model_name": vertex_model_name, "api_key": vertex_api_key, "max_retries": max_reprompts, "use_search": use_search, "use_code": use_code}
942
+ nrp_config = {"api_key": nrp_api_key, "model_name": nrp_model_name, "base_url": nrp_base_url, "max_retries": max_reprompts, "use_search": use_search, "use_code": use_code}
943
 
944
  sel_p = PROMPT_VARIANTS.get(prompt_template, PROMPT_VARIANTS['standard'])
945
  system_persona_txt = sel_p['instruction']
 
958
  config_params_dict = {
959
  "reprompts": max_reprompts,
960
  "include_comments": include_comments,
961
+ "agent_active": False,
962
+ "use_search": use_search,
963
+ "use_code": use_code
964
  }
965
  config_params_str = json.dumps(config_params_dict)
966
 
 
991
  gt_data = row
992
  break
993
 
994
+ yield f"data:[START] {link} (Type: {task_type})\n\n"
995
  tid = common_utils.extract_tweet_id(link) or hashlib.md5(link.encode()).hexdigest()[:10]
996
  assets = await common_utils.prepare_video_assets(link, tid)
997
 
 
1067
  yield f"data: Emotional Manipul. : AI {s_float(vec_ai.get('emotional_manipulation_score'))} | GT {s_float(gt_data.get('emotional_manipulation_score'))}\n"
1068
  yield f"data: Video-Audio Align : AI {s_float(mod_ai.get('video_audio_score'))} | GT {s_float(gt_data.get('video_audio_score'))}\n"
1069
  yield f"data: Video-Caption Align: AI {s_float(mod_ai.get('video_caption_score'))} | GT {s_float(gt_data.get('video_caption_score'))}\n"
1070
+ yield f"data: Audio-Caption Align: AI {s_float(mod_ai.get('audio_caption_score'))} | GT {s_float(gt_data.get('audio_caption_score'))}\n"
1071
+ yield f"data: FINAL VERACITY : AI {ai_score} | GT {gt_final} | Delta: {delta}\n\n"
1072
+
1073
+ comp_path = Path("data/comparison.csv")
1074
+ comp_exists = comp_path.exists()
1075
+ with open(comp_path, 'a', newline='', encoding='utf-8') as cf:
1076
+ cw = csv.DictWriter(cf, fieldnames=["id", "link", "timestamp", "gt_score", "ai_score", "delta", "model", "prompt", "reasoning_method"])
1077
+ if not comp_exists: cw.writeheader()
1078
+ cw.writerow({
1079
+ "id": tid, "link": link, "timestamp": datetime.datetime.now().isoformat(),
1080
+ "gt_score": gt_final, "ai_score": ai_score, "delta": delta,
1081
+ "model": active_model_name, "prompt": prompt_template, "reasoning_method": reasoning_method
1082
+ })
1083
+
1084
+ try:
1085
+ with open(d_path, 'a', newline='', encoding='utf-8') as f:
1086
+ row = {
1087
+ "id": tid, "link": link, "timestamp": datetime.datetime.now().isoformat(),
1088
+ "caption": assets['caption'],
1089
+ "final_veracity_score": ai_score,
1090
+ "visual_score": parsed['veracity_vectors'].get('visual_integrity_score', 0),
1091
+ "audio_score": parsed['veracity_vectors'].get('audio_integrity_score', 0),
1092
+ "source_score": parsed['veracity_vectors'].get('source_credibility_score', 0),
1093
+ "logic_score": parsed['veracity_vectors'].get('logical_consistency_score', 0),
1094
+ "emotion_score": parsed['veracity_vectors'].get('emotional_manipulation_score', 0),
1095
+ "align_video_audio": parsed['modalities'].get('video_audio_score', 0),
1096
+ "align_video_caption": parsed['modalities'].get('video_caption_score', 0),
1097
+ "align_audio_caption": parsed['modalities'].get('audio_caption_score', 0),
1098
+ "classification": parsed['disinformation_analysis'].get('classification', 'None'),
1099
+ "reasoning": parsed['final_assessment'].get('reasoning', ''),
1100
+ "tags": ",".join(parsed.get('tags',[])),
1101
+ "raw_toon": res_data.get("raw_toon", ""),
1102
+ "config_type": "GenAI",
1103
+ "config_model": active_model_name,
1104
+ "config_prompt": prompt_template,
1105
+ "config_reasoning": reasoning_method,
1106
+ "config_params": config_params_str
1107
+ }
1108
+ writer = csv.DictWriter(f, fieldnames=DATASET_COLUMNS, extrasaction='ignore')
1109
+ if not exists: writer.writeheader()
1110
+ writer.writerow(row)
1111
+ except Exception as csv_err: logger.error(f"CSV Write Failed: {csv_err}")
1112
+
1113
+ try:
1114
+ ts = datetime.datetime.now().isoformat()
1115
+ ts_clean = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
1116
+ flat_parsed = parsed.copy()
1117
+ flat_parsed["raw_toon"] = res_data.get("raw_toon", "")
1118
+ flat_parsed["meta_info"] = {
1119
+ "id": tid, "timestamp": ts, "link": link,
1120
+ "prompt_used": res_data.get("prompt_used", ""),
1121
+ "model_selection": model_selection,
1122
+ "config_type": "GenAI",
1123
+ "config_model": active_model_name,
1124
+ "config_prompt": prompt_template,
1125
+ "config_reasoning": reasoning_method,
1126
+ "config_params": config_params_dict
1127
+ }
1128
+ with open(Path(f"data/labels/{tid}_{ts_clean}.json"), 'w', encoding='utf-8') as f: json.dump(flat_parsed, f, indent=2, ensure_ascii=False)
1129
+ except Exception as e: logger.error(f"Sidecar Error: {e}")
1130
+
1131
+ # Add to User Profiles Catalog
1132
+ author = common_utils.extract_twitter_username(link)
1133
+ if author:
1134
+ prof_dir = Path(f"data/profiles/{author}")
1135
+ prof_dir.mkdir(parents=True, exist_ok=True)
1136
+ hist_path = prof_dir / "history.csv"
1137
+ hist_exists = hist_path.exists()
1138
+ existing_links = set()
1139
+ if hist_exists:
1140
+ for r in common_utils.robust_read_csv(hist_path):
1141
+ existing_links.add(r.get('link'))
1142
+ if link not in existing_links:
1143
+ with open(hist_path, 'a', newline='', encoding='utf-8') as hf:
1144
+ fieldnames =["link", "timestamp", "text", "is_reply", "metric_replies", "metric_reposts", "metric_likes", "metric_views", "ingested_at"]
1145
+ hwriter = csv.DictWriter(hf, fieldnames=fieldnames, extrasaction='ignore')
1146
+ if not hist_exists: hwriter.writeheader()
1147
+ hwriter.writerow({
1148
+ "link": link,
1149
+ "timestamp": datetime.datetime.now().isoformat(),
1150
+ "text": assets['caption'],
1151
+ "ingested_at": datetime.datetime.now().isoformat()
1152
+ })
1153
+
1154
+ p_ids.add(tid)
1155
+ p_links.add(common_utils.normalize_link(link))
1156
+ update_queue_status(link, "Processed", task_type)
1157
+ yield f"data:[SUCCESS] Saved.\n\n"
1158
+ else:
1159
+ err_msg = res_data.get('error') if isinstance(res_data, dict) else "Inference failed"
1160
+ log_queue_error(link, err_msg, task_type)
1161
+ yield f"data: [FAIL] {err_msg}.\n\n"
1162
+ await asyncio.sleep(0.5)
1163
+ yield "event: close\ndata: Done\n\n"
1164
+
1165
+ return StreamingResponse(queue_stream(), media_type="text/event-stream")
src/benchmarking.py CHANGED
@@ -1,224 +1,229 @@
1
- import pandas as pd
2
- import numpy as np
3
- import shutil
4
- import json
5
- import math
6
- from pathlib import Path
7
- from sklearn.linear_model import LogisticRegression
8
- from sklearn.model_selection import train_test_split
9
-
10
- # Lazy import to avoid startup overhead
11
- try:
12
- from autogluon.tabular import TabularPredictor
13
- AUTOGLUON_AVAILABLE = True
14
- except ImportError:
15
- AUTOGLUON_AVAILABLE = False
16
-
17
- DATA_AI = Path("data/dataset.csv")
18
- DATA_MANUAL = Path("data/manual_dataset.csv")
19
-
20
- def sanitize_for_json(obj):
21
- """Recursively clean floats for JSON output."""
22
- if isinstance(obj, float):
23
- if math.isnan(obj) or math.isinf(obj): return None
24
- return obj
25
- elif isinstance(obj, dict):
26
- return {k: sanitize_for_json(v) for k, v in obj.items()}
27
- elif isinstance(obj, list):
28
- return[sanitize_for_json(v) for v in obj]
29
- return obj
30
-
31
- def calculate_tag_accuracy(tags_ai, tags_man):
32
- if pd.isna(tags_ai): tags_ai = ""
33
- if pd.isna(tags_man): tags_man = ""
34
- set_ai = set([t.strip().lower() for t in str(tags_ai).split(',') if t.strip()])
35
- set_man = set([t.strip().lower() for t in str(tags_man).split(',') if t.strip()])
36
- if not set_man and not set_ai: return 1.0
37
- if not set_man or not set_ai: return 0.0
38
- # Jaccard Similarity
39
- return len(set_ai.intersection(set_man)) / len(set_ai.union(set_man))
40
-
41
- def get_combined_dataset():
42
- """
43
- Joins AI predictions with Manual Ground Truth on ID and calculates comprehensive vector differences.
44
- """
45
- if not DATA_AI.exists() or not DATA_MANUAL.exists():
46
- return None
47
-
48
- try:
49
- # Load datasets
50
- df_ai = pd.read_csv(DATA_AI)
51
- df_manual = pd.read_csv(DATA_MANUAL)
52
-
53
- # Normalize IDs (Trim spaces, ensure string)
54
- df_ai['id'] = df_ai['id'].astype(str).str.strip()
55
- df_manual['id'] = df_manual['id'].astype(str).str.strip()
56
-
57
- df_manual_cols =['id', 'final_veracity_score', 'visual_integrity_score', 'audio_integrity_score', 'source_credibility_score', 'logical_consistency_score', 'emotional_manipulation_score', 'video_audio_score', 'video_caption_score', 'audio_caption_score', 'tags', 'classification']
58
-
59
- # Merge on ID
60
- merged = pd.merge(
61
- df_ai,
62
- df_manual[[c for c in df_manual_cols if c in df_manual.columns]],
63
- on='id',
64
- suffixes=('_ai', '_manual'),
65
- how='inner'
66
- )
67
-
68
- # 1. Final Score Error
69
- merged['final_veracity_score_ai'] = pd.to_numeric(merged['final_veracity_score_ai'], errors='coerce').fillna(0)
70
- merged['final_veracity_score_manual'] = pd.to_numeric(merged['final_veracity_score_manual'], errors='coerce').fillna(0)
71
- merged['abs_error'] = (merged['final_veracity_score_ai'] - merged['final_veracity_score_manual']).abs()
72
-
73
- # 2. Sophisticated Vector Calculations
74
- vector_pairs =[
75
- ('visual_score', 'visual_integrity_score'),
76
- ('audio_score', 'audio_integrity_score'),
77
- ('source_score', 'source_credibility_score'),
78
- ('logic_score', 'logical_consistency_score'),
79
- ('emotion_score', 'emotional_manipulation_score'),
80
- ('align_video_audio', 'video_audio_score'),
81
- ('align_video_caption', 'video_caption_score'),
82
- ('align_audio_caption', 'audio_caption_score'),
83
- ]
84
-
85
- error_cols =['abs_error']
86
- for ai_c, man_c in vector_pairs:
87
- if ai_c in merged.columns and man_c in merged.columns:
88
- # Multiply 1-10 scores by 10 to put them on the same 0-100 scale as final score
89
- merged[ai_c] = pd.to_numeric(merged[ai_c], errors='coerce').fillna(5) * 10
90
- merged[man_c] = pd.to_numeric(merged[man_c], errors='coerce').fillna(5) * 10
91
- err_c = f"err_{ai_c}"
92
- merged[err_c] = (merged[ai_c] - merged[man_c]).abs()
93
- error_cols.append(err_c)
94
-
95
- # Composite MAE represents the mean absolute error across the final score AND all 8 sub-vectors
96
- merged['composite_mae'] = merged[error_cols].mean(axis=1)
97
-
98
- # 3. Tag Accuracy Calculation
99
- merged['tag_accuracy'] = merged.apply(lambda row: calculate_tag_accuracy(row.get('tags_ai', ''), row.get('tags_manual', '')), axis=1)
100
-
101
- return merged
102
- except Exception as e:
103
- print(f"Error merging datasets: {e}")
104
- return None
105
-
106
- def format_config_params(params_raw):
107
- """Parses the config_params JSON string into a readable format for the leaderboard."""
108
- if pd.isna(params_raw) or not params_raw:
109
- return "Defaults"
110
- try:
111
- if isinstance(params_raw, str):
112
- p = json.loads(params_raw)
113
- else:
114
- p = params_raw
115
-
116
- reprompts = p.get('reprompts', 0)
117
- comments = "Yes" if p.get('include_comments') == 'true' or p.get('include_comments') is True else "No"
118
- return f"Retries:{reprompts} | Context:{comments}"
119
- except:
120
- return "Legacy/Unknown"
121
-
122
- def calculate_benchmarks():
123
- """Global stats (All AI models vs Ground Truth)."""
124
- merged = get_combined_dataset()
125
- if merged is None or len(merged) == 0:
126
- return {"status": "no_data"}
127
-
128
- mae = merged['composite_mae'].mean()
129
- tag_acc = merged['tag_accuracy'].mean()
130
-
131
- # Binary Accuracy (Threshold 50)
132
- merged['bin_ai'] = merged['final_veracity_score_ai'] >= 50
133
- merged['bin_manual'] = merged['final_veracity_score_manual'] >= 50
134
- accuracy = (merged['bin_ai'] == merged['bin_manual']).mean()
135
-
136
- recent_samples = merged.tail(5)[['id', 'composite_mae', 'final_veracity_score_ai', 'final_veracity_score_manual']].to_dict(orient='records')
137
-
138
- result = {
139
- "count": int(len(merged)),
140
- "mae": round(mae, 2), # Exposing composite MAE as main MAE metric
141
- "accuracy_percent": round(accuracy * 100, 1),
142
- "tag_accuracy_percent": round(tag_acc * 100, 1),
143
- "recent_samples": recent_samples
144
- }
145
- return sanitize_for_json(result)
146
-
147
- def generate_leaderboard():
148
- """
149
- Groups results by Configuration to rank models/prompts using sophisticated distance measurements.
150
- """
151
- merged = get_combined_dataset()
152
- if merged is None or len(merged) == 0:
153
- return[]
154
-
155
- for col in['config_model', 'config_prompt', 'config_reasoning', 'config_params']:
156
- if col not in merged.columns: merged[col] = "Unknown"
157
-
158
- merged = merged.fillna({'config_model': 'Unknown', 'config_prompt': 'Standard', 'config_reasoning': 'None'})
159
-
160
- merged['params_readable'] = merged['config_params'].apply(format_config_params)
161
-
162
- merged['bin_ai'] = merged['final_veracity_score_ai'] >= 50
163
- merged['bin_manual'] = merged['final_veracity_score_manual'] >= 50
164
- merged['is_correct'] = (merged['bin_ai'] == merged['bin_manual']).astype(int)
165
-
166
- def get_fcot_depth(row):
167
- r = str(row['config_reasoning']).lower()
168
- if 'fcot' in r: return 2
169
- elif 'cot' in r: return 1
170
- return 0
171
- merged['fcot_depth'] = merged.apply(get_fcot_depth, axis=1)
172
-
173
- # Group By Configuration using Composite MAE and Tag Accuracy
174
- grouped = merged.groupby(['config_model', 'config_prompt', 'config_reasoning', 'params_readable', 'fcot_depth']).agg(
175
- comp_mae=('composite_mae', 'mean'),
176
- tag_accuracy=('tag_accuracy', 'mean'),
177
- accuracy=('is_correct', 'mean'),
178
- count=('id', 'count')
179
- ).reset_index()
180
-
181
- leaderboard =[]
182
- for _, row in grouped.iterrows():
183
- leaderboard.append({
184
- "type": "GenAI",
185
- "model": row['config_model'],
186
- "prompt": row['config_prompt'],
187
- "reasoning": row['config_reasoning'],
188
- "params": row['params_readable'],
189
- "fcot_depth": int(row['fcot_depth']),
190
- "comp_mae": round(row['comp_mae'], 2),
191
- "tag_acc": round(row['tag_accuracy'] * 100, 1),
192
- "accuracy": round(row['accuracy'] * 100, 1),
193
- "samples": int(row['count'])
194
- })
195
-
196
- # Sort: Highest Accuracy, Highest Tag Accuracy, then Lowest Composite MAE
197
- leaderboard.sort(key=lambda x: (-x['accuracy'], -x['tag_acc'], x['comp_mae']))
198
-
199
- return sanitize_for_json(leaderboard)
200
-
201
def train_predictive_sandbox(features_config: dict):
    """
    Train a quick logistic-regression baseline on hand-crafted caption features.

    Returns a dict: {"error": ...} on failure, or a success payload with the
    held-out accuracy percentage.

    NOTE(review): `features_config` is never read in this body — presumably it
    was meant to select which features to build; confirm before relying on it.
    """
    if not DATA_MANUAL.exists(): return {"error": "No data"}
    df = pd.read_csv(DATA_MANUAL).dropna(subset=['caption', 'final_veracity_score'])
    # Need a minimal sample for the 70/30 split below to be meaningful.
    if len(df) < 5: return {"error": "Not enough data"}

    # Feature 1: raw caption length.
    df['len'] = df['caption'].astype(str).apply(len)
    # Feature 2: count of sensationalist keywords present in the caption.
    keywords = ["shocking", "breaking", "watch"]
    df['kw_count'] = df['caption'].astype(str).apply(lambda x: sum(1 for k in keywords if k in x.lower()))
    feat_cols = ['len', 'kw_count']

    # Binary target: final veracity score >= 50 is treated as the positive class.
    df['target'] = (pd.to_numeric(df['final_veracity_score'], errors='coerce').fillna(0) >= 50).astype(int)

    try:
        # Fixed random_state keeps the sandbox result reproducible across runs.
        X_train, X_test, y_train, y_test = train_test_split(df[feat_cols], df['target'], test_size=0.3, random_state=42)
        clf = LogisticRegression()
        clf.fit(X_train, y_train)
        return {
            "status": "success",
            "type": "logistic_regression",
            "accuracy": round(clf.score(X_test, y_test) * 100, 1),
            "message": "Baseline trained on Caption Length + Keywords."
        }
    except Exception as e:
        # e.g. a single-class target makes LogisticRegression.fit raise.
        return {"error": str(e)}
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import shutil
4
+ import json
5
+ import math
6
+ from pathlib import Path
7
+
8
+ # Lazy import to avoid startup overhead
9
+ try:
10
+ from autogluon.tabular import TabularPredictor
11
+ AUTOGLUON_AVAILABLE = True
12
+ except ImportError:
13
+ AUTOGLUON_AVAILABLE = False
14
+
15
+ DATA_AI = Path("data/dataset.csv")
16
+ DATA_MANUAL = Path("data/manual_dataset.csv")
17
+
18
def sanitize_for_json(obj):
    """Recursively replace NaN/Inf floats with None so the result is JSON-safe."""
    if isinstance(obj, dict):
        return {key: sanitize_for_json(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return [sanitize_for_json(value) for value in obj]
    if isinstance(obj, float):
        # JSON has no NaN/Infinity literals; map them to null.
        return None if (math.isnan(obj) or math.isinf(obj)) else obj
    return obj
28
+
29
def calculate_tag_accuracy(tags_ai, tags_man):
    """Jaccard similarity of two comma-separated tag strings (case-insensitive)."""
    def to_tag_set(raw):
        # Treat NaN/None as an empty tag list.
        if pd.isna(raw):
            raw = ""
        return {piece.strip().lower() for piece in str(raw).split(',') if piece.strip()}

    set_ai = to_tag_set(tags_ai)
    set_man = to_tag_set(tags_man)
    # Both empty: perfect agreement. Exactly one empty: total disagreement.
    if not set_man and not set_ai:
        return 1.0
    if not set_man or not set_ai:
        return 0.0
    # Jaccard Similarity
    return len(set_ai & set_man) / len(set_ai | set_man)
38
+
39
def get_combined_dataset():
    """
    Joins AI predictions with Manual Ground Truth on ID and calculates comprehensive vector differences.

    Returns the merged DataFrame (with abs_error, per-vector err_* columns,
    composite_mae and tag_accuracy added), or None when either CSV is missing
    or the merge fails.
    """
    if not DATA_AI.exists() or not DATA_MANUAL.exists():
        return None

    try:
        # Load datasets
        df_ai = pd.read_csv(DATA_AI)
        df_manual = pd.read_csv(DATA_MANUAL)

        # Normalize IDs (Trim spaces, ensure string)
        df_ai['id'] = df_ai['id'].astype(str).str.strip()
        df_manual['id'] = df_manual['id'].astype(str).str.strip()

        # Only these ground-truth columns participate in the merge.
        df_manual_cols =['id', 'final_veracity_score', 'visual_integrity_score', 'audio_integrity_score', 'source_credibility_score', 'logical_consistency_score', 'emotional_manipulation_score', 'video_audio_score', 'video_caption_score', 'audio_caption_score', 'tags', 'classification']

        # Merge on ID
        # Columns present on both sides (e.g. final_veracity_score, tags) get
        # the _ai / _manual suffixes used throughout the rest of this function.
        merged = pd.merge(
            df_ai,
            df_manual[[c for c in df_manual_cols if c in df_manual.columns]],
            on='id',
            suffixes=('_ai', '_manual'),
            how='inner'
        )

        # 1. Final Score Error
        merged['final_veracity_score_ai'] = pd.to_numeric(merged['final_veracity_score_ai'], errors='coerce').fillna(0)
        merged['final_veracity_score_manual'] = pd.to_numeric(merged['final_veracity_score_manual'], errors='coerce').fillna(0)
        merged['abs_error'] = (merged['final_veracity_score_ai'] - merged['final_veracity_score_manual']).abs()

        # 2. Sophisticated Vector Calculations
        # (AI column name, manual ground-truth column name) pairs to compare.
        vector_pairs =[
            ('visual_score', 'visual_integrity_score'),
            ('audio_score', 'audio_integrity_score'),
            ('source_score', 'source_credibility_score'),
            ('logic_score', 'logical_consistency_score'),
            ('emotion_score', 'emotional_manipulation_score'),
            ('align_video_audio', 'video_audio_score'),
            ('align_video_caption', 'video_caption_score'),
            ('align_audio_caption', 'audio_caption_score'),
        ]

        error_cols = ['abs_error']
        for ai_c, man_c in vector_pairs:
            if ai_c in merged.columns and man_c in merged.columns:
                # Multiply 1-10 scores by 10 to put them on the same 0-100 scale as final score
                # NOTE(review): missing sub-scores default to 5 (neutral) before scaling — confirm intent.
                merged[ai_c] = pd.to_numeric(merged[ai_c], errors='coerce').fillna(5) * 10
                merged[man_c] = pd.to_numeric(merged[man_c], errors='coerce').fillna(5) * 10
                err_c = f"err_{ai_c}"
                merged[err_c] = (merged[ai_c] - merged[man_c]).abs()
                error_cols.append(err_c)

        # Composite MAE represents the mean absolute error across the final score AND all 8 sub-vectors
        merged['composite_mae'] = merged[error_cols].mean(axis=1)

        # 3. Tag Accuracy Calculation
        merged['tag_accuracy'] = merged.apply(lambda row: calculate_tag_accuracy(row.get('tags_ai', ''), row.get('tags_manual', '')), axis=1)

        return merged
    except Exception as e:
        # Best-effort: a malformed CSV should not take down the API.
        print(f"Error merging datasets: {e}")
        return None
103
+
104
def format_config_params(params_raw):
    """Parses the config_params JSON string into a readable format for the leaderboard.

    Accepts either a JSON string or an already-decoded dict. Missing/empty
    params are reported as "Defaults"; anything uninterpretable as
    "Legacy/Unknown".
    """
    # Missing / empty params mean the run used default settings.
    if pd.isna(params_raw) or not params_raw:
        return "Defaults"
    try:
        p = json.loads(params_raw) if isinstance(params_raw, str) else params_raw
        # Legacy rows may carry non-dict payloads (numbers, lists); previously
        # these fell into a bare `except:` via AttributeError — guard explicitly.
        if not isinstance(p, dict):
            return "Legacy/Unknown"
        reprompts = p.get('reprompts', 0)
        # 'include_comments' may be stored as a JSON bool or the string 'true'.
        comments = "Yes" if p.get('include_comments') == 'true' or p.get('include_comments') is True else "No"
        return f"Retries:{reprompts} | Context:{comments}"
    except (json.JSONDecodeError, TypeError, ValueError):
        # Narrowed from a bare `except:` so genuine programming errors surface.
        return "Legacy/Unknown"
119
+
120
def calculate_benchmarks():
    """Global stats (All AI models vs Ground Truth)."""
    merged = get_combined_dataset()
    if merged is None or len(merged) == 0:
        return {"status": "no_data"}

    composite_mae = merged['composite_mae'].mean()
    mean_tag_accuracy = merged['tag_accuracy'].mean()

    # Binary Accuracy (Threshold 50)
    merged['bin_ai'] = merged['final_veracity_score_ai'] >= 50
    merged['bin_manual'] = merged['final_veracity_score_manual'] >= 50
    agreement = (merged['bin_ai'] == merged['bin_manual']).mean()

    sample_cols = ['id', 'composite_mae', 'final_veracity_score_ai', 'final_veracity_score_manual']
    recent_samples = merged.tail(5)[sample_cols].to_dict(orient='records')

    # Exposing composite MAE as the headline MAE metric.
    return sanitize_for_json({
        "count": int(len(merged)),
        "mae": round(composite_mae, 2),
        "accuracy_percent": round(agreement * 100, 1),
        "tag_accuracy_percent": round(mean_tag_accuracy * 100, 1),
        "recent_samples": recent_samples
    })
144
+
145
def generate_leaderboard():
    """
    Groups results by Configuration to rank models/prompts using sophisticated distance measurements.

    Returns a JSON-safe list of per-configuration entries sorted by accuracy,
    then tag accuracy, then composite MAE.
    """
    merged = get_combined_dataset()
    if merged is None or len(merged) == 0:
        return []

    # Guarantee the grouping columns exist even for legacy rows.
    for col in ['config_model', 'config_prompt', 'config_reasoning', 'config_params']:
        if col not in merged.columns: merged[col] = "Unknown"

    merged = merged.fillna({'config_model': 'Unknown', 'config_prompt': 'Standard', 'config_reasoning': 'None'})

    merged['params_readable'] = merged['config_params'].apply(format_config_params)

    def extract_tools(p_raw):
        # Summarise which auxiliary tools were enabled for a run.
        try:
            p = json.loads(p_raw) if isinstance(p_raw, str) else p_raw
            if not isinstance(p, dict): return "None"
            tools = []
            if p.get('agent_active'): tools.append("Agent")
            if p.get('use_search'): tools.append("Search")
            if p.get('use_code'): tools.append("Code")
            if p.get('few_shot') or p.get('multi_shot'): tools.append("Few-Shot")
            return ", ".join(tools) if tools else "None"
        except (json.JSONDecodeError, TypeError, ValueError):
            # Narrowed from a bare `except:` so real bugs are not silently eaten.
            return "None"

    merged['tools'] = merged['config_params'].apply(extract_tools)

    # Binary (real/fake) agreement with the manual label at threshold 50.
    merged['bin_ai'] = merged['final_veracity_score_ai'] >= 50
    merged['bin_manual'] = merged['final_veracity_score_manual'] >= 50
    merged['is_correct'] = (merged['bin_ai'] == merged['bin_manual']).astype(int)

    def get_fcot_depth(row):
        # 2 = Fractal CoT, 1 = plain CoT, 0 = no structured reasoning.
        r = str(row['config_reasoning']).lower()
        if 'fcot' in r: return 2
        elif 'cot' in r: return 1
        return 0
    merged['fcot_depth'] = merged.apply(get_fcot_depth, axis=1)

    agg_dict = {
        'comp_mae': ('composite_mae', 'mean'),
        'tag_accuracy': ('tag_accuracy', 'mean'),
        'accuracy': ('is_correct', 'mean'),
        'count': ('id', 'count')
    }

    # Include per-vector mean errors when present so the UI can break them out.
    err_cols =[
        'err_visual_score', 'err_audio_score', 'err_source_score',
        'err_logic_score', 'err_emotion_score', 'err_align_video_audio',
        'err_align_video_caption', 'err_align_audio_caption'
    ]
    for col in err_cols:
        if col in merged.columns:
            agg_dict[col] = (col, 'mean')

    # Group By Configuration using Composite MAE and Tag Accuracy
    grouped = merged.groupby(['config_model', 'config_prompt', 'config_reasoning', 'params_readable', 'tools', 'fcot_depth']).agg(**agg_dict).reset_index()

    leaderboard =[]
    for _, row in grouped.iterrows():
        entry = {
            "type": "GenAI",
            "model": row['config_model'],
            "prompt": row['config_prompt'],
            "reasoning": row['config_reasoning'],
            "params": row['params_readable'],
            "tools": row['tools'],
            "fcot_depth": int(row['fcot_depth']),
            "comp_mae": round(row['comp_mae'], 2),
            "tag_acc": round(row['tag_accuracy'] * 100, 1),
            "accuracy": round(row['accuracy'] * 100, 1),
            "samples": int(row['count'])
        }
        for col in err_cols:
            if col in row:
                entry[col] = round(row[col], 2)
        leaderboard.append(entry)

    # Sort: Highest Accuracy, Highest Tag Accuracy, then Lowest Composite MAE
    leaderboard.sort(key=lambda x: (-x['accuracy'], -x['tag_acc'], x['comp_mae']))

    return sanitize_for_json(leaderboard)
src/common_utils.py CHANGED
@@ -1,104 +1,110 @@
1
- import os
2
- import re
3
- import csv
4
- import logging
5
- import datetime
6
- import subprocess
7
- import hashlib
8
- from pathlib import Path
9
- import yt_dlp
10
- import transcription
11
-
12
- logger = logging.getLogger(__name__)
13
-
14
- def robust_read_csv(file_path: Path):
15
- if not file_path.exists():
16
- return
17
-
18
- try:
19
- with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
20
- clean_lines = (line.replace('\0', '') for line in f)
21
- reader = csv.DictReader(clean_lines)
22
- for row in reader:
23
- if row:
24
- yield row
25
- except Exception as e:
26
- logger.error(f"Error reading CSV {file_path}: {e}")
27
- return
28
-
29
- def extract_tweet_id(url: str) -> str | None:
30
- if not url: return None
31
- match = re.search(r"(?:twitter|x)\.com/[^/]+/status/(\d+)", url)
32
- if match: return match.group(1)
33
- return None
34
-
35
- def normalize_link(link: str) -> str:
36
- if not link: return ""
37
- return link.split('?')[0].strip().rstrip('/').replace('http://', '').replace('https://', '').replace('www.', '')
38
-
39
- def parse_vtt(file_path: str) -> str:
40
- """Parses a .vtt subtitle file and returns the clean text content."""
41
- try:
42
- if not os.path.exists(file_path):
43
- return "Transcript file not found."
44
-
45
- with open(file_path, 'r', encoding='utf-8') as f:
46
- lines = f.readlines()
47
-
48
- text_lines =[]
49
- for line in lines:
50
- line = line.strip()
51
- if line and not line.startswith('WEBVTT') and not '-->' in line and not line.isdigit():
52
- clean_line = re.sub(r'<[^>]+>', '', line)
53
- if clean_line and (not text_lines or clean_line != text_lines[-1]):
54
- text_lines.append(clean_line)
55
-
56
- return "\n".join(text_lines) if text_lines else "No speech found in transcript."
57
- except Exception as e:
58
- logger.error(f"Error parsing VTT file {file_path}: {e}")
59
- return f"Error reading transcript: {e}"
60
-
61
- async def prepare_video_assets(link: str, output_id: str) -> dict:
62
- video_dir = Path("data/videos")
63
- if not video_dir.exists():
64
- video_dir.mkdir(parents=True, exist_ok=True)
65
-
66
- video_path = video_dir / f"{output_id}.mp4"
67
- audio_path = video_dir / f"{output_id}.wav"
68
- transcript_path = video_dir / f"{output_id}.vtt"
69
-
70
- caption = ""
71
- video_downloaded = False
72
-
73
- ydl_opts = {
74
- 'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/mp4',
75
- 'outtmpl': str(video_path),
76
- 'quiet': True, 'ignoreerrors': True, 'no_warnings': True, 'skip_download': False
77
- }
78
-
79
- try:
80
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
81
- info = ydl.extract_info(link, download=False)
82
- if info:
83
- caption = info.get('description', '') or info.get('title', '')
84
- formats = info.get('formats',[])
85
- if not formats and not info.get('url'):
86
- logger.info(f"No video formats found for {link}. Treating as text-only.")
87
- else:
88
- if not video_path.exists(): ydl.download([link])
89
- except Exception as e:
90
- logger.error(f"Download error for {link}: {e}")
91
-
92
- if video_path.exists() and video_path.stat().st_size > 0:
93
- video_downloaded = True
94
- if not audio_path.exists():
95
- subprocess.run(["ffmpeg", "-y", "-i", str(video_path), "-vn", "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1", str(audio_path)], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
96
- if audio_path.exists() and not transcript_path.exists():
97
- transcription.load_model()
98
- transcription.generate_transcript(str(audio_path))
99
-
100
- return {
101
- "video": str(video_path) if video_downloaded else None,
102
- "transcript": str(transcript_path) if video_downloaded and transcript_path.exists() else None,
103
- "caption": caption
104
- }
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import csv
4
+ import logging
5
+ import datetime
6
+ import subprocess
7
+ import hashlib
8
+ from pathlib import Path
9
+ import yt_dlp
10
+ import transcription
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
def robust_read_csv(file_path: Path):
    """Yield rows of a CSV as dicts, tolerating NUL bytes and bad encodings.

    Missing files and read errors produce an empty iteration rather than raising.
    """
    if not file_path.exists():
        return

    try:
        with open(file_path, 'r', encoding='utf-8', errors='replace') as handle:
            # Strip embedded NULs which would otherwise break the csv module.
            cleaned = (raw.replace('\0', '') for raw in handle)
            for record in csv.DictReader(cleaned):
                if record:
                    yield record
    except Exception as e:
        logger.error(f"Error reading CSV {file_path}: {e}")
        return
28
+
29
+ def extract_tweet_id(url: str) -> str | None:
30
+ if not url: return None
31
+ match = re.search(r"(?:twitter|x)\.com/[^/]+/status/(\d+)", url)
32
+ if match: return match.group(1)
33
+ return None
34
+
35
+ def extract_twitter_username(url: str) -> str | None:
36
+ if not url: return None
37
+ match = re.search(r"(?:twitter|x)\.com/([^/]+)/status/\d+", url)
38
+ if match: return match.group(1).lower()
39
+ return None
40
+
41
def normalize_link(link: str) -> str:
    """Canonicalise a URL: drop query string, scheme, 'www.' and trailing slash."""
    if not link:
        return ""
    base = link.split('?')[0].strip().rstrip('/')
    return base.replace('http://', '').replace('https://', '').replace('www.', '')
44
+
45
def parse_vtt(file_path: str) -> str:
    """Parses a .vtt subtitle file and returns the clean text content."""
    try:
        if not os.path.exists(file_path):
            return "Transcript file not found."

        with open(file_path, 'r', encoding='utf-8') as handle:
            raw_lines = handle.readlines()

        collected = []
        for raw in raw_lines:
            stripped = raw.strip()
            # Skip blank lines, the WEBVTT header, cue timings and bare cue numbers.
            if not stripped or stripped.startswith('WEBVTT') or '-->' in stripped or stripped.isdigit():
                continue
            plain = re.sub(r'<[^>]+>', '', stripped)
            # Drop empty results and consecutive duplicates (VTT often repeats cues).
            if plain and (not collected or plain != collected[-1]):
                collected.append(plain)

        return "\n".join(collected) if collected else "No speech found in transcript."
    except Exception as e:
        logger.error(f"Error parsing VTT file {file_path}: {e}")
        return f"Error reading transcript: {e}"
66
+
67
async def prepare_video_assets(link: str, output_id: str) -> dict:
    """
    Download a video via yt-dlp, extract audio with ffmpeg and generate a
    transcript, caching all artifacts under data/videos/<output_id>.*.

    Returns a dict:
        video      -- path to the .mp4, or None if no video was obtained
        transcript -- path to the .vtt, or None when unavailable
        caption    -- the post's description/title text (may be empty)
    """
    video_dir = Path("data/videos")
    if not video_dir.exists():
        video_dir.mkdir(parents=True, exist_ok=True)

    video_path = video_dir / f"{output_id}.mp4"
    audio_path = video_dir / f"{output_id}.wav"
    transcript_path = video_dir / f"{output_id}.vtt"

    caption = ""
    video_downloaded = False

    # Errors are suppressed so text-only posts don't abort the pipeline.
    ydl_opts = {
        'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/mp4',
        'outtmpl': str(video_path),
        'quiet': True, 'ignoreerrors': True, 'no_warnings': True, 'skip_download': False
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            # Probe metadata first; only download when actual formats exist.
            info = ydl.extract_info(link, download=False)
            if info:
                caption = info.get('description', '') or info.get('title', '')
                formats = info.get('formats',[])
                if not formats and not info.get('url'):
                    logger.info(f"No video formats found for {link}. Treating as text-only.")
                else:
                    # Cached: skip the download when the file is already on disk.
                    if not video_path.exists(): ydl.download([link])
    except Exception as e:
        logger.error(f"Download error for {link}: {e}")

    if video_path.exists() and video_path.stat().st_size > 0:
        video_downloaded = True
        # Extract 16 kHz mono PCM audio for the transcription model.
        if not audio_path.exists():
            subprocess.run(["ffmpeg", "-y", "-i", str(video_path), "-vn", "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1", str(audio_path)], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        # Lazily load the ASR model and produce the .vtt transcript.
        # NOTE(review): generate_transcript presumably writes <output_id>.vtt
        # next to the .wav — confirm against the transcription module.
        if audio_path.exists() and not transcript_path.exists():
            transcription.load_model()
            transcription.generate_transcript(str(audio_path))

    return {
        "video": str(video_path) if video_downloaded else None,
        "transcript": str(transcript_path) if video_downloaded and transcript_path.exists() else None,
        "caption": caption
    }
+ }
src/factuality_logic.py CHANGED
@@ -7,9 +7,13 @@ import asyncio
7
  from pathlib import Path
8
  import inference_logic
9
  from toon_parser import parse_toon_line
 
10
 
11
  logger = logging.getLogger(__name__)
12
 
 
 
 
13
  PROMPT_VISUAL_ARTIFACTS = (
14
  "Analyze the video for visual manipulation (Deepfakes, editing anomalies).\n"
15
  "Steps inside <thinking>: 1. Scan for artifacts. 2. Check cuts.\n"
@@ -36,29 +40,10 @@ PROMPT_AUDIO_ANALYSIS = (
36
  "Score(1-10),\"Justification text\""
37
  )
38
 
39
-
40
def parse_vtt(file_path: str) -> str:
    """Parse a .vtt subtitle file into plain text.

    Skips the WEBVTT header, cue timings and bare cue numbers, strips inline
    markup tags and collapses consecutive duplicate lines.
    (Duplicate of common_utils.parse_vtt; removed in this commit.)
    """
    try:
        if not os.path.exists(file_path):
            return "Transcript file not found."

        with open(file_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()

        text_lines = []
        for line in lines:
            line = line.strip()
            # Keep only caption text: not the header, not timings, not cue indices.
            if line and not line.startswith('WEBVTT') and not '-->' in line and not line.isdigit():
                clean_line = re.sub(r'<[^>]+>', '', line)
                # Drop empties and consecutive duplicates (VTT often repeats cues).
                if clean_line and (not text_lines or clean_line != text_lines[-1]):
                    text_lines.append(clean_line)

        return "\n".join(text_lines) if text_lines else "No speech found in transcript."
    except Exception as e:
        logger.error(f"Error parsing VTT file {file_path}: {e}")
        return f"Error reading transcript: {e}"
60
-
61
  async def run_factuality_pipeline(paths: dict, checks: dict, generation_config: dict):
 
 
 
62
  video_path = paths.get("video")
63
  transcript_path = paths.get("transcript")
64
 
@@ -78,7 +63,7 @@ async def run_factuality_pipeline(paths: dict, checks: dict, generation_config:
78
  yield f"\n--- Extracted Transcript ---\n{transcript}\n--------------------------\n\n"
79
  await asyncio.sleep(0.1)
80
 
81
- analysis_steps = []
82
  if checks.get("visuals"):
83
  analysis_steps.append(("Visual Integrity", PROMPT_VISUAL_ARTIFACTS))
84
  if checks.get("content"):
@@ -96,6 +81,7 @@ async def run_factuality_pipeline(paths: dict, checks: dict, generation_config:
96
  sampling_fps = current_gen_config.pop("sampling_fps", 2.0)
97
  current_gen_config.pop("num_perceptions", None)
98
 
 
99
  current_gen_config["temperature"] = 0.1
100
  current_gen_config["do_sample"] = True
101
 
@@ -109,7 +95,9 @@ async def run_factuality_pipeline(paths: dict, checks: dict, generation_config:
109
 
110
  yield f" - Analysis Complete for '{title}'. Parsing TOON...\n\n"
111
 
 
112
  parsed_result = {}
 
113
  match = re.search(r'(\w+_analysis): result\[2\]\{score,justification\}:\s*\n(.+)', ans, re.MULTILINE)
114
 
115
  thinking = "No thinking block found."
@@ -125,6 +113,7 @@ async def run_factuality_pipeline(paths: dict, checks: dict, generation_config:
125
  yield f"Warning: Model did not return valid TOON. Raw output:\n{ans}\n"
126
  continue
127
 
 
128
  score = parsed_result.get('score', 'N/A')
129
  justification = parsed_result.get('justification', 'No justification provided.')
130
 
 
7
  from pathlib import Path
8
  import inference_logic
9
  from toon_parser import parse_toon_line
10
+ from common_utils import parse_vtt
11
 
12
  logger = logging.getLogger(__name__)
13
 
14
+ # --- Enhanced TOON Prompts for Individual Checks ---
15
+ # Using TOON reduces output tokens significantly compared to JSON.
16
+
17
  PROMPT_VISUAL_ARTIFACTS = (
18
  "Analyze the video for visual manipulation (Deepfakes, editing anomalies).\n"
19
  "Steps inside <thinking>: 1. Scan for artifacts. 2. Check cuts.\n"
 
40
  "Score(1-10),\"Justification text\""
41
  )
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  async def run_factuality_pipeline(paths: dict, checks: dict, generation_config: dict):
44
+ """
45
+ Asynchronously runs a pipeline of factuality checks, parses TOON scores, and yields results.
46
+ """
47
  video_path = paths.get("video")
48
  transcript_path = paths.get("transcript")
49
 
 
63
  yield f"\n--- Extracted Transcript ---\n{transcript}\n--------------------------\n\n"
64
  await asyncio.sleep(0.1)
65
 
66
+ analysis_steps =[]
67
  if checks.get("visuals"):
68
  analysis_steps.append(("Visual Integrity", PROMPT_VISUAL_ARTIFACTS))
69
  if checks.get("content"):
 
81
  sampling_fps = current_gen_config.pop("sampling_fps", 2.0)
82
  current_gen_config.pop("num_perceptions", None)
83
 
84
+ # FORCE LOW TEMP for structured TOON analysis
85
  current_gen_config["temperature"] = 0.1
86
  current_gen_config["do_sample"] = True
87
 
 
95
 
96
  yield f" - Analysis Complete for '{title}'. Parsing TOON...\n\n"
97
 
98
+ # --- Attempt to parse TOON from the model's response ---
99
  parsed_result = {}
100
+ # Regex to find the TOON data line: key: type[count]{headers}:\nVALUE
101
  match = re.search(r'(\w+_analysis): result\[2\]\{score,justification\}:\s*\n(.+)', ans, re.MULTILINE)
102
 
103
  thinking = "No thinking block found."
 
113
  yield f"Warning: Model did not return valid TOON. Raw output:\n{ans}\n"
114
  continue
115
 
116
+ # --- Display the parsed, structured result ---
117
  score = parsed_result.get('score', 'N/A')
118
  justification = parsed_result.get('justification', 'No justification provided.')
119
 
src/inference_logic.py CHANGED
@@ -1,14 +1,14 @@
 
1
  import re
 
2
  import sys
3
  import os
4
- import time
5
  import logging
6
  import asyncio
7
  import json
8
- import requests
9
  import datetime
 
10
 
11
- # Safe imports for Lite Mode (API only)
12
  try:
13
  from transformers import Qwen3VLForConditionalGeneration, AutoProcessor
14
  from peft import PeftModel
@@ -17,28 +17,30 @@ except ImportError:
17
  AutoProcessor = None
18
  PeftModel = None
19
 
20
- from labeling_logic import (
21
- LABELING_PROMPT_TEMPLATE, SCORE_INSTRUCTIONS_SIMPLE, SCORE_INSTRUCTIONS_REASONING,
22
- SCHEMA_SIMPLE, SCHEMA_REASONING,
23
- FCOT_MACRO_PROMPT, FCOT_MESO_PROMPT, FCOT_SYNTHESIS_PROMPT
24
- )
25
- from toon_parser import parse_veracity_toon
26
-
27
- # Optional local imports
28
  try:
29
  from my_vision_process import process_vision_info, client
30
  except ImportError:
31
  process_vision_info = None
32
  client = None
33
 
 
 
 
 
 
 
 
 
 
34
  # Google GenAI Imports
35
  try:
36
  import google.generativeai as genai_legacy
37
- from google.generativeai.types import generation_types, HarmCategory, HarmBlockThreshold
38
  except ImportError:
39
  genai_legacy = None
40
 
41
  try:
 
42
  from google import genai
43
  from google.genai.types import (
44
  GenerateContentConfig,
@@ -47,8 +49,7 @@ try:
47
  Tool,
48
  VertexAISearch,
49
  GoogleSearch,
50
- Part,
51
- SafetySetting
52
  )
53
  import vertexai
54
  except ImportError:
@@ -62,18 +63,133 @@ peft_model = None
62
  active_model = None
63
  logger = logging.getLogger(__name__)
64
 
65
- TEXT_ONLY_INSTRUCTIONS = """
66
- NOTE: You are operating in TEXT-ONLY mode. The video file could not be analyzed directly.
67
- You must rely entirely on the provided Context (Caption and Transcript) to deduce the veracity.
68
- If the text lacks sufficient detail to score visual or audio integrity, score them as 5 (Neutral/Unknown).
69
- """
70
-
71
- def get_formatted_tag_list():
72
- return "Suggested tags: politics, satire, deepfake, misleading, true, news"
73
-
74
  def load_models():
75
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  def extract_json_from_text(text):
78
  try:
79
  match = re.search(r'\{[\s\S]*\}', text)
@@ -82,39 +198,51 @@ def extract_json_from_text(text):
82
  except:
83
  pass
84
  return {}
85
-
86
def smart_merge(base, new_data):
    """
    Recursively merge new_data into base, filling only values that look
    empty/invalid (falsy, "0" or "n/a") with valid replacements.

    Mutates and returns base when both sides are dicts; otherwise prefers
    whichever argument carries data.
    """
    if not isinstance(new_data, dict): return new_data if new_data else base
    if not isinstance(base, dict): return new_data
    for k, v in new_data.items():
        if k not in base: base[k] = v
        else:
            if isinstance(base[k], dict) and isinstance(v, dict): smart_merge(base[k], v)
            else:
                base_val = base[k]
                new_val = v
                # A value counts as "valid" when truthy and not a placeholder.
                is_base_valid = base_val and str(base_val) != "0" and str(base_val).lower() != "n/a"
                is_new_valid = new_val and str(new_val) != "0" and str(new_val).lower() != "n/a"
                # Only fill gaps; never clobber an existing valid value.
                if not is_base_valid and is_new_valid: base[k] = new_val
    return base
100
 
101
  def validate_parsed_data(data, is_text_only):
102
  missing =[]
 
103
  if not data.get('video_context_summary'): missing.append("summary")
 
104
  final = data.get('final_assessment', {})
105
  if not final.get('reasoning') or len(str(final.get('reasoning', ''))) < 5: missing.append("final:reasoning")
 
106
  vectors = data.get('veracity_vectors', {})
107
- for k in['visual_integrity_score', 'audio_integrity_score', 'source_credibility_score', 'logical_consistency_score', 'emotional_manipulation_score']:
 
108
  if k in['visual_integrity_score', 'audio_integrity_score'] and is_text_only: continue
109
  v = vectors.get(k)
110
  if not v or str(v) == '0' or str(v).lower() == 'n/a': missing.append(f"vector:{k}")
 
111
  mod = data.get('modalities', {})
112
  for k in['video_audio_score', 'video_caption_score', 'audio_caption_score']:
113
  if k in['video_audio_score', 'video_caption_score'] and is_text_only: continue
114
  v = mod.get(k)
115
  if not v or str(v) == '0' or str(v).lower() == 'n/a': missing.append(f"modality:{k}")
 
 
 
 
 
 
 
116
  return missing
117
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  def save_debug_log(request_id, kind, content, attempt, label=""):
119
  if not request_id: return
120
  try:
@@ -128,82 +256,42 @@ def save_debug_log(request_id, kind, content, attempt, label=""):
128
  except Exception as e:
129
  logger.error(f"Failed to save debug log: {e}")
130
 
131
async def attempt_toon_repair(original_text: str, schema: str, client, model_type: str, config: dict):
    """
    Ask a model to reformat malformed output into the strict TOON schema.

    model_type selects the backend: 'gemini' (legacy SDK) or 'vertex'.
    Returns the repaired text, or original_text unchanged when the call fails.
    NOTE(review): an unrecognised model_type returns "" (empty) — confirm intent.
    """
    logger.info("Attempting TOON Repair...")
    repair_prompt = f"SYSTEM: Reformat the following text into strict TOON schema. Infer missing scores as 0.\n\nSCHEMA:\n{schema}\n\nINPUT:\n{original_text}\n"
    try:
        loop = asyncio.get_event_loop()
        repaired_text = ""
        if model_type == 'gemini':
            model = genai_legacy.GenerativeModel("models/gemini-2.0-flash-exp")
            # Blocking SDK call executed off the event loop.
            response = await loop.run_in_executor(None, lambda: model.generate_content(repair_prompt))
            repaired_text = response.text
        elif model_type == 'vertex':
            # Reuse the caller's client when given; otherwise build one from config.
            cl = client if client else genai.Client(vertexai=True, project=config['project_id'], location=config['location'])
            response = await loop.run_in_executor(None, lambda: cl.models.generate_content(model=config['model_name'], contents=repair_prompt))
            repaired_text = response.text
        return repaired_text
    except Exception as e:
        # Best-effort repair: fall back to the unmodified text on any failure.
        logger.error(f"Repair failed: {e}")
        return original_text
149
-
150
  async def run_gemini_labeling_pipeline(video_path: str, caption: str, transcript: str, gemini_config: dict, include_comments: bool, reasoning_method: str = "cot", system_persona: str = "", request_id: str = None):
151
  if genai_legacy is None:
152
  yield "ERROR: Legacy SDK missing.\n"
153
  return
154
-
155
  api_key = gemini_config.get("api_key")
156
- if not api_key:
157
- yield "ERROR: No Gemini API Key provided."
158
- return
159
-
160
  max_retries = int(gemini_config.get("max_retries", 1))
161
-
162
- safety_settings =[
163
- {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
164
- {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
165
- {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
166
- {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
167
- ]
168
-
169
  try:
170
  genai_legacy.configure(api_key=api_key)
171
  loop = asyncio.get_event_loop()
172
  uploaded_file = None
173
  is_text_only = False
174
-
175
  if video_path and os.path.exists(video_path):
176
- yield f"Uploading video to Gemini..."
177
- uploaded_file = await loop.run_in_executor(None, lambda: genai_legacy.upload_file(path=video_path, mime_type="video/mp4"))
178
- wait_start = time.time()
179
- while True:
180
- uploaded_file = await loop.run_in_executor(None, lambda: genai_legacy.get_file(uploaded_file.name))
181
- state_name = uploaded_file.state.name
182
- if state_name == "ACTIVE": break
183
- elif state_name == "FAILED":
184
- yield "ERROR: Google failed to process video."
185
- return
186
- if time.time() - wait_start > 300:
187
- yield "ERROR: Video processing timed out."
188
- return
189
- yield "Processing video on Google servers..."
190
- await asyncio.sleep(5)
191
- else:
192
- is_text_only = True
193
 
194
- model_name = gemini_config.get("model_name") or "models/gemini-2.0-flash-exp"
195
- model = genai_legacy.GenerativeModel(model_name)
 
 
 
 
 
 
 
196
  toon_schema = SCHEMA_REASONING if include_comments else SCHEMA_SIMPLE
197
  score_instructions = SCORE_INSTRUCTIONS_REASONING if include_comments else SCORE_INSTRUCTIONS_SIMPLE
 
198
 
199
- if is_text_only: system_persona += "\n" + TEXT_ONLY_INSTRUCTIONS
200
-
201
- raw_text = ""
202
- prompt_used = ""
203
- gen_config = {"temperature": 0.1}
204
  accumulated_data = {}
 
205
  fcot_trace = {}
206
  full_raw_text = ""
 
207
 
208
  for attempt in range(max_retries + 1):
209
  raw_text = ""
@@ -228,52 +316,46 @@ async def run_gemini_labeling_pipeline(video_path: str, caption: str, transcript
228
  save_debug_log(request_id, 'response', raw_text, attempt, 'reprompt')
229
  else:
230
  if reasoning_method == "fcot":
231
- yield "Starting FCoT (Gemini)..."
232
  chat = model.start_chat(history=[])
233
 
234
  macro_prompt = FCOT_MACRO_PROMPT.format(system_persona=system_persona, caption=caption, transcript=transcript)
235
- if is_text_only: macro_prompt = "NOTE: Text Only Analysis.\n" + macro_prompt
236
  save_debug_log(request_id, 'prompt', macro_prompt, attempt, 'fcot_macro')
237
-
238
- inputs1 =[macro_prompt]
239
  if uploaded_file: inputs1.insert(0, uploaded_file)
240
-
241
- res1 = await loop.run_in_executor(None, lambda: chat.send_message(inputs1, safety_settings=safety_settings))
242
  macro_hypothesis = res1.text
243
  save_debug_log(request_id, 'response', macro_hypothesis, attempt, 'fcot_macro')
244
  fcot_trace['macro'] = macro_hypothesis
245
- yield f"Hypothesis: {macro_hypothesis[:100]}...\n"
246
 
247
  meso_prompt = FCOT_MESO_PROMPT.format(macro_hypothesis=macro_hypothesis)
248
  save_debug_log(request_id, 'prompt', meso_prompt, attempt, 'fcot_meso')
249
- res2 = await loop.run_in_executor(None, lambda: chat.send_message(meso_prompt, safety_settings=safety_settings))
250
  micro_observations = res2.text
251
  save_debug_log(request_id, 'response', micro_observations, attempt, 'fcot_meso')
252
  fcot_trace['meso'] = micro_observations
253
-
254
- synthesis_prompt = FCOT_SYNTHESIS_PROMPT.format(toon_schema=toon_schema, score_instructions=score_instructions, tag_list_text=get_formatted_tag_list())
255
  save_debug_log(request_id, 'prompt', synthesis_prompt, attempt, 'fcot_synthesis')
256
- res3 = await loop.run_in_executor(None, lambda: chat.send_message(synthesis_prompt, safety_settings=safety_settings))
257
  raw_text = res3.text
258
  save_debug_log(request_id, 'response', raw_text, attempt, 'fcot_synthesis')
259
- prompt_used = f"FCoT:\n{macro_prompt}\n..."
260
  else:
261
- prompt_text = LABELING_PROMPT_TEMPLATE.format(system_persona=system_persona, caption=caption, transcript=transcript, toon_schema=toon_schema, score_instructions=score_instructions, tag_list_text=get_formatted_tag_list())
262
- if is_text_only: prompt_text = "NOTE: Text Only Analysis.\n" + prompt_text
 
 
 
263
  prompt_used = prompt_text
264
- save_debug_log(request_id, 'prompt', prompt_text, attempt, 'standard')
265
- yield f"Generating Labels ({model_name})..."
266
-
267
  inputs = [prompt_text]
268
  if uploaded_file: inputs.append(uploaded_file)
269
-
270
- response = await loop.run_in_executor(
271
- None,
272
- lambda: model.generate_content(inputs, generation_config=gen_config, safety_settings=safety_settings)
273
- )
274
  raw_text = response.text
275
- save_debug_log(request_id, 'response', raw_text, attempt, 'standard')
276
-
277
  if raw_text:
278
  full_raw_text += f"\n--- Attempt {attempt} ---\n{raw_text}\n"
279
  parsed_step = parse_veracity_toon(raw_text)
@@ -286,28 +368,20 @@ async def run_gemini_labeling_pipeline(video_path: str, caption: str, transcript
286
  else:
287
  parsed_step[k] = json_data[k]
288
  accumulated_data = smart_merge(accumulated_data, parsed_step)
289
-
290
  missing_fields = validate_parsed_data(accumulated_data, is_text_only)
291
  if not missing_fields:
292
- yield "Validation Passed. All factuality components processed and confidence scores obtained.\n"
293
  yield {"raw_toon": full_raw_text, "parsed_data": accumulated_data, "prompt_used": prompt_used, "fcot_trace": fcot_trace}
294
  break
295
-
296
  if attempt == max_retries:
297
  yield f"Max retries reached. Saving incomplete data.\n"
298
  yield {"raw_toon": full_raw_text, "parsed_data": accumulated_data, "prompt_used": prompt_used, "fcot_trace": fcot_trace}
299
  break
300
 
301
- if uploaded_file:
302
- try:
303
- await loop.run_in_executor(None, lambda: genai_legacy.delete_file(name=uploaded_file.name))
304
- except Exception:
305
- pass
306
-
307
- except Exception as e:
308
- logger.error(f"Gemini Pipeline Error: {e}", exc_info=True)
309
- yield f"ERROR (Gemini): {e}"
310
-
311
 
312
  async def run_vertex_labeling_pipeline(video_path: str, caption: str, transcript: str, vertex_config: dict, include_comments: bool, reasoning_method: str = "cot", system_persona: str = "", request_id: str = None):
313
  if genai is None:
@@ -315,52 +389,55 @@ async def run_vertex_labeling_pipeline(video_path: str, caption: str, transcript
315
  return
316
 
317
  project_id = vertex_config.get("project_id")
318
- if not project_id:
319
- yield "ERROR: No Vertex Project ID."
320
- return
 
321
 
322
- safety_settings =[
323
- SafetySetting(category="HARM_CATEGORY_HATE_SPEECH", threshold="BLOCK_ONLY_HIGH"),
324
- SafetySetting(category="HARM_CATEGORY_DANGEROUS_CONTENT", threshold="BLOCK_ONLY_HIGH"),
325
- SafetySetting(category="HARM_CATEGORY_SEXUALLY_EXPLICIT", threshold="BLOCK_ONLY_HIGH"),
326
- SafetySetting(category="HARM_CATEGORY_HARASSMENT", threshold="BLOCK_ONLY_HIGH"),
327
- ]
328
 
329
  try:
330
- api_key = vertex_config.get("api_key")
331
  if api_key:
332
- client = genai.Client(vertexai=True, project=project_id, location=vertex_config.get("location", "us-central1"), api_key=api_key)
333
  else:
334
- client = genai.Client(vertexai=True, project=project_id, location=vertex_config.get("location", "us-central1"))
335
-
336
  video_part = None
337
  is_text_only = False
338
  if video_path and os.path.exists(video_path):
339
  with open(video_path, 'rb') as f: video_bytes = f.read()
340
  video_part = Part.from_bytes(data=video_bytes, mime_type="video/mp4")
341
- else:
342
- is_text_only = True
 
 
 
 
 
 
 
 
 
 
 
343
 
344
- toon_schema = SCHEMA_REASONING if include_comments else SCHEMA_SIMPLE
345
- score_instructions = SCORE_INSTRUCTIONS_REASONING if include_comments else SCORE_INSTRUCTIONS_SIMPLE
346
- model_name = vertex_config.get("model_name", "gemini-2.5-flash-lite")
347
- max_retries = int(vertex_config.get("max_retries", 1))
348
-
349
- raw_text = ""
350
- prompt_used = ""
351
- loop = asyncio.get_event_loop()
352
  config = GenerateContentConfig(
353
- temperature=0.1,
354
- response_mime_type="text/plain",
355
- tools=[Tool(google_search=GoogleSearch())] if vertex_config.get("use_search", True) else None,
356
- safety_settings=safety_settings
357
  )
358
 
359
- if is_text_only: system_persona += "\n" + TEXT_ONLY_INSTRUCTIONS
360
-
 
 
361
  accumulated_data = {}
 
362
  fcot_trace = {}
363
  full_raw_text = ""
 
 
 
364
 
365
  for attempt in range(max_retries + 1):
366
  raw_text = ""
@@ -368,6 +445,7 @@ async def run_vertex_labeling_pipeline(video_path: str, caption: str, transcript
368
  missing = validate_parsed_data(accumulated_data, is_text_only)
369
  yield f"Validation failed. Missing or incomplete fields: {missing}. Initiating Iterative Reprompt (Attempt {attempt}/{max_retries}) to acquire remaining factuality components...\n"
370
 
 
371
  prompt_text = (
372
  f"SYSTEM: Review the previous attempt which failed validation.\n"
373
  f"CONTEXT: Caption: \"{caption}\"\nTranscript: \"{transcript}\"\n"
@@ -388,12 +466,12 @@ async def run_vertex_labeling_pipeline(video_path: str, caption: str, transcript
388
  save_debug_log(request_id, 'response', raw_text, attempt, 'reprompt')
389
  else:
390
  if reasoning_method == "fcot":
391
- yield "Starting FCoT (Vertex)..."
392
  chat = client.chats.create(model=model_name, config=config)
393
 
394
  macro_prompt = FCOT_MACRO_PROMPT.format(system_persona=system_persona, caption=caption, transcript=transcript)
395
  save_debug_log(request_id, 'prompt', macro_prompt, attempt, 'fcot_macro')
396
- inputs1 =[macro_prompt]
397
  if video_part: inputs1.insert(0, video_part)
398
  else: inputs1[0] = "NOTE: Text Only Analysis.\n" + inputs1[0]
399
 
@@ -401,7 +479,6 @@ async def run_vertex_labeling_pipeline(video_path: str, caption: str, transcript
401
  macro_hypothesis = res1.text
402
  save_debug_log(request_id, 'response', macro_hypothesis, attempt, 'fcot_macro')
403
  fcot_trace['macro'] = macro_hypothesis
404
- yield f"Hypothesis: {macro_hypothesis[:80]}...\n"
405
 
406
  meso_prompt = FCOT_MESO_PROMPT.format(macro_hypothesis=macro_hypothesis)
407
  save_debug_log(request_id, 'prompt', meso_prompt, attempt, 'fcot_meso')
@@ -410,31 +487,27 @@ async def run_vertex_labeling_pipeline(video_path: str, caption: str, transcript
410
  save_debug_log(request_id, 'response', micro_observations, attempt, 'fcot_meso')
411
  fcot_trace['meso'] = micro_observations
412
 
413
- synthesis_prompt = FCOT_SYNTHESIS_PROMPT.format(toon_schema=toon_schema, score_instructions=score_instructions, tag_list_text=get_formatted_tag_list())
414
  save_debug_log(request_id, 'prompt', synthesis_prompt, attempt, 'fcot_synthesis')
415
  res3 = await loop.run_in_executor(None, lambda: chat.send_message(synthesis_prompt))
416
  raw_text = res3.text
417
  save_debug_log(request_id, 'response', raw_text, attempt, 'fcot_synthesis')
418
- prompt_used = f"FCoT (Vertex):\n{macro_prompt}..."
419
-
420
  else:
421
- prompt_text = LABELING_PROMPT_TEMPLATE.format(system_persona=system_persona, caption=caption, transcript=transcript, toon_schema=toon_schema, score_instructions=score_instructions, tag_list_text=get_formatted_tag_list())
422
- contents = []
 
 
 
 
423
  if video_part: contents =[video_part, prompt_text]
424
  else: contents =[f"NOTE: Text Only Analysis (No Video).\n{prompt_text}"]
425
  prompt_used = prompt_text
426
- save_debug_log(request_id, 'prompt', prompt_text, attempt, 'standard')
427
- yield f"Generating Labels ({model_name})..."
428
- response = await loop.run_in_executor(
429
- None,
430
- lambda: client.models.generate_content(model=model_name, contents=contents, config=config)
431
- )
432
  raw_text = response.text
433
- save_debug_log(request_id, 'response', raw_text, attempt, 'standard')
434
-
435
- if not raw_text:
436
- yield {"error": "Empty Response"}
437
- return
438
 
439
  if raw_text:
440
  full_raw_text += f"\n--- Attempt {attempt} ---\n{raw_text}\n"
@@ -451,7 +524,7 @@ async def run_vertex_labeling_pipeline(video_path: str, caption: str, transcript
451
 
452
  missing_fields = validate_parsed_data(accumulated_data, is_text_only)
453
  if not missing_fields:
454
- yield "Validation Passed. All factuality components processed and confidence scores obtained.\n"
455
  yield {"raw_toon": full_raw_text, "parsed_data": accumulated_data, "prompt_used": prompt_used, "fcot_trace": fcot_trace}
456
  break
457
 
@@ -459,12 +532,11 @@ async def run_vertex_labeling_pipeline(video_path: str, caption: str, transcript
459
  yield f"Max retries reached. Saving incomplete data.\n"
460
  yield {"raw_toon": full_raw_text, "parsed_data": accumulated_data, "prompt_used": prompt_used, "fcot_trace": fcot_trace}
461
  break
462
-
463
  except Exception as e:
464
- yield f"ERROR (Vertex): {e}"
465
  logger.error("Vertex Labeling Error", exc_info=True)
466
 
467
-
468
  async def run_nrp_labeling_pipeline(video_path: str, caption: str, transcript: str, nrp_config: dict, include_comments: bool, reasoning_method: str = "cot", system_persona: str = "", request_id: str = None):
469
  api_key = nrp_config.get("api_key")
470
  model_name = nrp_config.get("model_name", "gpt-4")
@@ -482,6 +554,11 @@ async def run_nrp_labeling_pipeline(video_path: str, caption: str, transcript: s
482
 
483
  is_text_only = True
484
  system_persona += "\n" + TEXT_ONLY_INSTRUCTIONS
 
 
 
 
 
485
 
486
  toon_schema = SCHEMA_REASONING if include_comments else SCHEMA_SIMPLE
487
  score_instructions = SCORE_INSTRUCTIONS_REASONING if include_comments else SCORE_INSTRUCTIONS_SIMPLE
@@ -499,11 +576,30 @@ async def run_nrp_labeling_pipeline(video_path: str, caption: str, transcript: s
499
  "messages": messages,
500
  "temperature": 0.1
501
  }
 
 
 
 
 
502
  def do_request():
 
 
 
503
  resp = requests.post(f"{base_url}/chat/completions", headers=headers, json=payload, timeout=600)
 
 
 
 
504
  if resp.status_code != 200:
 
505
  raise Exception(f"API Error {resp.status_code}: {resp.text}")
506
- return resp.json()["choices"][0]["message"]["content"]
 
 
 
 
 
 
507
  return await loop.run_in_executor(None, do_request)
508
 
509
  try:
@@ -511,63 +607,85 @@ async def run_nrp_labeling_pipeline(video_path: str, caption: str, transcript: s
511
  raw_text = ""
512
  if attempt > 0:
513
  missing = validate_parsed_data(accumulated_data, is_text_only)
514
- yield f"Validation failed. Missing fields: {missing}. Initiating Reprompt...\n"
 
515
  prompt_text = (
516
  f"SYSTEM: Review the previous attempt which failed validation.\n"
517
  f"CONTEXT: Caption: \"{caption}\"\nTranscript: \"{transcript}\"\n"
518
  f"PREVIOUS (PARTIAL) DATA: {json.dumps(accumulated_data, indent=2)}\n"
519
  f"MISSING FIELDS: {missing}\n"
520
- f"INSTRUCTION: Generate the missing fields to complete the schema.\n"
 
521
  f"{toon_schema}"
522
  )
 
523
  save_debug_log(request_id, 'prompt', prompt_text, attempt, 'reprompt')
 
 
524
  raw_text = await _call_nrp([
525
  {"role": "system", "content": system_persona},
526
  {"role": "user", "content": prompt_text}
527
- ])
 
 
528
  save_debug_log(request_id, 'response', raw_text, attempt, 'reprompt')
529
  else:
530
  if reasoning_method == "fcot":
531
  yield "Starting Fractal Chain of Thought (NRP FCoT)...\n"
 
532
  macro_prompt = FCOT_MACRO_PROMPT.format(system_persona=system_persona, caption=caption, transcript=transcript)
533
  macro_prompt = "NOTE: Text Only Analysis.\n" + macro_prompt
534
  save_debug_log(request_id, 'prompt', macro_prompt, attempt, 'fcot_macro')
535
 
536
  macro_messages =[{"role": "system", "content": system_persona}, {"role": "user", "content": macro_prompt}]
537
- macro_hypothesis = await _call_nrp(macro_messages)
 
 
 
538
  save_debug_log(request_id, 'response', macro_hypothesis, attempt, 'fcot_macro')
539
  fcot_trace['macro'] = macro_hypothesis
540
 
541
  meso_prompt = FCOT_MESO_PROMPT.format(macro_hypothesis=macro_hypothesis)
542
  save_debug_log(request_id, 'prompt', meso_prompt, attempt, 'fcot_meso')
543
-
544
  meso_messages = macro_messages +[{"role": "assistant", "content": macro_hypothesis}, {"role": "user", "content": meso_prompt}]
545
- micro_observations = await _call_nrp(meso_messages)
 
 
 
 
546
  save_debug_log(request_id, 'response', micro_observations, attempt, 'fcot_meso')
547
  fcot_trace['meso'] = micro_observations
548
 
549
  synthesis_prompt = FCOT_SYNTHESIS_PROMPT.format(toon_schema=toon_schema, score_instructions=score_instructions, tag_list_text=tag_list_text)
550
  save_debug_log(request_id, 'prompt', synthesis_prompt, attempt, 'fcot_synthesis')
551
-
552
  synthesis_messages = meso_messages +[{"role": "assistant", "content": micro_observations}, {"role": "user", "content": synthesis_prompt}]
553
- raw_text = await _call_nrp(synthesis_messages)
 
 
 
 
554
  save_debug_log(request_id, 'response', raw_text, attempt, 'fcot_synthesis')
555
  prompt_used = f"FCoT (NRP):\nMacro: {macro_hypothesis}\nMeso: {micro_observations}"
556
 
557
  else:
558
- prompt_text = LABELING_PROMPT_TEMPLATE.format(
 
559
  system_persona=system_persona, caption=caption, transcript=transcript,
560
  toon_schema=toon_schema, score_instructions=score_instructions, tag_list_text=tag_list_text
561
  )
562
  prompt_text = f"NOTE: Text Only Analysis (No Video).\n{prompt_text}"
563
  prompt_used = prompt_text
564
- save_debug_log(request_id, 'prompt', prompt_text, attempt, 'standard')
565
- yield "Generating Labels (NRP CoT)...\n"
 
 
566
  raw_text = await _call_nrp([
567
  {"role": "system", "content": system_persona},
568
  {"role": "user", "content": prompt_text}
569
- ])
570
- save_debug_log(request_id, 'response', raw_text, attempt, 'standard')
 
 
571
 
572
  if raw_text:
573
  full_raw_text += f"\n--- Attempt {attempt} ---\n{raw_text}\n"
@@ -584,14 +702,10 @@ async def run_nrp_labeling_pipeline(video_path: str, caption: str, transcript: s
584
 
585
  missing_fields = validate_parsed_data(accumulated_data, is_text_only)
586
  if not missing_fields:
587
- yield "Validation Passed.\n"
588
  yield {"raw_toon": full_raw_text, "parsed_data": accumulated_data, "prompt_used": prompt_used, "fcot_trace": fcot_trace}
589
  break
590
 
591
  if attempt == max_retries:
592
- yield {"raw_toon": full_raw_text, "parsed_data": accumulated_data, "prompt_used": prompt_used, "fcot_trace": fcot_trace}
593
- break
594
-
595
- except Exception as e:
596
- yield f"ERROR: {e}\n\n"
597
- logger.error("NRP Labeling Error", exc_info=True)
 
1
+ import torch
2
  import re
3
+ import ast
4
  import sys
5
  import os
 
6
  import logging
7
  import asyncio
8
  import json
 
9
  import datetime
10
+ import requests
11
 
 
12
  try:
13
  from transformers import Qwen3VLForConditionalGeneration, AutoProcessor
14
  from peft import PeftModel
 
17
  AutoProcessor = None
18
  PeftModel = None
19
 
 
 
 
 
 
 
 
 
20
  try:
21
  from my_vision_process import process_vision_info, client
22
  except ImportError:
23
  process_vision_info = None
24
  client = None
25
 
26
+ from labeling_logic import (
27
+ LABELING_PROMPT_TEMPLATE, LABELING_PROMPT_TEMPLATE_NO_COT,
28
+ SCORE_INSTRUCTIONS_SIMPLE, SCORE_INSTRUCTIONS_REASONING,
29
+ SCHEMA_SIMPLE, SCHEMA_REASONING,
30
+ FCOT_MACRO_PROMPT, FCOT_MESO_PROMPT, FCOT_SYNTHESIS_PROMPT, TEXT_ONLY_INSTRUCTIONS,
31
+ get_formatted_tag_list
32
+ )
33
+ from toon_parser import parse_veracity_toon
34
+
35
  # Google GenAI Imports
36
  try:
37
  import google.generativeai as genai_legacy
38
+ from google.generativeai.types import generation_types
39
  except ImportError:
40
  genai_legacy = None
41
 
42
  try:
43
+ # Modern Google GenAI SDK (v1)
44
  from google import genai
45
  from google.genai.types import (
46
  GenerateContentConfig,
 
49
  Tool,
50
  VertexAISearch,
51
  GoogleSearch,
52
+ Part
 
53
  )
54
  import vertexai
55
  except ImportError:
 
63
  active_model = None
64
  logger = logging.getLogger(__name__)
65
 
 
 
 
 
 
 
 
 
 
66
  def load_models():
67
+ global LITE_MODE, processor, base_model, peft_model, active_model
68
+
69
+ if LITE_MODE:
70
+ logger.info("LITE_MODE is enabled. Skipping local model loading.")
71
+ return
72
+
73
+ if base_model is not None: return
74
+
75
+ if not torch.cuda.is_available():
76
+ logger.warning("CUDA is not available. This application requires a GPU for local models. Switching to LITE_MODE.")
77
+ LITE_MODE = True
78
+ return
79
+
80
+ device = torch.device("cuda")
81
+ logger.info(f"CUDA is available. Initializing models on {device}...")
82
+ local_model_path = "/app/local_model"
83
+
84
+ try:
85
+ import flash_attn
86
+ attn_implementation = "flash_attention_2"
87
+ except ImportError:
88
+ attn_implementation = "sdpa"
89
+
90
+ logger.info(f"Loading base model from {local_model_path}...")
91
+ try:
92
+ base_model = Qwen3VLForConditionalGeneration.from_pretrained(
93
+ local_model_path, dtype=torch.bfloat16, device_map="auto", attn_implementation=attn_implementation
94
+ ).eval()
95
+ processor = AutoProcessor.from_pretrained(local_model_path)
96
+ active_model = base_model
97
+ except Exception as e:
98
+ logger.error(f"Failed to load local model: {e}")
99
+ LITE_MODE = True
100
+
101
+ def switch_active_model(model_name: str):
102
+ global active_model, base_model, peft_model
103
+ if model_name == "custom" and peft_model is not None:
104
+ active_model = peft_model
105
+ else:
106
+ active_model = base_model
107
+
108
+ def inference_step(video_path, prompt, generation_kwargs, sampling_fps, pred_glue=None):
109
+ global processor, active_model
110
+ if active_model is None: raise RuntimeError("Models not loaded.")
111
+
112
+ messages =[
113
+ {"role": "user", "content":[
114
+ {"type": "video", "video": video_path, 'key_time': pred_glue, 'fps': sampling_fps,
115
+ "total_pixels": 128*12 * 28 * 28, "min_pixels": 128 * 28 * 28},
116
+ {"type": "text", "text": prompt},
117
+ ]
118
+ },
119
+ ]
120
+ text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
121
+ image_inputs, video_inputs, video_kwargs = process_vision_info(messages, return_video_kwargs=True, client=client)
122
+ fps_inputs = video_kwargs['fps'][0]
123
+ inputs = processor(text=[text], images=image_inputs, videos=video_inputs, fps=fps_inputs, padding=True, return_tensors="pt")
124
+ inputs = {k: v.to(active_model.device) for k, v in inputs.items()}
125
+
126
+ with torch.no_grad():
127
+ output_ids = active_model.generate(**inputs, **generation_kwargs, use_cache=True)
128
 
129
+ generated_ids = [output_ids[i][len(inputs['input_ids'][i]):] for i in range(len(output_ids))]
130
+ output_text = processor.batch_decode(generated_ids, skip_special_tokens=True)
131
+ return output_text[0]
132
+
133
+ async def generate_simple_text(prompt: str, model_type: str, config: dict):
134
+ loop = asyncio.get_event_loop()
135
+ try:
136
+ if model_type == 'gemini':
137
+ if genai_legacy is None: return "Error: Legacy SDK missing."
138
+ genai_legacy.configure(api_key=config.get("api_key"))
139
+ model = genai_legacy.GenerativeModel(config.get("model_name", "models/gemini-2.0-flash-exp"))
140
+ response = await loop.run_in_executor(
141
+ None,
142
+ lambda: model.generate_content(prompt, generation_config={"temperature": 0.0})
143
+ )
144
+ return response.text
145
+
146
+ elif model_type == 'vertex':
147
+ if genai is None: return "Error: Vertex SDK missing."
148
+ api_key = config.get("api_key")
149
+ if api_key:
150
+ cl = genai.Client(vertexai=True, project=config['project_id'], location=config['location'], api_key=api_key)
151
+ else:
152
+ cl = genai.Client(vertexai=True, project=config['project_id'], location=config['location'])
153
+ response = await loop.run_in_executor(
154
+ None,
155
+ lambda: cl.models.generate_content(
156
+ model=config['model_name'],
157
+ contents=prompt,
158
+ config=GenerateContentConfig(temperature=0.0)
159
+ )
160
+ )
161
+ return response.text
162
+
163
+ elif model_type == 'nrp':
164
+ api_key = config.get("api_key")
165
+ model_name = config.get("model_name", "gpt-4")
166
+ base_url = config.get("base_url", "https://api.openai.com/v1").rstrip("/")
167
+ if not api_key: return "Error: NRP API key missing."
168
+ headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
169
+ payload = {"model": model_name, "messages":[{"role": "user", "content": prompt}], "temperature": 0.0}
170
+ def do_request():
171
+ resp = requests.post(f"{base_url}/chat/completions", headers=headers, json=payload, timeout=600)
172
+ if resp.status_code == 200:
173
+ return resp.json()["choices"][0]["message"]["content"]
174
+ return f"Error: {resp.status_code} {resp.text}"
175
+ return await loop.run_in_executor(None, do_request)
176
+
177
+ except Exception as e:
178
+ logger.error(f"Text Gen Error: {e}")
179
+ return f"Error generating text: {e}"
180
+
181
+ async def generate_community_summary(comments: list, model_type: str, config: dict):
182
+ if not comments: return "No comments available."
183
+ c_text = "\n".join([f"- {c.get('author', 'User')}: {c.get('text', '')}" for c in comments[:15]])
184
+ prompt = (
185
+ "You are a Community Context Analyst. Analyze the following user comments regarding a social media post.\n"
186
+ "Your goal is to extract 'Community Notes' - specifically looking for fact-checking, debunking, or additional context provided by users.\n"
187
+ f"COMMENTS:\n{c_text}\n\n"
188
+ "OUTPUT:\n"
189
+ "Provide a concise 1-paragraph summary of the community consensus regarding the veracity of the post."
190
+ )
191
+ return await generate_simple_text(prompt, model_type, config)
192
+
193
  def extract_json_from_text(text):
194
  try:
195
  match = re.search(r'\{[\s\S]*\}', text)
 
198
  except:
199
  pass
200
  return {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
 
202
  def validate_parsed_data(data, is_text_only):
203
  missing =[]
204
+
205
  if not data.get('video_context_summary'): missing.append("summary")
206
+
207
  final = data.get('final_assessment', {})
208
  if not final.get('reasoning') or len(str(final.get('reasoning', ''))) < 5: missing.append("final:reasoning")
209
+
210
  vectors = data.get('veracity_vectors', {})
211
+ required_vectors =['visual_integrity_score', 'audio_integrity_score', 'source_credibility_score', 'logical_consistency_score', 'emotional_manipulation_score']
212
+ for k in required_vectors:
213
  if k in['visual_integrity_score', 'audio_integrity_score'] and is_text_only: continue
214
  v = vectors.get(k)
215
  if not v or str(v) == '0' or str(v).lower() == 'n/a': missing.append(f"vector:{k}")
216
+
217
  mod = data.get('modalities', {})
218
  for k in['video_audio_score', 'video_caption_score', 'audio_caption_score']:
219
  if k in['video_audio_score', 'video_caption_score'] and is_text_only: continue
220
  v = mod.get(k)
221
  if not v or str(v) == '0' or str(v).lower() == 'n/a': missing.append(f"modality:{k}")
222
+
223
+ fact = data.get('factuality_factors', {})
224
+ if not fact.get('claim_accuracy'): missing.append("factuality:claim_accuracy")
225
+
226
+ disinfo = data.get('disinformation_analysis', {})
227
+ if not disinfo.get('classification'): missing.append("disinfo:classification")
228
+
229
  return missing
230
 
231
+ def smart_merge(base, new_data):
232
+ if not isinstance(new_data, dict): return new_data if new_data else base
233
+ if not isinstance(base, dict): return new_data
234
+ for k, v in new_data.items():
235
+ if k not in base: base[k] = v
236
+ else:
237
+ if isinstance(base[k], dict) and isinstance(v, dict): smart_merge(base[k], v)
238
+ else:
239
+ base_val = base[k]
240
+ new_val = v
241
+ is_base_valid = base_val and str(base_val) != "0" and str(base_val).lower() != "n/a"
242
+ is_new_valid = new_val and str(new_val) != "0" and str(new_val).lower() != "n/a"
243
+ if not is_base_valid and is_new_valid: base[k] = new_val
244
+ return base
245
+
246
  def save_debug_log(request_id, kind, content, attempt, label=""):
247
  if not request_id: return
248
  try:
 
256
  except Exception as e:
257
  logger.error(f"Failed to save debug log: {e}")
258
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259
  async def run_gemini_labeling_pipeline(video_path: str, caption: str, transcript: str, gemini_config: dict, include_comments: bool, reasoning_method: str = "cot", system_persona: str = "", request_id: str = None):
260
  if genai_legacy is None:
261
  yield "ERROR: Legacy SDK missing.\n"
262
  return
 
263
  api_key = gemini_config.get("api_key")
264
+ if not api_key: return
 
 
 
265
  max_retries = int(gemini_config.get("max_retries", 1))
266
+
 
 
 
 
 
 
 
267
  try:
268
  genai_legacy.configure(api_key=api_key)
269
  loop = asyncio.get_event_loop()
270
  uploaded_file = None
271
  is_text_only = False
 
272
  if video_path and os.path.exists(video_path):
273
+ uploaded_file = await loop.run_in_executor(None, lambda: genai_legacy.upload_file(path=video_path))
274
+ while uploaded_file.state.name == "PROCESSING": await asyncio.sleep(2)
275
+ else: is_text_only = True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276
 
277
+ active_tools =[]
278
+ if gemini_config.get("use_search", False):
279
+ active_tools.append({"google_search_retrieval": {}})
280
+ system_persona += "\n\n**CRITICAL: AGENTIC TOOLS ENABLED**\n- You MUST use the Web Search tool to fact-check the claims, look up current events, or verify entity backgrounds before concluding."
281
+ if gemini_config.get("use_code", False):
282
+ active_tools.append({"code_execution": {}})
283
+ system_persona += "\n- You MUST use the Code Execution tool for any necessary calculations, data processing, or statistical verifications."
284
+
285
+ model = genai_legacy.GenerativeModel("models/gemini-2.0-flash-exp", tools=active_tools if active_tools else None)
286
  toon_schema = SCHEMA_REASONING if include_comments else SCHEMA_SIMPLE
287
  score_instructions = SCORE_INSTRUCTIONS_REASONING if include_comments else SCORE_INSTRUCTIONS_SIMPLE
288
+ tag_list_text = get_formatted_tag_list()
289
 
 
 
 
 
 
290
  accumulated_data = {}
291
+ prompt_used = ""
292
  fcot_trace = {}
293
  full_raw_text = ""
294
+ if is_text_only: system_persona += "\n" + TEXT_ONLY_INSTRUCTIONS
295
 
296
  for attempt in range(max_retries + 1):
297
  raw_text = ""
 
316
  save_debug_log(request_id, 'response', raw_text, attempt, 'reprompt')
317
  else:
318
  if reasoning_method == "fcot":
319
+ yield "Starting Fractal Chain of Thought (Gemini FCoT)..."
320
  chat = model.start_chat(history=[])
321
 
322
  macro_prompt = FCOT_MACRO_PROMPT.format(system_persona=system_persona, caption=caption, transcript=transcript)
 
323
  save_debug_log(request_id, 'prompt', macro_prompt, attempt, 'fcot_macro')
324
+ inputs1 = [macro_prompt]
 
325
  if uploaded_file: inputs1.insert(0, uploaded_file)
326
+ res1 = await loop.run_in_executor(None, lambda: chat.send_message(inputs1))
 
327
  macro_hypothesis = res1.text
328
  save_debug_log(request_id, 'response', macro_hypothesis, attempt, 'fcot_macro')
329
  fcot_trace['macro'] = macro_hypothesis
 
330
 
331
  meso_prompt = FCOT_MESO_PROMPT.format(macro_hypothesis=macro_hypothesis)
332
  save_debug_log(request_id, 'prompt', meso_prompt, attempt, 'fcot_meso')
333
+ res2 = await loop.run_in_executor(None, lambda: chat.send_message(meso_prompt))
334
  micro_observations = res2.text
335
  save_debug_log(request_id, 'response', micro_observations, attempt, 'fcot_meso')
336
  fcot_trace['meso'] = micro_observations
337
+
338
+ synthesis_prompt = FCOT_SYNTHESIS_PROMPT.format(toon_schema=toon_schema, score_instructions=score_instructions, tag_list_text=tag_list_text)
339
  save_debug_log(request_id, 'prompt', synthesis_prompt, attempt, 'fcot_synthesis')
340
+ res3 = await loop.run_in_executor(None, lambda: chat.send_message(synthesis_prompt))
341
  raw_text = res3.text
342
  save_debug_log(request_id, 'response', raw_text, attempt, 'fcot_synthesis')
343
+ prompt_used = f"FCoT Pipeline:\nMacro: {macro_hypothesis}\nMeso: {micro_observations}"
344
  else:
345
+ template = LABELING_PROMPT_TEMPLATE_NO_COT if reasoning_method == "none" else LABELING_PROMPT_TEMPLATE
346
+ prompt_text = template.format(
347
+ system_persona=system_persona, caption=caption, transcript=transcript,
348
+ toon_schema=toon_schema, score_instructions=score_instructions, tag_list_text=tag_list_text
349
+ )
350
  prompt_used = prompt_text
351
+ if is_text_only: prompt_text = "NOTE: Text Analysis Only.\n" + prompt_text
352
+ save_debug_log(request_id, 'prompt', prompt_text, attempt, f'standard_{reasoning_method}')
 
353
  inputs = [prompt_text]
354
  if uploaded_file: inputs.append(uploaded_file)
355
+ response = await loop.run_in_executor(None, lambda: model.generate_content(inputs, generation_config={"temperature": 0.1}))
 
 
 
 
356
  raw_text = response.text
357
+ save_debug_log(request_id, 'response', raw_text, attempt, f'standard_{reasoning_method}')
358
+
359
  if raw_text:
360
  full_raw_text += f"\n--- Attempt {attempt} ---\n{raw_text}\n"
361
  parsed_step = parse_veracity_toon(raw_text)
 
368
  else:
369
  parsed_step[k] = json_data[k]
370
  accumulated_data = smart_merge(accumulated_data, parsed_step)
371
+
372
  missing_fields = validate_parsed_data(accumulated_data, is_text_only)
373
  if not missing_fields:
374
+ yield f"Validation Passed. All factuality components processed and confidence scores obtained. (Method: {reasoning_method})\n"
375
  yield {"raw_toon": full_raw_text, "parsed_data": accumulated_data, "prompt_used": prompt_used, "fcot_trace": fcot_trace}
376
  break
377
+
378
  if attempt == max_retries:
379
  yield f"Max retries reached. Saving incomplete data.\n"
380
  yield {"raw_toon": full_raw_text, "parsed_data": accumulated_data, "prompt_used": prompt_used, "fcot_trace": fcot_trace}
381
  break
382
 
383
+ if uploaded_file: await loop.run_in_executor(None, lambda: genai_legacy.delete_file(name=uploaded_file.name))
384
+ except Exception as e: yield f"ERROR: {e}"
 
 
 
 
 
 
 
 
385
 
386
  async def run_vertex_labeling_pipeline(video_path: str, caption: str, transcript: str, vertex_config: dict, include_comments: bool, reasoning_method: str = "cot", system_persona: str = "", request_id: str = None):
387
  if genai is None:
 
389
  return
390
 
391
  project_id = vertex_config.get("project_id")
392
+ location = vertex_config.get("location", "us-central1")
393
+ model_name = vertex_config.get("model_name", "gemini-1.5-pro-preview-0409")
394
+ max_retries = int(vertex_config.get("max_retries", 1))
395
+ api_key = vertex_config.get("api_key")
396
 
397
+ if not project_id: return
 
 
 
 
 
398
 
399
  try:
400
+ # Pass api_key directly if available to use API Keys instead of ADC Service Accounts
401
  if api_key:
402
+ client = genai.Client(vertexai=True, project=project_id, location=location, api_key=api_key)
403
  else:
404
+ client = genai.Client(vertexai=True, project=project_id, location=location)
405
+
406
  video_part = None
407
  is_text_only = False
408
  if video_path and os.path.exists(video_path):
409
  with open(video_path, 'rb') as f: video_bytes = f.read()
410
  video_part = Part.from_bytes(data=video_bytes, mime_type="video/mp4")
411
+ else: is_text_only = True
412
+
413
+ active_tools =[]
414
+ if vertex_config.get("use_search", False):
415
+ active_tools.append(Tool(google_search=GoogleSearch()))
416
+ system_persona += "\n\n**CRITICAL: AGENTIC TOOLS ENABLED**\n- You MUST use the Web Search tool to fact-check the claims, look up current events, or verify entity backgrounds before concluding."
417
+ if vertex_config.get("use_code", False):
418
+ try:
419
+ from google.genai.types import CodeExecution
420
+ active_tools.append(Tool(code_execution=CodeExecution()))
421
+ system_persona += "\n- You MUST use the Code Execution tool for any necessary calculations, data processing, or statistical verifications."
422
+ except ImportError:
423
+ pass
424
 
 
 
 
 
 
 
 
 
425
  config = GenerateContentConfig(
426
+ temperature=0.1, response_mime_type="text/plain", max_output_tokens=8192,
427
+ tools=active_tools if active_tools else None
 
 
428
  )
429
 
430
+ toon_schema = SCHEMA_REASONING if include_comments else SCHEMA_SIMPLE
431
+ score_instructions = SCORE_INSTRUCTIONS_REASONING if include_comments else SCORE_INSTRUCTIONS_SIMPLE
432
+ tag_list_text = get_formatted_tag_list()
433
+
434
  accumulated_data = {}
435
+ prompt_used = ""
436
  fcot_trace = {}
437
  full_raw_text = ""
438
+ loop = asyncio.get_event_loop()
439
+
440
+ if is_text_only: system_persona += "\n" + TEXT_ONLY_INSTRUCTIONS
441
 
442
  for attempt in range(max_retries + 1):
443
  raw_text = ""
 
445
  missing = validate_parsed_data(accumulated_data, is_text_only)
446
  yield f"Validation failed. Missing or incomplete fields: {missing}. Initiating Iterative Reprompt (Attempt {attempt}/{max_retries}) to acquire remaining factuality components...\n"
447
 
448
+ # REPROMPT CONSTRUCTION
449
  prompt_text = (
450
  f"SYSTEM: Review the previous attempt which failed validation.\n"
451
  f"CONTEXT: Caption: \"{caption}\"\nTranscript: \"{transcript}\"\n"
 
466
  save_debug_log(request_id, 'response', raw_text, attempt, 'reprompt')
467
  else:
468
  if reasoning_method == "fcot":
469
+ yield "Starting Fractal Chain of Thought (Vertex FCoT)..."
470
  chat = client.chats.create(model=model_name, config=config)
471
 
472
  macro_prompt = FCOT_MACRO_PROMPT.format(system_persona=system_persona, caption=caption, transcript=transcript)
473
  save_debug_log(request_id, 'prompt', macro_prompt, attempt, 'fcot_macro')
474
+ inputs1 = [macro_prompt]
475
  if video_part: inputs1.insert(0, video_part)
476
  else: inputs1[0] = "NOTE: Text Only Analysis.\n" + inputs1[0]
477
 
 
479
  macro_hypothesis = res1.text
480
  save_debug_log(request_id, 'response', macro_hypothesis, attempt, 'fcot_macro')
481
  fcot_trace['macro'] = macro_hypothesis
 
482
 
483
  meso_prompt = FCOT_MESO_PROMPT.format(macro_hypothesis=macro_hypothesis)
484
  save_debug_log(request_id, 'prompt', meso_prompt, attempt, 'fcot_meso')
 
487
  save_debug_log(request_id, 'response', micro_observations, attempt, 'fcot_meso')
488
  fcot_trace['meso'] = micro_observations
489
 
490
+ synthesis_prompt = FCOT_SYNTHESIS_PROMPT.format(toon_schema=toon_schema, score_instructions=score_instructions, tag_list_text=tag_list_text)
491
  save_debug_log(request_id, 'prompt', synthesis_prompt, attempt, 'fcot_synthesis')
492
  res3 = await loop.run_in_executor(None, lambda: chat.send_message(synthesis_prompt))
493
  raw_text = res3.text
494
  save_debug_log(request_id, 'response', raw_text, attempt, 'fcot_synthesis')
495
+ prompt_used = f"FCoT (Vertex):\nMacro: {macro_hypothesis}\nMeso: {micro_observations}"
 
496
  else:
497
+ template = LABELING_PROMPT_TEMPLATE_NO_COT if reasoning_method == "none" else LABELING_PROMPT_TEMPLATE
498
+ prompt_text = template.format(
499
+ system_persona=system_persona, caption=caption, transcript=transcript,
500
+ toon_schema=toon_schema, score_instructions=score_instructions, tag_list_text=tag_list_text
501
+ )
502
+ contents =[]
503
  if video_part: contents =[video_part, prompt_text]
504
  else: contents =[f"NOTE: Text Only Analysis (No Video).\n{prompt_text}"]
505
  prompt_used = prompt_text
506
+ save_debug_log(request_id, 'prompt', prompt_text, attempt, f'standard_{reasoning_method}')
507
+ yield f"Generating Labels (Vertex {reasoning_method.upper()})..."
508
+ response = await loop.run_in_executor(None, lambda: client.models.generate_content(model=model_name, contents=contents, config=config))
 
 
 
509
  raw_text = response.text
510
+ save_debug_log(request_id, 'response', raw_text, attempt, f'standard_{reasoning_method}')
 
 
 
 
511
 
512
  if raw_text:
513
  full_raw_text += f"\n--- Attempt {attempt} ---\n{raw_text}\n"
 
524
 
525
  missing_fields = validate_parsed_data(accumulated_data, is_text_only)
526
  if not missing_fields:
527
+ yield f"Validation Passed. All factuality components processed and confidence scores obtained. (Method: {reasoning_method})\n"
528
  yield {"raw_toon": full_raw_text, "parsed_data": accumulated_data, "prompt_used": prompt_used, "fcot_trace": fcot_trace}
529
  break
530
 
 
532
  yield f"Max retries reached. Saving incomplete data.\n"
533
  yield {"raw_toon": full_raw_text, "parsed_data": accumulated_data, "prompt_used": prompt_used, "fcot_trace": fcot_trace}
534
  break
535
+
536
  except Exception as e:
537
+ yield f"ERROR: {e}"
538
  logger.error("Vertex Labeling Error", exc_info=True)
539
 
 
540
  async def run_nrp_labeling_pipeline(video_path: str, caption: str, transcript: str, nrp_config: dict, include_comments: bool, reasoning_method: str = "cot", system_persona: str = "", request_id: str = None):
541
  api_key = nrp_config.get("api_key")
542
  model_name = nrp_config.get("model_name", "gpt-4")
 
554
 
555
  is_text_only = True
556
  system_persona += "\n" + TEXT_ONLY_INSTRUCTIONS
557
+
558
+ if nrp_config.get("use_search", False):
559
+ system_persona += "\n\n**CRITICAL: AGENTIC TOOLS ENABLED**\n- You MUST use the Web Search tool to fact-check the claims, look up current events, or verify entity backgrounds before concluding."
560
+ if nrp_config.get("use_code", False):
561
+ system_persona += "\n- You MUST use the Code Execution tool for any necessary calculations, data processing, or statistical verifications."
562
 
563
  toon_schema = SCHEMA_REASONING if include_comments else SCHEMA_SIMPLE
564
  score_instructions = SCORE_INSTRUCTIONS_REASONING if include_comments else SCORE_INSTRUCTIONS_SIMPLE
 
576
  "messages": messages,
577
  "temperature": 0.1
578
  }
579
+
580
+ logger.info(f"[{request_id}] NRP API Call ({attempt_label}) - URL: {base_url}/chat/completions")
581
+ logger.info(f"[{request_id}] NRP API Call - Model: {model_name}")
582
+ logger.info(f"[{request_id}] NRP API Call - Messages count: {len(messages)}")
583
+
584
  def do_request():
585
+ start_time = datetime.datetime.now()
586
+ logger.info(f"[{request_id}] Dispatching requests.post (timeout=600s)...")
587
+
588
  resp = requests.post(f"{base_url}/chat/completions", headers=headers, json=payload, timeout=600)
589
+
590
+ elapsed = (datetime.datetime.now() - start_time).total_seconds()
591
+ logger.info(f"[{request_id}] NRP API Response received in {elapsed:.2f}s. Status Code: {resp.status_code}")
592
+
593
  if resp.status_code != 200:
594
+ logger.error(f"[{request_id}] API Error {resp.status_code}: {resp.text}")
595
  raise Exception(f"API Error {resp.status_code}: {resp.text}")
596
+
597
+ resp_json = resp.json()
598
+ usage = resp_json.get("usage", {})
599
+ logger.info(f"[{request_id}] NRP API Usage: {usage}")
600
+
601
+ return resp_json["choices"][0]["message"]["content"]
602
+
603
  return await loop.run_in_executor(None, do_request)
604
 
605
  try:
 
607
  raw_text = ""
608
  if attempt > 0:
609
  missing = validate_parsed_data(accumulated_data, is_text_only)
610
+ yield f"Validation failed. Missing fields: {missing}. Initiating Reprompt (Attempt {attempt}/{max_retries})...\n"
611
+
612
  prompt_text = (
613
  f"SYSTEM: Review the previous attempt which failed validation.\n"
614
  f"CONTEXT: Caption: \"{caption}\"\nTranscript: \"{transcript}\"\n"
615
  f"PREVIOUS (PARTIAL) DATA: {json.dumps(accumulated_data, indent=2)}\n"
616
  f"MISSING FIELDS: {missing}\n"
617
+ f"INSTRUCTION: Generate the missing fields to complete the schema. You MUST provide the missing scores for {missing}.\n"
618
+ f"Output the FULL VALID TOON OBJECT containing all required fields.\n"
619
  f"{toon_schema}"
620
  )
621
+
622
  save_debug_log(request_id, 'prompt', prompt_text, attempt, 'reprompt')
623
+
624
+ yield f" - Sending Reprompt request to NRP API (Model: {model_name}, Timeout: 600s)...\n"
625
  raw_text = await _call_nrp([
626
  {"role": "system", "content": system_persona},
627
  {"role": "user", "content": prompt_text}
628
+ ], attempt_label=f"reprompt_{attempt}")
629
+ yield f" - Received Reprompt response from NRP API.\n\n"
630
+
631
  save_debug_log(request_id, 'response', raw_text, attempt, 'reprompt')
632
  else:
633
  if reasoning_method == "fcot":
634
  yield "Starting Fractal Chain of Thought (NRP FCoT)...\n"
635
+
636
  macro_prompt = FCOT_MACRO_PROMPT.format(system_persona=system_persona, caption=caption, transcript=transcript)
637
  macro_prompt = "NOTE: Text Only Analysis.\n" + macro_prompt
638
  save_debug_log(request_id, 'prompt', macro_prompt, attempt, 'fcot_macro')
639
 
640
  macro_messages =[{"role": "system", "content": system_persona}, {"role": "user", "content": macro_prompt}]
641
+ yield f" - Stage 1: Sending Macro Hypothesis request to NRP API (Timeout: 600s)...\n"
642
+ macro_hypothesis = await _call_nrp(macro_messages, attempt_label="fcot_macro")
643
+ yield f" - Stage 1: Received Macro Hypothesis response.\n"
644
+
645
  save_debug_log(request_id, 'response', macro_hypothesis, attempt, 'fcot_macro')
646
  fcot_trace['macro'] = macro_hypothesis
647
 
648
  meso_prompt = FCOT_MESO_PROMPT.format(macro_hypothesis=macro_hypothesis)
649
  save_debug_log(request_id, 'prompt', meso_prompt, attempt, 'fcot_meso')
 
650
  meso_messages = macro_messages +[{"role": "assistant", "content": macro_hypothesis}, {"role": "user", "content": meso_prompt}]
651
+
652
+ yield f" - Stage 2: Sending Meso Analysis request to NRP API (Timeout: 600s)...\n"
653
+ micro_observations = await _call_nrp(meso_messages, attempt_label="fcot_meso")
654
+ yield f" - Stage 2: Received Meso Analysis response.\n"
655
+
656
  save_debug_log(request_id, 'response', micro_observations, attempt, 'fcot_meso')
657
  fcot_trace['meso'] = micro_observations
658
 
659
  synthesis_prompt = FCOT_SYNTHESIS_PROMPT.format(toon_schema=toon_schema, score_instructions=score_instructions, tag_list_text=tag_list_text)
660
  save_debug_log(request_id, 'prompt', synthesis_prompt, attempt, 'fcot_synthesis')
 
661
  synthesis_messages = meso_messages +[{"role": "assistant", "content": micro_observations}, {"role": "user", "content": synthesis_prompt}]
662
+
663
+ yield f" - Stage 3: Sending Synthesis/Formatting request to NRP API (Timeout: 600s)...\n"
664
+ raw_text = await _call_nrp(synthesis_messages, attempt_label="fcot_synthesis")
665
+ yield f" - Stage 3: Received Synthesis response.\n\n"
666
+
667
  save_debug_log(request_id, 'response', raw_text, attempt, 'fcot_synthesis')
668
  prompt_used = f"FCoT (NRP):\nMacro: {macro_hypothesis}\nMeso: {micro_observations}"
669
 
670
  else:
671
+ template = LABELING_PROMPT_TEMPLATE_NO_COT if reasoning_method == "none" else LABELING_PROMPT_TEMPLATE
672
+ prompt_text = template.format(
673
  system_persona=system_persona, caption=caption, transcript=transcript,
674
  toon_schema=toon_schema, score_instructions=score_instructions, tag_list_text=tag_list_text
675
  )
676
  prompt_text = f"NOTE: Text Only Analysis (No Video).\n{prompt_text}"
677
  prompt_used = prompt_text
678
+ save_debug_log(request_id, 'prompt', prompt_text, attempt, f'standard_{reasoning_method}')
679
+ yield f"Generating Labels (NRP {reasoning_method.upper()})...\n"
680
+ yield f" - Sending Standard request to NRP API (Model: {model_name}, Timeout: 600s)...\n"
681
+
682
  raw_text = await _call_nrp([
683
  {"role": "system", "content": system_persona},
684
  {"role": "user", "content": prompt_text}
685
+ ], attempt_label=f"standard_{reasoning_method}")
686
+
687
+ yield f" - Received response from NRP API.\n\n"
688
+ save_debug_log(request_id, 'response', raw_text, attempt, f'standard_{reasoning_method}')
689
 
690
  if raw_text:
691
  full_raw_text += f"\n--- Attempt {attempt} ---\n{raw_text}\n"
 
702
 
703
  missing_fields = validate_parsed_data(accumulated_data, is_text_only)
704
  if not missing_fields:
705
+ yield f"Validation Passed. All factuality components processed and confidence scores obtained. (Method: {reasoning_method})\n"
706
  yield {"raw_toon": full_raw_text, "parsed_data": accumulated_data, "prompt_used": prompt_used, "fcot_trace": fcot_trace}
707
  break
708
 
709
  if attempt == max_retries:
710
+ yield f"Max retries reached. Saving incomplete data.\n"
711
+ yield {"raw_toon": full_raw_text, "parsed_data": accumulated_data, "prompt_