GlazedDon0t commited on
Commit
4b424d6
·
1 Parent(s): 1c08c4a
Files changed (6) hide show
  1. frontend/src/App.tsx +121 -125
  2. src/app.py +205 -72
  3. src/benchmarking.py +229 -224
  4. src/common_utils.py +110 -104
  5. src/factuality_logic.py +12 -23
  6. src/inference_logic.py +321 -207
frontend/src/App.tsx CHANGED
@@ -10,77 +10,74 @@ import {
10
 
11
  function App() {
12
  const[activeTab, setActiveTab] = useState('home');
13
- const [logs, setLogs] = useState<string>('System Ready.\n');
14
- const[isProcessing, setIsProcessing] = useState(false);
15
  const logContainerRef = useRef<HTMLDivElement>(null);
16
 
17
  // Processing Config State
18
  const [modelProvider, setModelProvider] = useState('nrp');
19
- const [apiKey, setApiKey] = useState('');
20
  const[baseUrl, setBaseUrl] = useState('https://ellm.nrp-nautilus.io/v1'); // NRP Default
21
  const[modelName, setModelName] = useState('qwen3'); // Default
22
  const[projectId, setProjectId] = useState('');
23
  const [location, setLocation] = useState('us-central1');
24
- const [includeComments, setIncludeComments] = useState(false);
25
  const[reasoningMethod, setReasoningMethod] = useState('cot');
26
  const [promptTemplate, setPromptTemplate] = useState('standard');
27
- const [customQuery, setCustomQuery] = useState('');
28
- const[maxRetries, setMaxRetries] = useState(1);
29
  const [availablePrompts, setAvailablePrompts] = useState<any[]>([]);
30
-
31
- // Predictive Config
32
- const [predictiveModelType, setPredictiveModelType] = useState('logistic');
33
- const [predictiveResult, setPredictiveResult] = useState<any>(null);
34
 
35
  // Data States
36
- const [queueList, setQueueList] = useState<any[]>([]);
37
- const[selectedQueueItems, setSelectedQueueItems] = useState<Set<string>>(new Set());
38
- const[expandedQueueItems, setExpandedQueueItems] = useState<Set<string>>(new Set());
39
  const[lastQueueIndex, setLastQueueIndex] = useState<number | null>(null);
40
 
41
- const[singleLinkInput, setSingleLinkInput] = useState('');
42
  const [profileList, setProfileList] = useState<any[]>([]);
43
- const [selectedProfile, setSelectedProfile] = useState<any>(null);
44
- const[profilePosts, setProfilePosts] = useState<any[]>([]);
45
- const [communityDatasets, setCommunityDatasets] = useState<any[]>([]);
46
- const [communityAnalysis, setCommunityAnalysis] = useState<any>(null);
47
- const[integrityBoard, setIntegrityBoard] = useState<any[]>([]);
48
 
49
  const[datasetList, setDatasetList] = useState<any[]>([]);
50
- const [selectedItems, setSelectedItems] = useState<Set<string>>(new Set());
51
- const [lastDatasetIndex, setLastDatasetIndex] = useState<number | null>(null);
52
 
53
  const [benchmarks, setBenchmarks] = useState<any>(null);
54
- const[leaderboard, setLeaderboard] = useState<any[]>([]);
55
- const [refreshTrigger, setRefreshTrigger] = useState(0);
56
 
57
  // Tags
58
- const [configuredTags, setConfiguredTags] = useState<any>({});
59
 
60
  // Manual Labeling State
61
- const [manualLink, setManualLink] = useState('');
62
- const[manualCaption, setManualCaption] = useState('');
63
  const [manualTags, setManualTags] = useState('');
64
  const[manualReasoning, setManualReasoning] = useState('');
65
- const [manualScores, setManualScores] = useState({
66
  visual: 5, audio: 5, source: 5, logic: 5, emotion: 5,
67
  va: 5, vc: 5, ac: 5, final: 50
68
  });
69
- const[showRubric, setShowRubric] = useState(false);
70
- const [aiReference, setAiReference] = useState<any>(null);
71
- const [labelBrowserMode, setLabelBrowserMode] = useState<'queue' | 'dataset'>('queue');
72
- const [labelFilter, setLabelFilter] = useState('');
73
 
74
  // Agent Chat State
75
- const[agentInput, setAgentInput] = useState('');
76
- const [agentMessages, setAgentMessages] = useState<any[]>([]);
77
- const [agentThinking, setAgentThinking] = useState(false);
78
- const[agentEndpoint, setAgentEndpoint] = useState('/a2a');
79
- const [agentMethod, setAgentMethod] = useState('agent.process');
80
  const [agentConfig, setAgentConfig] = useState({ use_search: true, use_code: false });
81
 
82
  // Resampling configuration
83
- const[resampleCount, setResampleCount] = useState<number>(1);
84
 
85
  // Drag Selection references
86
  const isDraggingQueueRef = useRef(false);
@@ -89,9 +86,9 @@ function App() {
89
  // Quick Demo State
90
  const[demoLink, setDemoLink] = useState('');
91
  const [demoLogs, setDemoLogs] = useState('');
92
- const [demoIsProcessing, setDemoIsProcessing] = useState(false);
93
  const[demoResult, setDemoResult] = useState<any>(null);
94
- const[showDemoConfig, setShowDemoConfig] = useState(false);
95
  const demoLogContainerRef = useRef<HTMLDivElement>(null);
96
 
97
  useEffect(() => {
@@ -125,7 +122,6 @@ function App() {
125
  setLastQueueIndex(null);
126
  }
127
  if (activeTab === 'profiles') load('/profiles/list', setProfileList);
128
- if (activeTab === 'community') load('/community/list_datasets', setCommunityDatasets);
129
  if (activeTab === 'analytics') load('/analytics/account_integrity', setIntegrityBoard);
130
  if (activeTab === 'dataset' || activeTab === 'manual' || activeTab === 'groundtruth') load('/dataset/list', setDatasetList);
131
  if (activeTab === 'manual') load('/queue/list', setQueueList);
@@ -409,28 +405,6 @@ function App() {
409
  } catch(e: any) { alert("Network error: " + e.toString()); }
410
  };
411
 
412
- const analyzeComments = async (id: string) => {
413
- setCommunityAnalysis({ verdict: "Analyzing..." });
414
- const res = await fetch('/community/analyze', {
415
- method: 'POST', headers: {'Content-Type': 'application/json'},
416
- body: JSON.stringify({ dataset_id: id })
417
- });
418
- setCommunityAnalysis(await res.json());
419
- };
420
-
421
- const runPredictiveTraining = async (useVisual: boolean) => {
422
- setPredictiveResult({ status: 'training' });
423
- try {
424
- const res = await fetch('/benchmarks/train_predictive', {
425
- method: 'POST', headers: {'Content-Type': 'application/json'},
426
- body: JSON.stringify({ use_visual_meta: useVisual, model_type: predictiveModelType })
427
- });
428
- const data = await res.json();
429
- setPredictiveResult(data);
430
- setRefreshTrigger(p => p+1);
431
- } catch (e) { setPredictiveResult({ error: "Failed to train." }); }
432
- };
433
-
434
  const queueUnlabeledPosts = async () => {
435
  const unlabeled = profilePosts.filter(p => !p.is_labeled).map(p => p.link);
436
  if(unlabeled.length === 0) return alert("All posts already labeled!");
@@ -547,6 +521,9 @@ function App() {
547
  fd.append('prompt_template', promptTemplate);
548
  fd.append('custom_query', customQuery);
549
  fd.append('max_reprompts', maxRetries.toString());
 
 
 
550
 
551
  try {
552
  const res = await fetch('/queue/run', { method: 'POST', body: fd });
@@ -597,6 +574,9 @@ function App() {
597
  fd.append('prompt_template', promptTemplate);
598
  fd.append('custom_query', customQuery);
599
  fd.append('max_reprompts', maxRetries.toString());
 
 
 
600
 
601
  setDemoLogs(prev => prev + '[SYSTEM] Sending analysis payload to model server...\n');
602
 
@@ -736,13 +716,11 @@ function App() {
736
  {[
737
  {id:'home', l:'Home & Benchmarks', i:Home},
738
  {id:'agent', l:'Agent Nexus', i:Bot},
739
- {id:'predictive', l:'Predictive Sandbox', i:FlaskConical},
740
  {id:'queue', l:'Ingest Queue', i:List},
741
  {id:'profiles', l:'User Profiles', i:Users},
742
  {id:'manual', l:'Labeling Studio', i:PenTool},
743
  {id:'dataset', l:'Data Manager', i:Archive},
744
  {id:'groundtruth', l:'Ground Truth (Verified)', i:ShieldCheck},
745
- {id:'community', l:'Community Trust', i:MessageSquare},
746
  {id:'analytics', l:'Analytics', i:BarChart2}
747
  ].map(t => (
748
  <button key={t.id} onClick={() => setActiveTab(t.id)}
@@ -817,6 +795,7 @@ function App() {
817
  <div className="space-y-3">
818
  <label className="text-[10px] text-slate-500 uppercase font-bold block border-b border-slate-800 pb-1">Inference Strategy</label>
819
  <select value={reasoningMethod} onChange={e => setReasoningMethod(e.target.value)} className="w-full bg-slate-900 border border-slate-700 rounded p-2 text-xs text-white">
 
820
  <option value="cot">Standard Chain of Thought</option>
821
  <option value="fcot">Fractal Chain of Thought</option>
822
  </select>
@@ -825,6 +804,16 @@ function App() {
825
  <option key={p.id} value={p.id}>{p.name}</option>
826
  )) : <option value="standard">Standard</option>}
827
  </select>
 
 
 
 
 
 
 
 
 
 
828
  </div>
829
  </div>
830
  )}
@@ -1012,11 +1001,11 @@ function App() {
1012
  <th className="p-3">Model</th>
1013
  <th className="p-3">Prompt</th>
1014
  <th className="p-3">Reasoning</th>
 
1015
  <th className="p-3 text-center">FCoT Depth</th>
1016
  <th className="p-3 text-right text-emerald-400">Accuracy</th>
1017
  <th className="p-3 text-right">Comp. MAE</th>
1018
  <th className="p-3 text-right">Tag Acc</th>
1019
- <th className="p-3 text-right">Samples</th>
1020
  <th className="p-3"></th>
1021
  </tr>
1022
  </thead>
@@ -1027,17 +1016,18 @@ function App() {
1027
  <td className="p-3 font-mono text-white">{row.model}</td>
1028
  <td className="p-3">{row.prompt}</td>
1029
  <td className="p-3 uppercase text-[10px]">{row.reasoning}</td>
 
1030
  <td className="p-3 text-center text-slate-400 font-mono">{row.fcot_depth ?? 0}</td>
1031
  <td className="p-3 text-right font-bold text-emerald-400">{row.accuracy}%</td>
1032
  <td className="p-3 text-right font-mono text-amber-400">{row.comp_mae}</td>
1033
  <td className="p-3 text-right">{row.tag_acc}%</td>
1034
- <td className="p-3 text-right text-slate-500">{row.samples}</td>
1035
- <td className="p-3 text-center" title={row.params}>
1036
  <div className="group relative">
1037
  <HelpCircle className="w-4 h-4 text-slate-600 cursor-help"/>
1038
  <div className="absolute right-0 bottom-6 w-64 p-3 bg-black border border-slate-700 rounded shadow-xl hidden group-hover:block z-50 text-[10px] whitespace-pre-wrap text-left">
1039
  <div className="font-bold mb-1 text-slate-400">Config Params</div>
1040
- {row.params}
 
1041
  </div>
1042
  </div>
1043
  </td>
@@ -1050,6 +1040,54 @@ function App() {
1050
  </table>
1051
  </div>
1052
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1053
  </div>
1054
  )}
1055
 
@@ -1124,6 +1162,7 @@ function App() {
1124
  <div className="space-y-1 mt-2">
1125
  <label className="text-[10px] text-slate-500">Reasoning Method</label>
1126
  <select value={reasoningMethod} onChange={e => setReasoningMethod(e.target.value)} className="w-full bg-slate-950 border border-slate-700 rounded p-2 text-xs text-white">
 
1127
  <option value="cot">Standard Chain of Thought</option>
1128
  <option value="fcot">Fractal Chain of Thought</option>
1129
  </select>
@@ -1211,31 +1250,6 @@ function App() {
1211
  </div>
1212
  )}
1213
 
1214
- {/* PREDICTIVE SANDBOX */}
1215
- {activeTab === 'predictive' && (
1216
- <div className="flex h-full gap-6">
1217
- <div className="w-1/3 bg-slate-900/50 border border-slate-800 rounded-xl p-6 flex flex-col gap-6">
1218
- <div>
1219
- <h2 className="text-lg font-bold text-white flex items-center gap-2"><FlaskConical className="w-5 h-5"/> Model Sandbox</h2>
1220
- <p className="text-xs text-slate-400">Train models on the text features of the current Ground Truth dataset.</p>
1221
- </div>
1222
- <button onClick={() => runPredictiveTraining(false)} className="w-full py-3 bg-indigo-600 hover:bg-indigo-500 text-white rounded font-bold text-xs">Train Baseline</button>
1223
- </div>
1224
- <div className="flex-1 bg-slate-900/50 border border-slate-800 rounded-xl p-6 relative overflow-hidden overflow-y-auto">
1225
- {predictiveResult ? (
1226
- predictiveResult.status === 'training' ? (
1227
- <div className="absolute inset-0 flex items-center justify-center text-indigo-400 animate-pulse">Training Model...</div>
1228
- ) : predictiveResult.error ? ( <div className="text-red-400">{predictiveResult.error}</div> ) : (
1229
- <div className="space-y-6">
1230
- <div className="text-xl font-mono text-white">Training Complete ({predictiveResult.type})</div>
1231
- <pre className="text-xs text-slate-400 bg-black p-4 rounded">{JSON.stringify(predictiveResult, null, 2)}</pre>
1232
- </div>
1233
- )
1234
- ) : <div className="flex h-full items-center justify-center text-slate-600">Ready to train.</div>}
1235
- </div>
1236
- </div>
1237
- )}
1238
-
1239
  {/* QUEUE TAB */}
1240
  {activeTab === 'queue' && (
1241
  <div className="flex h-full gap-6">
@@ -1320,6 +1334,7 @@ function App() {
1320
  <div className="space-y-1 mt-2">
1321
  <label className="text-[10px] text-slate-500">Reasoning Method</label>
1322
  <select value={reasoningMethod} onChange={e => setReasoningMethod(e.target.value)} className="w-full bg-slate-950 border border-slate-700 rounded p-2 text-xs text-white">
 
1323
  <option value="cot">Standard Chain of Thought</option>
1324
  <option value="fcot">Fractal Chain of Thought</option>
1325
  </select>
@@ -1333,6 +1348,18 @@ function App() {
1333
  </select>
1334
  </div>
1335
 
 
 
 
 
 
 
 
 
 
 
 
 
1336
  {/* Process Controls */}
1337
  {isProcessing ? (
1338
  <button onClick={stopProcessing} className="w-full py-2 bg-red-600 hover:bg-red-500 text-white rounded font-bold text-xs flex items-center justify-center gap-2 animate-pulse">
@@ -1721,7 +1748,7 @@ function App() {
1721
  <span className="capitalize text-slate-300 font-bold">{k}</span>
1722
  <span className="text-indigo-400 font-mono font-bold">{(manualScores as any)[k]}/10</span>
1723
  </div>
1724
- <input type="range" min="1" max="10" value={(manualScores as any)[k]} onChange={e => setManualScores({...manualScores, [k]: parseInt(e.target.value)})} className="w-full accent-indigo-500"/>
1725
  </div>
1726
  ))}
1727
  </div>
@@ -1813,37 +1840,6 @@ function App() {
1813
  </div>
1814
  )}
1815
 
1816
- {/* COMMUNITY AND ANALYTICS TABS (UNCHANGED) */}
1817
- {activeTab === 'community' && (
1818
- <div className="flex h-full gap-6">
1819
- <div className="w-1/3 bg-slate-900/50 border border-slate-800 rounded-xl overflow-auto">
1820
- <div className="p-3 bg-slate-950 border-b border-slate-800 text-xs font-bold text-slate-400">Comment Datasets</div>
1821
- {communityDatasets.map((d, i) => (
1822
- <div key={i} onClick={() => analyzeComments(d.id)} className="p-4 border-b border-slate-800/50 cursor-pointer hover:bg-white/5">
1823
- <div className="text-xs font-mono text-indigo-400 mb-1">{d.id}</div>
1824
- <div className="text-[10px] text-slate-500">{d.count} comments</div>
1825
- </div>
1826
- ))}
1827
- </div>
1828
- <div className="flex-1 flex flex-col justify-center items-center bg-slate-900/20 border border-slate-800 rounded-xl p-8">
1829
- {communityAnalysis ? (
1830
- <div className="text-center w-full max-w-md">
1831
- <div className="text-xs uppercase text-slate-500 mb-2 tracking-widest">Community Quantization</div>
1832
- <h2 className="text-5xl font-bold text-white mb-2">{communityAnalysis.trust_score?.toFixed(0)}<span className="text-xl text-slate-600">/100</span></h2>
1833
- <div className={`text-lg font-bold mb-8 px-4 py-1 rounded-full inline-block ${communityAnalysis.trust_score < 40 ? 'bg-red-500/10 text-red-400' : 'bg-emerald-500/10 text-emerald-400'}`}>
1834
- {communityAnalysis.verdict}
1835
- </div>
1836
- </div>
1837
- ) : (
1838
- <div className="text-slate-600 flex flex-col items-center">
1839
- <MessageSquare className="w-12 h-12 mb-4 opacity-20"/>
1840
- <span>Select a dataset to analyze community sentiment.</span>
1841
- </div>
1842
- )}
1843
- </div>
1844
- </div>
1845
- )}
1846
-
1847
  {activeTab === 'analytics' && (
1848
  <div className="h-full overflow-auto">
1849
  <div className="flex items-center justify-between mb-4">
 
10
 
11
  function App() {
12
  const[activeTab, setActiveTab] = useState('home');
13
+ const[logs, setLogs] = useState<string>('System Ready.\n');
14
+ const [isProcessing, setIsProcessing] = useState(false);
15
  const logContainerRef = useRef<HTMLDivElement>(null);
16
 
17
  // Processing Config State
18
  const [modelProvider, setModelProvider] = useState('nrp');
19
+ const[apiKey, setApiKey] = useState('');
20
  const[baseUrl, setBaseUrl] = useState('https://ellm.nrp-nautilus.io/v1'); // NRP Default
21
  const[modelName, setModelName] = useState('qwen3'); // Default
22
  const[projectId, setProjectId] = useState('');
23
  const [location, setLocation] = useState('us-central1');
24
+ const[includeComments, setIncludeComments] = useState(false);
25
  const[reasoningMethod, setReasoningMethod] = useState('cot');
26
  const [promptTemplate, setPromptTemplate] = useState('standard');
27
+ const[customQuery, setCustomQuery] = useState('');
28
+ const [maxRetries, setMaxRetries] = useState(1);
29
  const [availablePrompts, setAvailablePrompts] = useState<any[]>([]);
30
+
31
+ const [useSearch, setUseSearch] = useState(false);
32
+ const[useCode, setUseCode] = useState(false);
 
33
 
34
  // Data States
35
+ const[queueList, setQueueList] = useState<any[]>([]);
36
+ const [selectedQueueItems, setSelectedQueueItems] = useState<Set<string>>(new Set());
37
+ const [expandedQueueItems, setExpandedQueueItems] = useState<Set<string>>(new Set());
38
  const[lastQueueIndex, setLastQueueIndex] = useState<number | null>(null);
39
 
40
+ const [singleLinkInput, setSingleLinkInput] = useState('');
41
  const [profileList, setProfileList] = useState<any[]>([]);
42
+ const[selectedProfile, setSelectedProfile] = useState<any>(null);
43
+ const [profilePosts, setProfilePosts] = useState<any[]>([]);
44
+ const [integrityBoard, setIntegrityBoard] = useState<any[]>([]);
 
 
45
 
46
  const[datasetList, setDatasetList] = useState<any[]>([]);
47
+ const[selectedItems, setSelectedItems] = useState<Set<string>>(new Set());
48
+ const[lastDatasetIndex, setLastDatasetIndex] = useState<number | null>(null);
49
 
50
  const [benchmarks, setBenchmarks] = useState<any>(null);
51
+ const [leaderboard, setLeaderboard] = useState<any[]>([]);
52
+ const[refreshTrigger, setRefreshTrigger] = useState(0);
53
 
54
  // Tags
55
+ const[configuredTags, setConfiguredTags] = useState<any>({});
56
 
57
  // Manual Labeling State
58
+ const[manualLink, setManualLink] = useState('');
59
+ const [manualCaption, setManualCaption] = useState('');
60
  const [manualTags, setManualTags] = useState('');
61
  const[manualReasoning, setManualReasoning] = useState('');
62
+ const[manualScores, setManualScores] = useState({
63
  visual: 5, audio: 5, source: 5, logic: 5, emotion: 5,
64
  va: 5, vc: 5, ac: 5, final: 50
65
  });
66
+ const [showRubric, setShowRubric] = useState(false);
67
+ const[aiReference, setAiReference] = useState<any>(null);
68
+ const[labelBrowserMode, setLabelBrowserMode] = useState<'queue' | 'dataset'>('queue');
69
+ const[labelFilter, setLabelFilter] = useState('');
70
 
71
  // Agent Chat State
72
+ const [agentInput, setAgentInput] = useState('');
73
+ const[agentMessages, setAgentMessages] = useState<any[]>([]);
74
+ const[agentThinking, setAgentThinking] = useState(false);
75
+ const [agentEndpoint, setAgentEndpoint] = useState('/a2a');
76
+ const[agentMethod, setAgentMethod] = useState('agent.process');
77
  const [agentConfig, setAgentConfig] = useState({ use_search: true, use_code: false });
78
 
79
  // Resampling configuration
80
+ const [resampleCount, setResampleCount] = useState<number>(1);
81
 
82
  // Drag Selection references
83
  const isDraggingQueueRef = useRef(false);
 
86
  // Quick Demo State
87
  const[demoLink, setDemoLink] = useState('');
88
  const [demoLogs, setDemoLogs] = useState('');
89
+ const[demoIsProcessing, setDemoIsProcessing] = useState(false);
90
  const[demoResult, setDemoResult] = useState<any>(null);
91
+ const [showDemoConfig, setShowDemoConfig] = useState(false);
92
  const demoLogContainerRef = useRef<HTMLDivElement>(null);
93
 
94
  useEffect(() => {
 
122
  setLastQueueIndex(null);
123
  }
124
  if (activeTab === 'profiles') load('/profiles/list', setProfileList);
 
125
  if (activeTab === 'analytics') load('/analytics/account_integrity', setIntegrityBoard);
126
  if (activeTab === 'dataset' || activeTab === 'manual' || activeTab === 'groundtruth') load('/dataset/list', setDatasetList);
127
  if (activeTab === 'manual') load('/queue/list', setQueueList);
 
405
  } catch(e: any) { alert("Network error: " + e.toString()); }
406
  };
407
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
408
  const queueUnlabeledPosts = async () => {
409
  const unlabeled = profilePosts.filter(p => !p.is_labeled).map(p => p.link);
410
  if(unlabeled.length === 0) return alert("All posts already labeled!");
 
521
  fd.append('prompt_template', promptTemplate);
522
  fd.append('custom_query', customQuery);
523
  fd.append('max_reprompts', maxRetries.toString());
524
+
525
+ fd.append('use_search', useSearch.toString());
526
+ fd.append('use_code', useCode.toString());
527
 
528
  try {
529
  const res = await fetch('/queue/run', { method: 'POST', body: fd });
 
574
  fd.append('prompt_template', promptTemplate);
575
  fd.append('custom_query', customQuery);
576
  fd.append('max_reprompts', maxRetries.toString());
577
+
578
+ fd.append('use_search', useSearch.toString());
579
+ fd.append('use_code', useCode.toString());
580
 
581
  setDemoLogs(prev => prev + '[SYSTEM] Sending analysis payload to model server...\n');
582
 
 
716
  {[
717
  {id:'home', l:'Home & Benchmarks', i:Home},
718
  {id:'agent', l:'Agent Nexus', i:Bot},
 
719
  {id:'queue', l:'Ingest Queue', i:List},
720
  {id:'profiles', l:'User Profiles', i:Users},
721
  {id:'manual', l:'Labeling Studio', i:PenTool},
722
  {id:'dataset', l:'Data Manager', i:Archive},
723
  {id:'groundtruth', l:'Ground Truth (Verified)', i:ShieldCheck},
 
724
  {id:'analytics', l:'Analytics', i:BarChart2}
725
  ].map(t => (
726
  <button key={t.id} onClick={() => setActiveTab(t.id)}
 
795
  <div className="space-y-3">
796
  <label className="text-[10px] text-slate-500 uppercase font-bold block border-b border-slate-800 pb-1">Inference Strategy</label>
797
  <select value={reasoningMethod} onChange={e => setReasoningMethod(e.target.value)} className="w-full bg-slate-900 border border-slate-700 rounded p-2 text-xs text-white">
798
+ <option value="none">Direct (No CoT)</option>
799
  <option value="cot">Standard Chain of Thought</option>
800
  <option value="fcot">Fractal Chain of Thought</option>
801
  </select>
 
804
  <option key={p.id} value={p.id}>{p.name}</option>
805
  )) : <option value="standard">Standard</option>}
806
  </select>
807
+
808
+ <label className="text-[10px] text-slate-500 uppercase font-bold block border-b border-slate-800 pb-1 mt-3">Agentic Tools</label>
809
+ <label className="flex items-center gap-2 text-xs text-slate-300 cursor-pointer">
810
+ <input type="checkbox" className="accent-indigo-500" checked={useSearch} onChange={e => setUseSearch(e.target.checked)} />
811
+ Enable Web Search Retrieval
812
+ </label>
813
+ <label className="flex items-center gap-2 text-xs text-slate-300 cursor-pointer">
814
+ <input type="checkbox" className="accent-indigo-500" checked={useCode} onChange={e => setUseCode(e.target.checked)} />
815
+ Enable Code Execution
816
+ </label>
817
  </div>
818
  </div>
819
  )}
 
1001
  <th className="p-3">Model</th>
1002
  <th className="p-3">Prompt</th>
1003
  <th className="p-3">Reasoning</th>
1004
+ <th className="p-3 text-center">Tools</th>
1005
  <th className="p-3 text-center">FCoT Depth</th>
1006
  <th className="p-3 text-right text-emerald-400">Accuracy</th>
1007
  <th className="p-3 text-right">Comp. MAE</th>
1008
  <th className="p-3 text-right">Tag Acc</th>
 
1009
  <th className="p-3"></th>
1010
  </tr>
1011
  </thead>
 
1016
  <td className="p-3 font-mono text-white">{row.model}</td>
1017
  <td className="p-3">{row.prompt}</td>
1018
  <td className="p-3 uppercase text-[10px]">{row.reasoning}</td>
1019
+ <td className="p-3 text-center text-sky-400 font-mono text-[10px]">{row.tools || 'None'}</td>
1020
  <td className="p-3 text-center text-slate-400 font-mono">{row.fcot_depth ?? 0}</td>
1021
  <td className="p-3 text-right font-bold text-emerald-400">{row.accuracy}%</td>
1022
  <td className="p-3 text-right font-mono text-amber-400">{row.comp_mae}</td>
1023
  <td className="p-3 text-right">{row.tag_acc}%</td>
1024
+ <td className="p-3 text-center">
 
1025
  <div className="group relative">
1026
  <HelpCircle className="w-4 h-4 text-slate-600 cursor-help"/>
1027
  <div className="absolute right-0 bottom-6 w-64 p-3 bg-black border border-slate-700 rounded shadow-xl hidden group-hover:block z-50 text-[10px] whitespace-pre-wrap text-left">
1028
  <div className="font-bold mb-1 text-slate-400">Config Params</div>
1029
+ <div>{row.params}</div>
1030
+ <div className="mt-2 pt-2 border-t border-slate-800 text-slate-400 font-bold">Samples: {row.samples}</div>
1031
  </div>
1032
  </div>
1033
  </td>
 
1040
  </table>
1041
  </div>
1042
  </div>
1043
+
1044
+ {/* Detailed Vector Accuracies */}
1045
+ <div className="bg-slate-900/50 border border-slate-800 rounded-xl p-6 mt-6 mb-8">
1046
+ <h3 className="text-sm font-bold text-white uppercase mb-4 flex items-center gap-2">
1047
+ <BarChart2 className="w-4 h-4 text-sky-400"/> Detailed Vector Error Analysis (MAE)
1048
+ </h3>
1049
+ <div className="overflow-x-auto">
1050
+ <table className="w-full text-left text-xs text-slate-400">
1051
+ <thead className="bg-slate-950 text-slate-500 uppercase">
1052
+ <tr>
1053
+ <th className="p-3">Model</th>
1054
+ <th className="p-3">Prompt</th>
1055
+ <th className="p-3">Reasoning</th>
1056
+ <th className="p-3">Tools / Techniques</th>
1057
+ <th className="p-3 text-right">Vis</th>
1058
+ <th className="p-3 text-right">Aud</th>
1059
+ <th className="p-3 text-right">Src</th>
1060
+ <th className="p-3 text-right">Log</th>
1061
+ <th className="p-3 text-right">Emo</th>
1062
+ <th className="p-3 text-right">V-A</th>
1063
+ <th className="p-3 text-right">V-C</th>
1064
+ <th className="p-3 text-right">A-C</th>
1065
+ </tr>
1066
+ </thead>
1067
+ <tbody className="divide-y divide-slate-800">
1068
+ {leaderboard && leaderboard.map((row, i) => (
1069
+ <tr key={i} className="hover:bg-white/5">
1070
+ <td className="p-3 font-mono text-white">{row.model}</td>
1071
+ <td className="p-3">{row.prompt}</td>
1072
+ <td className="p-3 uppercase text-[10px]">{row.reasoning}</td>
1073
+ <td className="p-3 text-sky-400 font-mono text-[10px]">{row.tools || 'None'}</td>
1074
+ <td className="p-3 text-right font-mono">{row.err_visual_score ?? '-'}</td>
1075
+ <td className="p-3 text-right font-mono">{row.err_audio_score ?? '-'}</td>
1076
+ <td className="p-3 text-right font-mono">{row.err_source_score ?? '-'}</td>
1077
+ <td className="p-3 text-right font-mono">{row.err_logic_score ?? '-'}</td>
1078
+ <td className="p-3 text-right font-mono">{row.err_emotion_score ?? '-'}</td>
1079
+ <td className="p-3 text-right font-mono">{row.err_align_video_audio ?? '-'}</td>
1080
+ <td className="p-3 text-right font-mono">{row.err_align_video_caption ?? '-'}</td>
1081
+ <td className="p-3 text-right font-mono">{row.err_align_audio_caption ?? '-'}</td>
1082
+ </tr>
1083
+ ))}
1084
+ {(!leaderboard || leaderboard.length === 0) && (
1085
+ <tr><td colSpan={12} className="p-4 text-center text-slate-600">No detailed benchmark data available.</td></tr>
1086
+ )}
1087
+ </tbody>
1088
+ </table>
1089
+ </div>
1090
+ </div>
1091
  </div>
1092
  )}
1093
 
 
1162
  <div className="space-y-1 mt-2">
1163
  <label className="text-[10px] text-slate-500">Reasoning Method</label>
1164
  <select value={reasoningMethod} onChange={e => setReasoningMethod(e.target.value)} className="w-full bg-slate-950 border border-slate-700 rounded p-2 text-xs text-white">
1165
+ <option value="none">Direct (No CoT)</option>
1166
  <option value="cot">Standard Chain of Thought</option>
1167
  <option value="fcot">Fractal Chain of Thought</option>
1168
  </select>
 
1250
  </div>
1251
  )}
1252
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1253
  {/* QUEUE TAB */}
1254
  {activeTab === 'queue' && (
1255
  <div className="flex h-full gap-6">
 
1334
  <div className="space-y-1 mt-2">
1335
  <label className="text-[10px] text-slate-500">Reasoning Method</label>
1336
  <select value={reasoningMethod} onChange={e => setReasoningMethod(e.target.value)} className="w-full bg-slate-950 border border-slate-700 rounded p-2 text-xs text-white">
1337
+ <option value="none">Direct (No CoT)</option>
1338
  <option value="cot">Standard Chain of Thought</option>
1339
  <option value="fcot">Fractal Chain of Thought</option>
1340
  </select>
 
1348
  </select>
1349
  </div>
1350
 
1351
+ <div className="space-y-2 mt-2">
1352
+ <label className="text-[10px] text-slate-500 uppercase font-bold block border-b border-slate-800 pb-1">Agentic Tools</label>
1353
+ <label className="flex items-center gap-2 text-xs text-slate-300 cursor-pointer">
1354
+ <input type="checkbox" className="accent-indigo-500" checked={useSearch} onChange={e => setUseSearch(e.target.checked)} />
1355
+ Enable Web Search Retrieval
1356
+ </label>
1357
+ <label className="flex items-center gap-2 text-xs text-slate-300 cursor-pointer">
1358
+ <input type="checkbox" className="accent-indigo-500" checked={useCode} onChange={e => setUseCode(e.target.checked)} />
1359
+ Enable Code Execution
1360
+ </label>
1361
+ </div>
1362
+
1363
  {/* Process Controls */}
1364
  {isProcessing ? (
1365
  <button onClick={stopProcessing} className="w-full py-2 bg-red-600 hover:bg-red-500 text-white rounded font-bold text-xs flex items-center justify-center gap-2 animate-pulse">
 
1748
  <span className="capitalize text-slate-300 font-bold">{k}</span>
1749
  <span className="text-indigo-400 font-mono font-bold">{(manualScores as any)[k]}/10</span>
1750
  </div>
1751
+ <input type="range" min="1" max="10" value={(manualScores as any)[k]} onChange={e => setManualScores({...manualScores,[k]: parseInt(e.target.value)})} className="w-full accent-indigo-500"/>
1752
  </div>
1753
  ))}
1754
  </div>
 
1840
  </div>
1841
  )}
1842
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1843
  {activeTab === 'analytics' && (
1844
  <div className="h-full overflow-auto">
1845
  <div className="flex items-center justify-between mb-4">
src/app.py CHANGED
@@ -1,7 +1,7 @@
1
  import os
2
  import sys
3
 
4
- # --- FIX: Ensure 'src' is in sys.path so sibling imports work ---
5
  current_dir = os.path.dirname(os.path.abspath(__file__))
6
  if current_dir not in sys.path:
7
  sys.path.append(current_dir)
@@ -30,11 +30,12 @@ import agent_logic
30
  import common_utils
31
 
32
  from toon_parser import parse_veracity_toon
33
- from labeling_logic import PROMPT_VARIANTS, LABELING_PROMPT_TEMPLATE, FCOT_MACRO_PROMPT
34
  import benchmarking
35
 
36
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
37
  logger = logging.getLogger(__name__)
 
38
  LITE_MODE = os.getenv("LITE_MODE", "true").lower() == "true"
39
 
40
  app = FastAPI()
@@ -78,19 +79,19 @@ except Exception as e:
78
  agent_mount_status = f"error_{str(e)}"
79
 
80
  # --- Static Files & Frontend ---
81
- STATIC_DIR = "/app/static"
82
- if not os.path.isdir(STATIC_DIR):
83
- if os.path.isdir("/usr/share/vchat/static"):
84
- STATIC_DIR = "/usr/share/vchat/static"
85
- elif os.path.isdir("frontend/dist"):
86
- STATIC_DIR = "frontend/dist"
87
- else:
88
- STATIC_DIR = "static"
89
- os.makedirs(STATIC_DIR, exist_ok=True)
90
 
91
  app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
92
 
93
- # --- FIX: Explicitly mount assets for Vite support ---
94
  assets_path = os.path.join(STATIC_DIR, "assets")
95
  if os.path.exists(assets_path):
96
  app.mount("/assets", StaticFiles(directory=assets_path), name="assets")
@@ -227,10 +228,6 @@ async def get_benchmark_stats():
227
  async def get_benchmark_leaderboard():
228
  return benchmarking.generate_leaderboard()
229
 
230
- @app.post("/benchmarks/train_predictive")
231
- async def run_predictive_training(config: dict = Body(...)):
232
- return benchmarking.train_predictive_sandbox(config)
233
-
234
  @app.get("/config/prompts")
235
  async def list_prompts():
236
  return [{"id": k, "name": v['description']} for k, v in PROMPT_VARIANTS.items()]
@@ -260,7 +257,7 @@ async def list_all_tags():
260
  t = t.strip()
261
  if t: tags_count[t] = tags_count.get(t, 0) + 1
262
  sorted_tags = sorted(tags_count.items(), key=lambda x: x[1], reverse=True)
263
- return [{"name": k, "count": v} for k, v in sorted_tags]
264
 
265
  @app.post("/extension/ingest")
266
  async def extension_ingest_link(request: Request):
@@ -304,7 +301,7 @@ async def promote_to_ground_truth(request: Request):
304
  try:
305
  data = await request.json()
306
  target_ids = data.get("ids",[])
307
- if not target_ids and data.get("id"): target_ids = [data.get("id")]
308
 
309
  if not target_ids: return JSONResponse({"status": "error", "message": "No IDs provided"}, status_code=400)
310
 
@@ -362,7 +359,7 @@ async def delete_ground_truth(request: Request):
362
  try:
363
  data = await request.json()
364
  target_ids = data.get("ids",[])
365
- if not target_ids and data.get("id"): target_ids = [data.get("id")]
366
  if not target_ids: raise HTTPException(status_code=400)
367
 
368
  target_ids =[str(t) for t in target_ids]
@@ -619,6 +616,29 @@ async def save_manual_label(request: Request):
619
  writer = csv.DictWriter(f, fieldnames=GROUND_TRUTH_FIELDS, extrasaction='ignore')
620
  writer.writeheader()
621
  writer.writerows(rows)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
622
 
623
  update_queue_status(link, "Processed")
624
  return {"status": "success", "id": tweet_id}
@@ -626,29 +646,6 @@ async def save_manual_label(request: Request):
626
  logger.error(f"Save Manual Error: {e}")
627
  return JSONResponse({"status": "error", "message": str(e)}, status_code=500)
628
 
629
- @app.get("/community/list_datasets")
630
- async def list_community_datasets():
631
- path = Path("data/comments")
632
- files =[]
633
- if path.exists():
634
- for f in path.glob("*.csv"):
635
- files.append({"id": f.stem, "count": sum(1 for _ in open(f, encoding='utf-8'))-1})
636
- return files
637
-
638
- @app.post("/community/analyze")
639
- async def analyze_community(dataset_id: str = Body(..., embed=True)):
640
- path = Path(f"data/comments/{dataset_id}.csv")
641
- if not path.exists(): raise HTTPException(status_code=404)
642
- comments = list(common_utils.robust_read_csv(path))
643
- if not comments: return {"score": 0, "verdict": "No Data"}
644
- s_keys =["fake", "lie", "staged", "bs", "propaganda", "ai", "deepfake"]
645
- t_keys =["true", "real", "confirmed", "fact", "source", "proof"]
646
- s_count = sum(1 for c in comments if any(k in c['text'].lower() for k in s_keys))
647
- t_count = sum(1 for c in comments if any(k in c['text'].lower() for k in t_keys))
648
- score = max(0, min(100, 50 + (t_count * 2) - (s_count * 5)))
649
- verdict = "Community Skepticism" if score < 30 else "Community Verification" if score > 70 else "Neutral/Mixed"
650
- return {"dataset_id": dataset_id, "trust_score": score, "verdict": verdict, "details": {"skeptical_comments": s_count, "trusting_comments": t_count}}
651
-
652
  @app.get("/dataset/list")
653
  async def get_dataset_list():
654
  dataset =[]
@@ -673,26 +670,77 @@ async def get_dataset_list():
673
  async def get_account_integrity():
674
  id_map = {}
675
  prof_dir = Path("data/profiles")
 
 
 
676
  if prof_dir.exists():
677
  for d in prof_dir.iterdir():
678
- for row in common_utils.robust_read_csv(d/"history.csv"):
679
- tid = common_utils.extract_tweet_id(row.get('link',''))
680
- if tid: id_map[tid] = d.name
 
 
 
 
 
 
681
 
682
  scores_map = {}
683
  for fname in ["data/dataset.csv", "data/manual_dataset.csv"]:
684
- for row in common_utils.robust_read_csv(Path(fname)):
 
 
685
  tid = row.get('id')
 
686
  sc = row.get('final_veracity_score', '0')
 
 
687
  try: val = float(re.sub(r'[^\d.]', '', str(sc)))
688
- except: val = 0
689
 
690
- auth = id_map.get(tid, "Unknown")
691
- if auth != "Unknown":
692
- if auth not in scores_map: scores_map[auth] =[]
693
- scores_map[auth].append(val)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
694
 
695
- return sorted([{"username": k, "avg_veracity": round(sum(v)/len(v),1), "posts_labeled": len(v)} for k,v in scores_map.items()], key=lambda x: x['avg_veracity'], reverse=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
696
 
697
  @app.post("/queue/add")
698
  async def add_queue_item(link: str = Body(..., embed=True)):
@@ -838,19 +886,6 @@ async def analyze_user_context(request: Request):
838
  return {"status": "success", "report": rep}
839
  except Exception as e: return JSONResponse({"error": str(e)}, status_code=500)
840
 
841
- @app.get("/download-dataset")
842
- async def download_dataset():
843
- file_path = Path("data/dataset.csv")
844
- if file_path.exists():
845
- return FileResponse(path=file_path, filename="dataset.csv", media_type='text/csv')
846
- return Response("Dataset not found.", status_code=404)
847
-
848
- @app.get("/model-architecture", response_class=PlainTextResponse)
849
- async def get_model_architecture():
850
- if LITE_MODE: return "Running in LITE mode."
851
- if inference_logic.base_model: return str(inference_logic.base_model)
852
- return "Model not loaded."
853
-
854
  @app.get("/", response_class=HTMLResponse)
855
  async def read_root(request: Request):
856
  return templates.TemplateResponse("index.html", {"request": request})
@@ -896,14 +931,15 @@ async def run_queue_processing(
896
  vertex_project_id: str = Form(""), vertex_location: str = Form(""), vertex_model_name: str = Form(""), vertex_api_key: str = Form(""),
897
  nrp_api_key: str = Form(""), nrp_model_name: str = Form(""), nrp_base_url: str = Form("https://ellm.nrp-nautilus.io/v1"),
898
  include_comments: bool = Form(False), reasoning_method: str = Form("cot"), prompt_template: str = Form("standard"),
899
- custom_query: str = Form(""), max_reprompts: int = Form(1)
 
900
  ):
901
  global STOP_QUEUE_SIGNAL
902
  STOP_QUEUE_SIGNAL = False
903
 
904
- gemini_config = {"api_key": gemini_api_key, "model_name": gemini_model_name, "max_retries": max_reprompts}
905
- vertex_config = {"project_id": vertex_project_id, "location": vertex_location, "model_name": vertex_model_name, "api_key": vertex_api_key, "max_retries": max_reprompts, "use_search": True}
906
- nrp_config = {"api_key": nrp_api_key, "model_name": nrp_model_name, "base_url": nrp_base_url, "max_retries": max_reprompts}
907
 
908
  sel_p = PROMPT_VARIANTS.get(prompt_template, PROMPT_VARIANTS['standard'])
909
  system_persona_txt = sel_p['instruction']
@@ -922,7 +958,9 @@ async def run_queue_processing(
922
  config_params_dict = {
923
  "reprompts": max_reprompts,
924
  "include_comments": include_comments,
925
- "agent_active": False
 
 
926
  }
927
  config_params_str = json.dumps(config_params_dict)
928
 
@@ -953,7 +991,7 @@ async def run_queue_processing(
953
  gt_data = row
954
  break
955
 
956
- yield f"data: [START] {link} (Type: {task_type})\n\n"
957
  tid = common_utils.extract_tweet_id(link) or hashlib.md5(link.encode()).hexdigest()[:10]
958
  assets = await common_utils.prepare_video_assets(link, tid)
959
 
@@ -1029,4 +1067,99 @@ async def run_queue_processing(
1029
  yield f"data: Emotional Manipul. : AI {s_float(vec_ai.get('emotional_manipulation_score'))} | GT {s_float(gt_data.get('emotional_manipulation_score'))}\n"
1030
  yield f"data: Video-Audio Align : AI {s_float(mod_ai.get('video_audio_score'))} | GT {s_float(gt_data.get('video_audio_score'))}\n"
1031
  yield f"data: Video-Caption Align: AI {s_float(mod_ai.get('video_caption_score'))} | GT {s_float(gt_data.get('video_caption_score'))}\n"
1032
- yield f"data: Audio-Caption Align: AI {s_float(mod_ai.get('audio_caption_score'))} | GT {s_float(gt_
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import sys
3
 
4
+ # Ensure 'src' is in sys.path so sibling imports work
5
  current_dir = os.path.dirname(os.path.abspath(__file__))
6
  if current_dir not in sys.path:
7
  sys.path.append(current_dir)
 
30
  import common_utils
31
 
32
  from toon_parser import parse_veracity_toon
33
+ from labeling_logic import PROMPT_VARIANTS, LABELING_PROMPT_TEMPLATE, LABELING_PROMPT_TEMPLATE_NO_COT, FCOT_MACRO_PROMPT
34
  import benchmarking
35
 
36
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
37
  logger = logging.getLogger(__name__)
38
+
39
  LITE_MODE = os.getenv("LITE_MODE", "true").lower() == "true"
40
 
41
  app = FastAPI()
 
79
  agent_mount_status = f"error_{str(e)}"
80
 
81
  # --- Static Files & Frontend ---
82
+ STATIC_DIR = "static"
83
+ if os.path.isdir("/app/static"):
84
+ STATIC_DIR = "/app/static"
85
+ elif os.path.isdir("/usr/share/vchat/static"):
86
+ STATIC_DIR = "/usr/share/vchat/static"
87
+ elif os.path.isdir("frontend/dist"):
88
+ STATIC_DIR = "frontend/dist"
89
+ elif not os.path.isdir(STATIC_DIR):
90
+ os.makedirs(STATIC_DIR, exist_ok=True)
91
 
92
  app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
93
 
94
+ # Explicitly mount assets for Vite support
95
  assets_path = os.path.join(STATIC_DIR, "assets")
96
  if os.path.exists(assets_path):
97
  app.mount("/assets", StaticFiles(directory=assets_path), name="assets")
 
228
  async def get_benchmark_leaderboard():
229
  return benchmarking.generate_leaderboard()
230
 
 
 
 
 
231
  @app.get("/config/prompts")
232
  async def list_prompts():
233
  return [{"id": k, "name": v['description']} for k, v in PROMPT_VARIANTS.items()]
 
257
  t = t.strip()
258
  if t: tags_count[t] = tags_count.get(t, 0) + 1
259
  sorted_tags = sorted(tags_count.items(), key=lambda x: x[1], reverse=True)
260
+ return[{"name": k, "count": v} for k, v in sorted_tags]
261
 
262
  @app.post("/extension/ingest")
263
  async def extension_ingest_link(request: Request):
 
301
  try:
302
  data = await request.json()
303
  target_ids = data.get("ids",[])
304
+ if not target_ids and data.get("id"): target_ids =[data.get("id")]
305
 
306
  if not target_ids: return JSONResponse({"status": "error", "message": "No IDs provided"}, status_code=400)
307
 
 
359
  try:
360
  data = await request.json()
361
  target_ids = data.get("ids",[])
362
+ if not target_ids and data.get("id"): target_ids =[data.get("id")]
363
  if not target_ids: raise HTTPException(status_code=400)
364
 
365
  target_ids =[str(t) for t in target_ids]
 
616
  writer = csv.DictWriter(f, fieldnames=GROUND_TRUTH_FIELDS, extrasaction='ignore')
617
  writer.writeheader()
618
  writer.writerows(rows)
619
+
620
+ # Add to User Profiles Catalog
621
+ author = common_utils.extract_twitter_username(link)
622
+ if author:
623
+ prof_dir = Path(f"data/profiles/{author}")
624
+ prof_dir.mkdir(parents=True, exist_ok=True)
625
+ hist_path = prof_dir / "history.csv"
626
+ hist_exists = hist_path.exists()
627
+ existing_links = set()
628
+ if hist_exists:
629
+ for r in common_utils.robust_read_csv(hist_path):
630
+ existing_links.add(r.get('link'))
631
+ if link not in existing_links:
632
+ with open(hist_path, 'a', newline='', encoding='utf-8') as hf:
633
+ fieldnames =["link", "timestamp", "text", "is_reply", "metric_replies", "metric_reposts", "metric_likes", "metric_views", "ingested_at"]
634
+ hwriter = csv.DictWriter(hf, fieldnames=fieldnames, extrasaction='ignore')
635
+ if not hist_exists: hwriter.writeheader()
636
+ hwriter.writerow({
637
+ "link": link,
638
+ "timestamp": row["timestamp"],
639
+ "text": row["caption"],
640
+ "ingested_at": row["timestamp"]
641
+ })
642
 
643
  update_queue_status(link, "Processed")
644
  return {"status": "success", "id": tweet_id}
 
646
  logger.error(f"Save Manual Error: {e}")
647
  return JSONResponse({"status": "error", "message": str(e)}, status_code=500)
648
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
649
  @app.get("/dataset/list")
650
  async def get_dataset_list():
651
  dataset =[]
 
670
  async def get_account_integrity():
671
  id_map = {}
672
  prof_dir = Path("data/profiles")
673
+ prof_dir.mkdir(parents=True, exist_ok=True)
674
+
675
+ existing_links_per_user = {}
676
  if prof_dir.exists():
677
  for d in prof_dir.iterdir():
678
+ if d.is_dir():
679
+ hist_file = d / "history.csv"
680
+ existing_links_per_user[d.name] = set()
681
+ if hist_file.exists():
682
+ for row in common_utils.robust_read_csv(hist_file):
683
+ link = row.get('link', '')
684
+ tid = common_utils.extract_tweet_id(link)
685
+ if tid: id_map[tid] = d.name
686
+ existing_links_per_user[d.name].add(link)
687
 
688
  scores_map = {}
689
  for fname in ["data/dataset.csv", "data/manual_dataset.csv"]:
690
+ path = Path(fname)
691
+ if not path.exists(): continue
692
+ for row in common_utils.robust_read_csv(path):
693
  tid = row.get('id')
694
+ link = row.get('link', '')
695
  sc = row.get('final_veracity_score', '0')
696
+ ts = row.get('timestamp', '')
697
+ caption = row.get('caption', '')
698
  try: val = float(re.sub(r'[^\d.]', '', str(sc)))
699
+ except: val = -1
700
 
701
+ # Require scores to be between 0 and 100
702
+ if 0 <= val <= 100:
703
+ auth = common_utils.extract_twitter_username(link) or id_map.get(tid, "Unknown")
704
+ if auth and auth != "Unknown":
705
+ if auth not in scores_map: scores_map[auth] = []
706
+ scores_map[auth].append({'val': val, 'ts': ts})
707
+
708
+ # Auto-add missing accounts/links to the Profile catalog
709
+ if auth not in existing_links_per_user:
710
+ existing_links_per_user[auth] = set()
711
+ Path(f"data/profiles/{auth}").mkdir(parents=True, exist_ok=True)
712
+
713
+ if link not in existing_links_per_user[auth]:
714
+ existing_links_per_user[auth].add(link)
715
+ hist_path = Path(f"data/profiles/{auth}/history.csv")
716
+ hist_exists = hist_path.exists()
717
+ with open(hist_path, 'a', newline='', encoding='utf-8') as hf:
718
+ fieldnames =["link", "timestamp", "text", "is_reply", "metric_replies", "metric_reposts", "metric_likes", "metric_views", "ingested_at"]
719
+ hwriter = csv.DictWriter(hf, fieldnames=fieldnames, extrasaction='ignore')
720
+ if not hist_exists: hwriter.writeheader()
721
+ hwriter.writerow({
722
+ "link": link,
723
+ "timestamp": ts,
724
+ "text": caption,
725
+ "ingested_at": ts
726
+ })
727
 
728
+ results =[]
729
+ for k, v in scores_map.items():
730
+ v_sorted = sorted(v, key=lambda x: x['ts'], reverse=True)
731
+ decay_factor = 0.9
732
+ total_weight = 0
733
+ weighted_sum = 0
734
+
735
+ for i, item in enumerate(v_sorted):
736
+ weight = decay_factor ** i
737
+ weighted_sum += item['val'] * weight
738
+ total_weight += weight
739
+
740
+ avg_veracity = round(weighted_sum / total_weight, 1) if total_weight > 0 else 0
741
+ results.append({"username": k, "avg_veracity": avg_veracity, "posts_labeled": len(v)})
742
+
743
+ return sorted(results, key=lambda x: x['avg_veracity'], reverse=True)
744
 
745
  @app.post("/queue/add")
746
  async def add_queue_item(link: str = Body(..., embed=True)):
 
886
  return {"status": "success", "report": rep}
887
  except Exception as e: return JSONResponse({"error": str(e)}, status_code=500)
888
 
 
 
 
 
 
 
 
 
 
 
 
 
 
889
  @app.get("/", response_class=HTMLResponse)
890
  async def read_root(request: Request):
891
  return templates.TemplateResponse("index.html", {"request": request})
 
931
  vertex_project_id: str = Form(""), vertex_location: str = Form(""), vertex_model_name: str = Form(""), vertex_api_key: str = Form(""),
932
  nrp_api_key: str = Form(""), nrp_model_name: str = Form(""), nrp_base_url: str = Form("https://ellm.nrp-nautilus.io/v1"),
933
  include_comments: bool = Form(False), reasoning_method: str = Form("cot"), prompt_template: str = Form("standard"),
934
+ custom_query: str = Form(""), max_reprompts: int = Form(1),
935
+ use_search: bool = Form(False), use_code: bool = Form(False)
936
  ):
937
  global STOP_QUEUE_SIGNAL
938
  STOP_QUEUE_SIGNAL = False
939
 
940
+ gemini_config = {"api_key": gemini_api_key, "model_name": gemini_model_name, "max_retries": max_reprompts, "use_search": use_search, "use_code": use_code}
941
+ vertex_config = {"project_id": vertex_project_id, "location": vertex_location, "model_name": vertex_model_name, "api_key": vertex_api_key, "max_retries": max_reprompts, "use_search": use_search, "use_code": use_code}
942
+ nrp_config = {"api_key": nrp_api_key, "model_name": nrp_model_name, "base_url": nrp_base_url, "max_retries": max_reprompts, "use_search": use_search, "use_code": use_code}
943
 
944
  sel_p = PROMPT_VARIANTS.get(prompt_template, PROMPT_VARIANTS['standard'])
945
  system_persona_txt = sel_p['instruction']
 
958
  config_params_dict = {
959
  "reprompts": max_reprompts,
960
  "include_comments": include_comments,
961
+ "agent_active": False,
962
+ "use_search": use_search,
963
+ "use_code": use_code
964
  }
965
  config_params_str = json.dumps(config_params_dict)
966
 
 
991
  gt_data = row
992
  break
993
 
994
+ yield f"data:[START] {link} (Type: {task_type})\n\n"
995
  tid = common_utils.extract_tweet_id(link) or hashlib.md5(link.encode()).hexdigest()[:10]
996
  assets = await common_utils.prepare_video_assets(link, tid)
997
 
 
1067
  yield f"data: Emotional Manipul. : AI {s_float(vec_ai.get('emotional_manipulation_score'))} | GT {s_float(gt_data.get('emotional_manipulation_score'))}\n"
1068
  yield f"data: Video-Audio Align : AI {s_float(mod_ai.get('video_audio_score'))} | GT {s_float(gt_data.get('video_audio_score'))}\n"
1069
  yield f"data: Video-Caption Align: AI {s_float(mod_ai.get('video_caption_score'))} | GT {s_float(gt_data.get('video_caption_score'))}\n"
1070
+ yield f"data: Audio-Caption Align: AI {s_float(mod_ai.get('audio_caption_score'))} | GT {s_float(gt_data.get('audio_caption_score'))}\n"
1071
+ yield f"data: FINAL VERACITY : AI {ai_score} | GT {gt_final} | Delta: {delta}\n\n"
1072
+
1073
+ comp_path = Path("data/comparison.csv")
1074
+ comp_exists = comp_path.exists()
1075
+ with open(comp_path, 'a', newline='', encoding='utf-8') as cf:
1076
+ cw = csv.DictWriter(cf, fieldnames=["id", "link", "timestamp", "gt_score", "ai_score", "delta", "model", "prompt", "reasoning_method"])
1077
+ if not comp_exists: cw.writeheader()
1078
+ cw.writerow({
1079
+ "id": tid, "link": link, "timestamp": datetime.datetime.now().isoformat(),
1080
+ "gt_score": gt_final, "ai_score": ai_score, "delta": delta,
1081
+ "model": active_model_name, "prompt": prompt_template, "reasoning_method": reasoning_method
1082
+ })
1083
+
1084
+ try:
1085
+ with open(d_path, 'a', newline='', encoding='utf-8') as f:
1086
+ row = {
1087
+ "id": tid, "link": link, "timestamp": datetime.datetime.now().isoformat(),
1088
+ "caption": assets['caption'],
1089
+ "final_veracity_score": ai_score,
1090
+ "visual_score": parsed['veracity_vectors'].get('visual_integrity_score', 0),
1091
+ "audio_score": parsed['veracity_vectors'].get('audio_integrity_score', 0),
1092
+ "source_score": parsed['veracity_vectors'].get('source_credibility_score', 0),
1093
+ "logic_score": parsed['veracity_vectors'].get('logical_consistency_score', 0),
1094
+ "emotion_score": parsed['veracity_vectors'].get('emotional_manipulation_score', 0),
1095
+ "align_video_audio": parsed['modalities'].get('video_audio_score', 0),
1096
+ "align_video_caption": parsed['modalities'].get('video_caption_score', 0),
1097
+ "align_audio_caption": parsed['modalities'].get('audio_caption_score', 0),
1098
+ "classification": parsed['disinformation_analysis'].get('classification', 'None'),
1099
+ "reasoning": parsed['final_assessment'].get('reasoning', ''),
1100
+ "tags": ",".join(parsed.get('tags',[])),
1101
+ "raw_toon": res_data.get("raw_toon", ""),
1102
+ "config_type": "GenAI",
1103
+ "config_model": active_model_name,
1104
+ "config_prompt": prompt_template,
1105
+ "config_reasoning": reasoning_method,
1106
+ "config_params": config_params_str
1107
+ }
1108
+ writer = csv.DictWriter(f, fieldnames=DATASET_COLUMNS, extrasaction='ignore')
1109
+ if not exists: writer.writeheader()
1110
+ writer.writerow(row)
1111
+ except Exception as csv_err: logger.error(f"CSV Write Failed: {csv_err}")
1112
+
1113
+ try:
1114
+ ts = datetime.datetime.now().isoformat()
1115
+ ts_clean = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
1116
+ flat_parsed = parsed.copy()
1117
+ flat_parsed["raw_toon"] = res_data.get("raw_toon", "")
1118
+ flat_parsed["meta_info"] = {
1119
+ "id": tid, "timestamp": ts, "link": link,
1120
+ "prompt_used": res_data.get("prompt_used", ""),
1121
+ "model_selection": model_selection,
1122
+ "config_type": "GenAI",
1123
+ "config_model": active_model_name,
1124
+ "config_prompt": prompt_template,
1125
+ "config_reasoning": reasoning_method,
1126
+ "config_params": config_params_dict
1127
+ }
1128
+ with open(Path(f"data/labels/{tid}_{ts_clean}.json"), 'w', encoding='utf-8') as f: json.dump(flat_parsed, f, indent=2, ensure_ascii=False)
1129
+ except Exception as e: logger.error(f"Sidecar Error: {e}")
1130
+
1131
+ # Add to User Profiles Catalog
1132
+ author = common_utils.extract_twitter_username(link)
1133
+ if author:
1134
+ prof_dir = Path(f"data/profiles/{author}")
1135
+ prof_dir.mkdir(parents=True, exist_ok=True)
1136
+ hist_path = prof_dir / "history.csv"
1137
+ hist_exists = hist_path.exists()
1138
+ existing_links = set()
1139
+ if hist_exists:
1140
+ for r in common_utils.robust_read_csv(hist_path):
1141
+ existing_links.add(r.get('link'))
1142
+ if link not in existing_links:
1143
+ with open(hist_path, 'a', newline='', encoding='utf-8') as hf:
1144
+ fieldnames =["link", "timestamp", "text", "is_reply", "metric_replies", "metric_reposts", "metric_likes", "metric_views", "ingested_at"]
1145
+ hwriter = csv.DictWriter(hf, fieldnames=fieldnames, extrasaction='ignore')
1146
+ if not hist_exists: hwriter.writeheader()
1147
+ hwriter.writerow({
1148
+ "link": link,
1149
+ "timestamp": datetime.datetime.now().isoformat(),
1150
+ "text": assets['caption'],
1151
+ "ingested_at": datetime.datetime.now().isoformat()
1152
+ })
1153
+
1154
+ p_ids.add(tid)
1155
+ p_links.add(common_utils.normalize_link(link))
1156
+ update_queue_status(link, "Processed", task_type)
1157
+ yield f"data:[SUCCESS] Saved.\n\n"
1158
+ else:
1159
+ err_msg = res_data.get('error') if isinstance(res_data, dict) else "Inference failed"
1160
+ log_queue_error(link, err_msg, task_type)
1161
+ yield f"data: [FAIL] {err_msg}.\n\n"
1162
+ await asyncio.sleep(0.5)
1163
+ yield "event: close\ndata: Done\n\n"
1164
+
1165
+ return StreamingResponse(queue_stream(), media_type="text/event-stream")
src/benchmarking.py CHANGED
@@ -1,224 +1,229 @@
1
- import pandas as pd
2
- import numpy as np
3
- import shutil
4
- import json
5
- import math
6
- from pathlib import Path
7
- from sklearn.linear_model import LogisticRegression
8
- from sklearn.model_selection import train_test_split
9
-
10
- # Lazy import to avoid startup overhead
11
- try:
12
- from autogluon.tabular import TabularPredictor
13
- AUTOGLUON_AVAILABLE = True
14
- except ImportError:
15
- AUTOGLUON_AVAILABLE = False
16
-
17
- DATA_AI = Path("data/dataset.csv")
18
- DATA_MANUAL = Path("data/manual_dataset.csv")
19
-
20
- def sanitize_for_json(obj):
21
- """Recursively clean floats for JSON output."""
22
- if isinstance(obj, float):
23
- if math.isnan(obj) or math.isinf(obj): return None
24
- return obj
25
- elif isinstance(obj, dict):
26
- return {k: sanitize_for_json(v) for k, v in obj.items()}
27
- elif isinstance(obj, list):
28
- return[sanitize_for_json(v) for v in obj]
29
- return obj
30
-
31
- def calculate_tag_accuracy(tags_ai, tags_man):
32
- if pd.isna(tags_ai): tags_ai = ""
33
- if pd.isna(tags_man): tags_man = ""
34
- set_ai = set([t.strip().lower() for t in str(tags_ai).split(',') if t.strip()])
35
- set_man = set([t.strip().lower() for t in str(tags_man).split(',') if t.strip()])
36
- if not set_man and not set_ai: return 1.0
37
- if not set_man or not set_ai: return 0.0
38
- # Jaccard Similarity
39
- return len(set_ai.intersection(set_man)) / len(set_ai.union(set_man))
40
-
41
- def get_combined_dataset():
42
- """
43
- Joins AI predictions with Manual Ground Truth on ID and calculates comprehensive vector differences.
44
- """
45
- if not DATA_AI.exists() or not DATA_MANUAL.exists():
46
- return None
47
-
48
- try:
49
- # Load datasets
50
- df_ai = pd.read_csv(DATA_AI)
51
- df_manual = pd.read_csv(DATA_MANUAL)
52
-
53
- # Normalize IDs (Trim spaces, ensure string)
54
- df_ai['id'] = df_ai['id'].astype(str).str.strip()
55
- df_manual['id'] = df_manual['id'].astype(str).str.strip()
56
-
57
- df_manual_cols =['id', 'final_veracity_score', 'visual_integrity_score', 'audio_integrity_score', 'source_credibility_score', 'logical_consistency_score', 'emotional_manipulation_score', 'video_audio_score', 'video_caption_score', 'audio_caption_score', 'tags', 'classification']
58
-
59
- # Merge on ID
60
- merged = pd.merge(
61
- df_ai,
62
- df_manual[[c for c in df_manual_cols if c in df_manual.columns]],
63
- on='id',
64
- suffixes=('_ai', '_manual'),
65
- how='inner'
66
- )
67
-
68
- # 1. Final Score Error
69
- merged['final_veracity_score_ai'] = pd.to_numeric(merged['final_veracity_score_ai'], errors='coerce').fillna(0)
70
- merged['final_veracity_score_manual'] = pd.to_numeric(merged['final_veracity_score_manual'], errors='coerce').fillna(0)
71
- merged['abs_error'] = (merged['final_veracity_score_ai'] - merged['final_veracity_score_manual']).abs()
72
-
73
- # 2. Sophisticated Vector Calculations
74
- vector_pairs =[
75
- ('visual_score', 'visual_integrity_score'),
76
- ('audio_score', 'audio_integrity_score'),
77
- ('source_score', 'source_credibility_score'),
78
- ('logic_score', 'logical_consistency_score'),
79
- ('emotion_score', 'emotional_manipulation_score'),
80
- ('align_video_audio', 'video_audio_score'),
81
- ('align_video_caption', 'video_caption_score'),
82
- ('align_audio_caption', 'audio_caption_score'),
83
- ]
84
-
85
- error_cols =['abs_error']
86
- for ai_c, man_c in vector_pairs:
87
- if ai_c in merged.columns and man_c in merged.columns:
88
- # Multiply 1-10 scores by 10 to put them on the same 0-100 scale as final score
89
- merged[ai_c] = pd.to_numeric(merged[ai_c], errors='coerce').fillna(5) * 10
90
- merged[man_c] = pd.to_numeric(merged[man_c], errors='coerce').fillna(5) * 10
91
- err_c = f"err_{ai_c}"
92
- merged[err_c] = (merged[ai_c] - merged[man_c]).abs()
93
- error_cols.append(err_c)
94
-
95
- # Composite MAE represents the mean absolute error across the final score AND all 8 sub-vectors
96
- merged['composite_mae'] = merged[error_cols].mean(axis=1)
97
-
98
- # 3. Tag Accuracy Calculation
99
- merged['tag_accuracy'] = merged.apply(lambda row: calculate_tag_accuracy(row.get('tags_ai', ''), row.get('tags_manual', '')), axis=1)
100
-
101
- return merged
102
- except Exception as e:
103
- print(f"Error merging datasets: {e}")
104
- return None
105
-
106
- def format_config_params(params_raw):
107
- """Parses the config_params JSON string into a readable format for the leaderboard."""
108
- if pd.isna(params_raw) or not params_raw:
109
- return "Defaults"
110
- try:
111
- if isinstance(params_raw, str):
112
- p = json.loads(params_raw)
113
- else:
114
- p = params_raw
115
-
116
- reprompts = p.get('reprompts', 0)
117
- comments = "Yes" if p.get('include_comments') == 'true' or p.get('include_comments') is True else "No"
118
- return f"Retries:{reprompts} | Context:{comments}"
119
- except:
120
- return "Legacy/Unknown"
121
-
122
- def calculate_benchmarks():
123
- """Global stats (All AI models vs Ground Truth)."""
124
- merged = get_combined_dataset()
125
- if merged is None or len(merged) == 0:
126
- return {"status": "no_data"}
127
-
128
- mae = merged['composite_mae'].mean()
129
- tag_acc = merged['tag_accuracy'].mean()
130
-
131
- # Binary Accuracy (Threshold 50)
132
- merged['bin_ai'] = merged['final_veracity_score_ai'] >= 50
133
- merged['bin_manual'] = merged['final_veracity_score_manual'] >= 50
134
- accuracy = (merged['bin_ai'] == merged['bin_manual']).mean()
135
-
136
- recent_samples = merged.tail(5)[['id', 'composite_mae', 'final_veracity_score_ai', 'final_veracity_score_manual']].to_dict(orient='records')
137
-
138
- result = {
139
- "count": int(len(merged)),
140
- "mae": round(mae, 2), # Exposing composite MAE as main MAE metric
141
- "accuracy_percent": round(accuracy * 100, 1),
142
- "tag_accuracy_percent": round(tag_acc * 100, 1),
143
- "recent_samples": recent_samples
144
- }
145
- return sanitize_for_json(result)
146
-
147
- def generate_leaderboard():
148
- """
149
- Groups results by Configuration to rank models/prompts using sophisticated distance measurements.
150
- """
151
- merged = get_combined_dataset()
152
- if merged is None or len(merged) == 0:
153
- return[]
154
-
155
- for col in['config_model', 'config_prompt', 'config_reasoning', 'config_params']:
156
- if col not in merged.columns: merged[col] = "Unknown"
157
-
158
- merged = merged.fillna({'config_model': 'Unknown', 'config_prompt': 'Standard', 'config_reasoning': 'None'})
159
-
160
- merged['params_readable'] = merged['config_params'].apply(format_config_params)
161
-
162
- merged['bin_ai'] = merged['final_veracity_score_ai'] >= 50
163
- merged['bin_manual'] = merged['final_veracity_score_manual'] >= 50
164
- merged['is_correct'] = (merged['bin_ai'] == merged['bin_manual']).astype(int)
165
-
166
- def get_fcot_depth(row):
167
- r = str(row['config_reasoning']).lower()
168
- if 'fcot' in r: return 2
169
- elif 'cot' in r: return 1
170
- return 0
171
- merged['fcot_depth'] = merged.apply(get_fcot_depth, axis=1)
172
-
173
- # Group By Configuration using Composite MAE and Tag Accuracy
174
- grouped = merged.groupby(['config_model', 'config_prompt', 'config_reasoning', 'params_readable', 'fcot_depth']).agg(
175
- comp_mae=('composite_mae', 'mean'),
176
- tag_accuracy=('tag_accuracy', 'mean'),
177
- accuracy=('is_correct', 'mean'),
178
- count=('id', 'count')
179
- ).reset_index()
180
-
181
- leaderboard =[]
182
- for _, row in grouped.iterrows():
183
- leaderboard.append({
184
- "type": "GenAI",
185
- "model": row['config_model'],
186
- "prompt": row['config_prompt'],
187
- "reasoning": row['config_reasoning'],
188
- "params": row['params_readable'],
189
- "fcot_depth": int(row['fcot_depth']),
190
- "comp_mae": round(row['comp_mae'], 2),
191
- "tag_acc": round(row['tag_accuracy'] * 100, 1),
192
- "accuracy": round(row['accuracy'] * 100, 1),
193
- "samples": int(row['count'])
194
- })
195
-
196
- # Sort: Highest Accuracy, Highest Tag Accuracy, then Lowest Composite MAE
197
- leaderboard.sort(key=lambda x: (-x['accuracy'], -x['tag_acc'], x['comp_mae']))
198
-
199
- return sanitize_for_json(leaderboard)
200
-
201
def train_predictive_sandbox(features_config: dict):
    """
    Train a quick logistic-regression baseline on hand-crafted caption features.

    Returns a dict: {"error": ...} on failure, or a success payload with the
    held-out accuracy percentage.

    NOTE(review): `features_config` is never read in this body — presumably it
    was meant to select which features to build; confirm before relying on it.
    """
    if not DATA_MANUAL.exists(): return {"error": "No data"}
    df = pd.read_csv(DATA_MANUAL).dropna(subset=['caption', 'final_veracity_score'])
    # Need a minimal sample for the 70/30 split below to be meaningful.
    if len(df) < 5: return {"error": "Not enough data"}

    # Feature 1: raw caption length.
    df['len'] = df['caption'].astype(str).apply(len)
    # Feature 2: count of sensationalist keywords present in the caption.
    keywords = ["shocking", "breaking", "watch"]
    df['kw_count'] = df['caption'].astype(str).apply(lambda x: sum(1 for k in keywords if k in x.lower()))
    feat_cols = ['len', 'kw_count']

    # Binary target: final veracity score >= 50 is treated as the positive class.
    df['target'] = (pd.to_numeric(df['final_veracity_score'], errors='coerce').fillna(0) >= 50).astype(int)

    try:
        # Fixed random_state keeps the sandbox result reproducible across runs.
        X_train, X_test, y_train, y_test = train_test_split(df[feat_cols], df['target'], test_size=0.3, random_state=42)
        clf = LogisticRegression()
        clf.fit(X_train, y_train)
        return {
            "status": "success",
            "type": "logistic_regression",
            "accuracy": round(clf.score(X_test, y_test) * 100, 1),
            "message": "Baseline trained on Caption Length + Keywords."
        }
    except Exception as e:
        # e.g. a single-class target makes LogisticRegression.fit raise.
        return {"error": str(e)}
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import shutil
4
+ import json
5
+ import math
6
+ from pathlib import Path
7
+
8
+ # Lazy import to avoid startup overhead
9
+ try:
10
+ from autogluon.tabular import TabularPredictor
11
+ AUTOGLUON_AVAILABLE = True
12
+ except ImportError:
13
+ AUTOGLUON_AVAILABLE = False
14
+
15
+ DATA_AI = Path("data/dataset.csv")
16
+ DATA_MANUAL = Path("data/manual_dataset.csv")
17
+
18
def sanitize_for_json(obj):
    """Recursively replace NaN/Inf floats with None so the result is JSON-safe."""
    if isinstance(obj, dict):
        return {key: sanitize_for_json(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return [sanitize_for_json(value) for value in obj]
    if isinstance(obj, float):
        # JSON has no NaN/Infinity literals; map them to null.
        return None if (math.isnan(obj) or math.isinf(obj)) else obj
    return obj
28
+
29
def calculate_tag_accuracy(tags_ai, tags_man):
    """Jaccard similarity of two comma-separated tag strings (case-insensitive)."""
    def to_tag_set(raw):
        # Treat NaN/None as an empty tag list.
        if pd.isna(raw):
            raw = ""
        return {piece.strip().lower() for piece in str(raw).split(',') if piece.strip()}

    set_ai = to_tag_set(tags_ai)
    set_man = to_tag_set(tags_man)
    # Both empty: perfect agreement. Exactly one empty: total disagreement.
    if not set_man and not set_ai:
        return 1.0
    if not set_man or not set_ai:
        return 0.0
    # Jaccard Similarity
    return len(set_ai & set_man) / len(set_ai | set_man)
38
+
39
def get_combined_dataset():
    """
    Joins AI predictions with Manual Ground Truth on ID and calculates comprehensive vector differences.

    Returns the merged DataFrame (with abs_error, per-vector err_* columns,
    composite_mae and tag_accuracy added), or None when either CSV is missing
    or the merge fails.
    """
    if not DATA_AI.exists() or not DATA_MANUAL.exists():
        return None

    try:
        # Load datasets
        df_ai = pd.read_csv(DATA_AI)
        df_manual = pd.read_csv(DATA_MANUAL)

        # Normalize IDs (Trim spaces, ensure string)
        df_ai['id'] = df_ai['id'].astype(str).str.strip()
        df_manual['id'] = df_manual['id'].astype(str).str.strip()

        # Only these ground-truth columns participate in the merge.
        df_manual_cols =['id', 'final_veracity_score', 'visual_integrity_score', 'audio_integrity_score', 'source_credibility_score', 'logical_consistency_score', 'emotional_manipulation_score', 'video_audio_score', 'video_caption_score', 'audio_caption_score', 'tags', 'classification']

        # Merge on ID
        # Columns present on both sides (e.g. final_veracity_score, tags) get
        # the _ai / _manual suffixes used throughout the rest of this function.
        merged = pd.merge(
            df_ai,
            df_manual[[c for c in df_manual_cols if c in df_manual.columns]],
            on='id',
            suffixes=('_ai', '_manual'),
            how='inner'
        )

        # 1. Final Score Error
        merged['final_veracity_score_ai'] = pd.to_numeric(merged['final_veracity_score_ai'], errors='coerce').fillna(0)
        merged['final_veracity_score_manual'] = pd.to_numeric(merged['final_veracity_score_manual'], errors='coerce').fillna(0)
        merged['abs_error'] = (merged['final_veracity_score_ai'] - merged['final_veracity_score_manual']).abs()

        # 2. Sophisticated Vector Calculations
        # (AI column name, manual ground-truth column name) pairs to compare.
        vector_pairs =[
            ('visual_score', 'visual_integrity_score'),
            ('audio_score', 'audio_integrity_score'),
            ('source_score', 'source_credibility_score'),
            ('logic_score', 'logical_consistency_score'),
            ('emotion_score', 'emotional_manipulation_score'),
            ('align_video_audio', 'video_audio_score'),
            ('align_video_caption', 'video_caption_score'),
            ('align_audio_caption', 'audio_caption_score'),
        ]

        error_cols = ['abs_error']
        for ai_c, man_c in vector_pairs:
            if ai_c in merged.columns and man_c in merged.columns:
                # Multiply 1-10 scores by 10 to put them on the same 0-100 scale as final score
                # NOTE(review): missing sub-scores default to 5 (neutral) before scaling — confirm intent.
                merged[ai_c] = pd.to_numeric(merged[ai_c], errors='coerce').fillna(5) * 10
                merged[man_c] = pd.to_numeric(merged[man_c], errors='coerce').fillna(5) * 10
                err_c = f"err_{ai_c}"
                merged[err_c] = (merged[ai_c] - merged[man_c]).abs()
                error_cols.append(err_c)

        # Composite MAE represents the mean absolute error across the final score AND all 8 sub-vectors
        merged['composite_mae'] = merged[error_cols].mean(axis=1)

        # 3. Tag Accuracy Calculation
        merged['tag_accuracy'] = merged.apply(lambda row: calculate_tag_accuracy(row.get('tags_ai', ''), row.get('tags_manual', '')), axis=1)

        return merged
    except Exception as e:
        # Best-effort: a malformed CSV should not take down the API.
        print(f"Error merging datasets: {e}")
        return None
103
+
104
def format_config_params(params_raw):
    """Parses the config_params JSON string into a readable format for the leaderboard.

    Accepts either a JSON string or an already-decoded dict. Missing/empty
    params are reported as "Defaults"; anything uninterpretable as
    "Legacy/Unknown".
    """
    # Missing / empty params mean the run used default settings.
    if pd.isna(params_raw) or not params_raw:
        return "Defaults"
    try:
        p = json.loads(params_raw) if isinstance(params_raw, str) else params_raw
        # Legacy rows may carry non-dict payloads (numbers, lists); previously
        # these fell into a bare `except:` via AttributeError — guard explicitly.
        if not isinstance(p, dict):
            return "Legacy/Unknown"
        reprompts = p.get('reprompts', 0)
        # 'include_comments' may be stored as a JSON bool or the string 'true'.
        comments = "Yes" if p.get('include_comments') == 'true' or p.get('include_comments') is True else "No"
        return f"Retries:{reprompts} | Context:{comments}"
    except (json.JSONDecodeError, TypeError, ValueError):
        # Narrowed from a bare `except:` so genuine programming errors surface.
        return "Legacy/Unknown"
119
+
120
def calculate_benchmarks():
    """Global stats (All AI models vs Ground Truth)."""
    merged = get_combined_dataset()
    if merged is None or len(merged) == 0:
        return {"status": "no_data"}

    composite_mae = merged['composite_mae'].mean()
    mean_tag_accuracy = merged['tag_accuracy'].mean()

    # Binary Accuracy (Threshold 50)
    merged['bin_ai'] = merged['final_veracity_score_ai'] >= 50
    merged['bin_manual'] = merged['final_veracity_score_manual'] >= 50
    agreement = (merged['bin_ai'] == merged['bin_manual']).mean()

    sample_cols = ['id', 'composite_mae', 'final_veracity_score_ai', 'final_veracity_score_manual']
    recent_samples = merged.tail(5)[sample_cols].to_dict(orient='records')

    # Exposing composite MAE as the headline MAE metric.
    return sanitize_for_json({
        "count": int(len(merged)),
        "mae": round(composite_mae, 2),
        "accuracy_percent": round(agreement * 100, 1),
        "tag_accuracy_percent": round(mean_tag_accuracy * 100, 1),
        "recent_samples": recent_samples
    })
144
+
145
def generate_leaderboard():
    """
    Groups results by Configuration to rank models/prompts using sophisticated distance measurements.

    Returns a JSON-safe list of per-configuration entries sorted by accuracy,
    then tag accuracy, then composite MAE.
    """
    merged = get_combined_dataset()
    if merged is None or len(merged) == 0:
        return []

    # Guarantee the grouping columns exist even for legacy rows.
    for col in ['config_model', 'config_prompt', 'config_reasoning', 'config_params']:
        if col not in merged.columns: merged[col] = "Unknown"

    merged = merged.fillna({'config_model': 'Unknown', 'config_prompt': 'Standard', 'config_reasoning': 'None'})

    merged['params_readable'] = merged['config_params'].apply(format_config_params)

    def extract_tools(p_raw):
        # Summarise which auxiliary tools were enabled for a run.
        try:
            p = json.loads(p_raw) if isinstance(p_raw, str) else p_raw
            if not isinstance(p, dict): return "None"
            tools = []
            if p.get('agent_active'): tools.append("Agent")
            if p.get('use_search'): tools.append("Search")
            if p.get('use_code'): tools.append("Code")
            if p.get('few_shot') or p.get('multi_shot'): tools.append("Few-Shot")
            return ", ".join(tools) if tools else "None"
        except (json.JSONDecodeError, TypeError, ValueError):
            # Narrowed from a bare `except:` so real bugs are not silently eaten.
            return "None"

    merged['tools'] = merged['config_params'].apply(extract_tools)

    # Binary (real/fake) agreement with the manual label at threshold 50.
    merged['bin_ai'] = merged['final_veracity_score_ai'] >= 50
    merged['bin_manual'] = merged['final_veracity_score_manual'] >= 50
    merged['is_correct'] = (merged['bin_ai'] == merged['bin_manual']).astype(int)

    def get_fcot_depth(row):
        # 2 = Fractal CoT, 1 = plain CoT, 0 = no structured reasoning.
        r = str(row['config_reasoning']).lower()
        if 'fcot' in r: return 2
        elif 'cot' in r: return 1
        return 0
    merged['fcot_depth'] = merged.apply(get_fcot_depth, axis=1)

    agg_dict = {
        'comp_mae': ('composite_mae', 'mean'),
        'tag_accuracy': ('tag_accuracy', 'mean'),
        'accuracy': ('is_correct', 'mean'),
        'count': ('id', 'count')
    }

    # Include per-vector mean errors when present so the UI can break them out.
    err_cols =[
        'err_visual_score', 'err_audio_score', 'err_source_score',
        'err_logic_score', 'err_emotion_score', 'err_align_video_audio',
        'err_align_video_caption', 'err_align_audio_caption'
    ]
    for col in err_cols:
        if col in merged.columns:
            agg_dict[col] = (col, 'mean')

    # Group By Configuration using Composite MAE and Tag Accuracy
    grouped = merged.groupby(['config_model', 'config_prompt', 'config_reasoning', 'params_readable', 'tools', 'fcot_depth']).agg(**agg_dict).reset_index()

    leaderboard =[]
    for _, row in grouped.iterrows():
        entry = {
            "type": "GenAI",
            "model": row['config_model'],
            "prompt": row['config_prompt'],
            "reasoning": row['config_reasoning'],
            "params": row['params_readable'],
            "tools": row['tools'],
            "fcot_depth": int(row['fcot_depth']),
            "comp_mae": round(row['comp_mae'], 2),
            "tag_acc": round(row['tag_accuracy'] * 100, 1),
            "accuracy": round(row['accuracy'] * 100, 1),
            "samples": int(row['count'])
        }
        for col in err_cols:
            if col in row:
                entry[col] = round(row[col], 2)
        leaderboard.append(entry)

    # Sort: Highest Accuracy, Highest Tag Accuracy, then Lowest Composite MAE
    leaderboard.sort(key=lambda x: (-x['accuracy'], -x['tag_acc'], x['comp_mae']))

    return sanitize_for_json(leaderboard)
src/common_utils.py CHANGED
@@ -1,104 +1,110 @@
1
- import os
2
- import re
3
- import csv
4
- import logging
5
- import datetime
6
- import subprocess
7
- import hashlib
8
- from pathlib import Path
9
- import yt_dlp
10
- import transcription
11
-
12
- logger = logging.getLogger(__name__)
13
-
14
- def robust_read_csv(file_path: Path):
15
- if not file_path.exists():
16
- return
17
-
18
- try:
19
- with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
20
- clean_lines = (line.replace('\0', '') for line in f)
21
- reader = csv.DictReader(clean_lines)
22
- for row in reader:
23
- if row:
24
- yield row
25
- except Exception as e:
26
- logger.error(f"Error reading CSV {file_path}: {e}")
27
- return
28
-
29
- def extract_tweet_id(url: str) -> str | None:
30
- if not url: return None
31
- match = re.search(r"(?:twitter|x)\.com/[^/]+/status/(\d+)", url)
32
- if match: return match.group(1)
33
- return None
34
-
35
- def normalize_link(link: str) -> str:
36
- if not link: return ""
37
- return link.split('?')[0].strip().rstrip('/').replace('http://', '').replace('https://', '').replace('www.', '')
38
-
39
- def parse_vtt(file_path: str) -> str:
40
- """Parses a .vtt subtitle file and returns the clean text content."""
41
- try:
42
- if not os.path.exists(file_path):
43
- return "Transcript file not found."
44
-
45
- with open(file_path, 'r', encoding='utf-8') as f:
46
- lines = f.readlines()
47
-
48
- text_lines =[]
49
- for line in lines:
50
- line = line.strip()
51
- if line and not line.startswith('WEBVTT') and not '-->' in line and not line.isdigit():
52
- clean_line = re.sub(r'<[^>]+>', '', line)
53
- if clean_line and (not text_lines or clean_line != text_lines[-1]):
54
- text_lines.append(clean_line)
55
-
56
- return "\n".join(text_lines) if text_lines else "No speech found in transcript."
57
- except Exception as e:
58
- logger.error(f"Error parsing VTT file {file_path}: {e}")
59
- return f"Error reading transcript: {e}"
60
-
61
- async def prepare_video_assets(link: str, output_id: str) -> dict:
62
- video_dir = Path("data/videos")
63
- if not video_dir.exists():
64
- video_dir.mkdir(parents=True, exist_ok=True)
65
-
66
- video_path = video_dir / f"{output_id}.mp4"
67
- audio_path = video_dir / f"{output_id}.wav"
68
- transcript_path = video_dir / f"{output_id}.vtt"
69
-
70
- caption = ""
71
- video_downloaded = False
72
-
73
- ydl_opts = {
74
- 'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/mp4',
75
- 'outtmpl': str(video_path),
76
- 'quiet': True, 'ignoreerrors': True, 'no_warnings': True, 'skip_download': False
77
- }
78
-
79
- try:
80
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
81
- info = ydl.extract_info(link, download=False)
82
- if info:
83
- caption = info.get('description', '') or info.get('title', '')
84
- formats = info.get('formats',[])
85
- if not formats and not info.get('url'):
86
- logger.info(f"No video formats found for {link}. Treating as text-only.")
87
- else:
88
- if not video_path.exists(): ydl.download([link])
89
- except Exception as e:
90
- logger.error(f"Download error for {link}: {e}")
91
-
92
- if video_path.exists() and video_path.stat().st_size > 0:
93
- video_downloaded = True
94
- if not audio_path.exists():
95
- subprocess.run(["ffmpeg", "-y", "-i", str(video_path), "-vn", "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1", str(audio_path)], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
96
- if audio_path.exists() and not transcript_path.exists():
97
- transcription.load_model()
98
- transcription.generate_transcript(str(audio_path))
99
-
100
- return {
101
- "video": str(video_path) if video_downloaded else None,
102
- "transcript": str(transcript_path) if video_downloaded and transcript_path.exists() else None,
103
- "caption": caption
104
- }
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import csv
4
+ import logging
5
+ import datetime
6
+ import subprocess
7
+ import hashlib
8
+ from pathlib import Path
9
+ import yt_dlp
10
+ import transcription
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
def robust_read_csv(file_path: Path):
    """Yield rows of a CSV as dicts, tolerating NUL bytes and bad encodings.

    Missing files and read errors produce an empty iteration rather than raising.
    """
    if not file_path.exists():
        return

    try:
        with open(file_path, 'r', encoding='utf-8', errors='replace') as handle:
            # Strip embedded NULs which would otherwise break the csv module.
            cleaned = (raw.replace('\0', '') for raw in handle)
            for record in csv.DictReader(cleaned):
                if record:
                    yield record
    except Exception as e:
        logger.error(f"Error reading CSV {file_path}: {e}")
        return
28
+
29
+ def extract_tweet_id(url: str) -> str | None:
30
+ if not url: return None
31
+ match = re.search(r"(?:twitter|x)\.com/[^/]+/status/(\d+)", url)
32
+ if match: return match.group(1)
33
+ return None
34
+
35
+ def extract_twitter_username(url: str) -> str | None:
36
+ if not url: return None
37
+ match = re.search(r"(?:twitter|x)\.com/([^/]+)/status/\d+", url)
38
+ if match: return match.group(1).lower()
39
+ return None
40
+
41
def normalize_link(link: str) -> str:
    """Canonicalise a URL: drop query string, scheme, 'www.' and trailing slash."""
    if not link:
        return ""
    base = link.split('?')[0].strip().rstrip('/')
    return base.replace('http://', '').replace('https://', '').replace('www.', '')
44
+
45
def parse_vtt(file_path: str) -> str:
    """Parses a .vtt subtitle file and returns the clean text content."""
    try:
        if not os.path.exists(file_path):
            return "Transcript file not found."

        with open(file_path, 'r', encoding='utf-8') as handle:
            raw_lines = handle.readlines()

        collected = []
        for raw in raw_lines:
            stripped = raw.strip()
            # Skip blank lines, the WEBVTT header, cue timings and bare cue numbers.
            if not stripped or stripped.startswith('WEBVTT') or '-->' in stripped or stripped.isdigit():
                continue
            plain = re.sub(r'<[^>]+>', '', stripped)
            # Drop empty results and consecutive duplicates (VTT often repeats cues).
            if plain and (not collected or plain != collected[-1]):
                collected.append(plain)

        return "\n".join(collected) if collected else "No speech found in transcript."
    except Exception as e:
        logger.error(f"Error parsing VTT file {file_path}: {e}")
        return f"Error reading transcript: {e}"
66
+
67
async def prepare_video_assets(link: str, output_id: str) -> dict:
    """
    Download a video via yt-dlp, extract audio with ffmpeg and generate a
    transcript, caching all artifacts under data/videos/<output_id>.*.

    Returns a dict:
        video      -- path to the .mp4, or None if no video was obtained
        transcript -- path to the .vtt, or None when unavailable
        caption    -- the post's description/title text (may be empty)
    """
    video_dir = Path("data/videos")
    if not video_dir.exists():
        video_dir.mkdir(parents=True, exist_ok=True)

    video_path = video_dir / f"{output_id}.mp4"
    audio_path = video_dir / f"{output_id}.wav"
    transcript_path = video_dir / f"{output_id}.vtt"

    caption = ""
    video_downloaded = False

    # Errors are suppressed so text-only posts don't abort the pipeline.
    ydl_opts = {
        'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/mp4',
        'outtmpl': str(video_path),
        'quiet': True, 'ignoreerrors': True, 'no_warnings': True, 'skip_download': False
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            # Probe metadata first; only download when actual formats exist.
            info = ydl.extract_info(link, download=False)
            if info:
                caption = info.get('description', '') or info.get('title', '')
                formats = info.get('formats',[])
                if not formats and not info.get('url'):
                    logger.info(f"No video formats found for {link}. Treating as text-only.")
                else:
                    # Cached: skip the download when the file is already on disk.
                    if not video_path.exists(): ydl.download([link])
    except Exception as e:
        logger.error(f"Download error for {link}: {e}")

    if video_path.exists() and video_path.stat().st_size > 0:
        video_downloaded = True
        # Extract 16 kHz mono PCM audio for the transcription model.
        if not audio_path.exists():
            subprocess.run(["ffmpeg", "-y", "-i", str(video_path), "-vn", "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1", str(audio_path)], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        # Lazily load the ASR model and produce the .vtt transcript.
        # NOTE(review): generate_transcript presumably writes <output_id>.vtt
        # next to the .wav — confirm against the transcription module.
        if audio_path.exists() and not transcript_path.exists():
            transcription.load_model()
            transcription.generate_transcript(str(audio_path))

    return {
        "video": str(video_path) if video_downloaded else None,
        "transcript": str(transcript_path) if video_downloaded and transcript_path.exists() else None,
        "caption": caption
    }
+ }
src/factuality_logic.py CHANGED
@@ -7,9 +7,13 @@ import asyncio
7
  from pathlib import Path
8
  import inference_logic
9
  from toon_parser import parse_toon_line
 
10
 
11
  logger = logging.getLogger(__name__)
12
 
 
 
 
13
  PROMPT_VISUAL_ARTIFACTS = (
14
  "Analyze the video for visual manipulation (Deepfakes, editing anomalies).\n"
15
  "Steps inside <thinking>: 1. Scan for artifacts. 2. Check cuts.\n"
@@ -36,29 +40,10 @@ PROMPT_AUDIO_ANALYSIS = (
36
  "Score(1-10),\"Justification text\""
37
  )
38
 
39
-
40
def parse_vtt(file_path: str) -> str:
    """Parse a .vtt subtitle file into plain text.

    Skips the WEBVTT header, cue timings and bare cue numbers, strips inline
    markup tags and collapses consecutive duplicate lines.
    (Duplicate of common_utils.parse_vtt; removed in this commit.)
    """
    try:
        if not os.path.exists(file_path):
            return "Transcript file not found."

        with open(file_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()

        text_lines = []
        for line in lines:
            line = line.strip()
            # Keep only caption text: not the header, not timings, not cue indices.
            if line and not line.startswith('WEBVTT') and not '-->' in line and not line.isdigit():
                clean_line = re.sub(r'<[^>]+>', '', line)
                # Drop empties and consecutive duplicates (VTT often repeats cues).
                if clean_line and (not text_lines or clean_line != text_lines[-1]):
                    text_lines.append(clean_line)

        return "\n".join(text_lines) if text_lines else "No speech found in transcript."
    except Exception as e:
        logger.error(f"Error parsing VTT file {file_path}: {e}")
        return f"Error reading transcript: {e}"
60
-
61
  async def run_factuality_pipeline(paths: dict, checks: dict, generation_config: dict):
 
 
 
62
  video_path = paths.get("video")
63
  transcript_path = paths.get("transcript")
64
 
@@ -78,7 +63,7 @@ async def run_factuality_pipeline(paths: dict, checks: dict, generation_config:
78
  yield f"\n--- Extracted Transcript ---\n{transcript}\n--------------------------\n\n"
79
  await asyncio.sleep(0.1)
80
 
81
- analysis_steps = []
82
  if checks.get("visuals"):
83
  analysis_steps.append(("Visual Integrity", PROMPT_VISUAL_ARTIFACTS))
84
  if checks.get("content"):
@@ -96,6 +81,7 @@ async def run_factuality_pipeline(paths: dict, checks: dict, generation_config:
96
  sampling_fps = current_gen_config.pop("sampling_fps", 2.0)
97
  current_gen_config.pop("num_perceptions", None)
98
 
 
99
  current_gen_config["temperature"] = 0.1
100
  current_gen_config["do_sample"] = True
101
 
@@ -109,7 +95,9 @@ async def run_factuality_pipeline(paths: dict, checks: dict, generation_config:
109
 
110
  yield f" - Analysis Complete for '{title}'. Parsing TOON...\n\n"
111
 
 
112
  parsed_result = {}
 
113
  match = re.search(r'(\w+_analysis): result\[2\]\{score,justification\}:\s*\n(.+)', ans, re.MULTILINE)
114
 
115
  thinking = "No thinking block found."
@@ -125,6 +113,7 @@ async def run_factuality_pipeline(paths: dict, checks: dict, generation_config:
125
  yield f"Warning: Model did not return valid TOON. Raw output:\n{ans}\n"
126
  continue
127
 
 
128
  score = parsed_result.get('score', 'N/A')
129
  justification = parsed_result.get('justification', 'No justification provided.')
130
 
 
7
  from pathlib import Path
8
  import inference_logic
9
  from toon_parser import parse_toon_line
10
+ from common_utils import parse_vtt
11
 
12
  logger = logging.getLogger(__name__)
13
 
14
+ # --- Enhanced TOON Prompts for Individual Checks ---
15
+ # Using TOON reduces output tokens significantly compared to JSON.
16
+
17
  PROMPT_VISUAL_ARTIFACTS = (
18
  "Analyze the video for visual manipulation (Deepfakes, editing anomalies).\n"
19
  "Steps inside <thinking>: 1. Scan for artifacts. 2. Check cuts.\n"
 
40
  "Score(1-10),\"Justification text\""
41
  )
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  async def run_factuality_pipeline(paths: dict, checks: dict, generation_config: dict):
44
+ """
45
+ Asynchronously runs a pipeline of factuality checks, parses TOON scores, and yields results.
46
+ """
47
  video_path = paths.get("video")
48
  transcript_path = paths.get("transcript")
49
 
 
63
  yield f"\n--- Extracted Transcript ---\n{transcript}\n--------------------------\n\n"
64
  await asyncio.sleep(0.1)
65
 
66
+ analysis_steps =[]
67
  if checks.get("visuals"):
68
  analysis_steps.append(("Visual Integrity", PROMPT_VISUAL_ARTIFACTS))
69
  if checks.get("content"):
 
81
  sampling_fps = current_gen_config.pop("sampling_fps", 2.0)
82
  current_gen_config.pop("num_perceptions", None)
83
 
84
+ # FORCE LOW TEMP for structured TOON analysis
85
  current_gen_config["temperature"] = 0.1
86
  current_gen_config["do_sample"] = True
87
 
 
95
 
96
  yield f" - Analysis Complete for '{title}'. Parsing TOON...\n\n"
97
 
98
+ # --- Attempt to parse TOON from the model's response ---
99
  parsed_result = {}
100
+ # Regex to find the TOON data line: key: type[count]{headers}:\nVALUE
101
  match = re.search(r'(\w+_analysis): result\[2\]\{score,justification\}:\s*\n(.+)', ans, re.MULTILINE)
102
 
103
  thinking = "No thinking block found."
 
113
  yield f"Warning: Model did not return valid TOON. Raw output:\n{ans}\n"
114
  continue
115
 
116
+ # --- Display the parsed, structured result ---
117
  score = parsed_result.get('score', 'N/A')
118
  justification = parsed_result.get('justification', 'No justification provided.')
119
 
src/inference_logic.py CHANGED
@@ -1,14 +1,14 @@
 
1
  import re
 
2
  import sys
3
  import os
4
- import time
5
  import logging
6
  import asyncio
7
  import json
8
- import requests
9
  import datetime
 
10
 
11
- # Safe imports for Lite Mode (API only)
12
  try:
13
  from transformers import Qwen3VLForConditionalGeneration, AutoProcessor
14
  from peft import PeftModel
@@ -17,28 +17,30 @@ except ImportError:
17
  AutoProcessor = None
18
  PeftModel = None
19
 
20
- from labeling_logic import (
21
- LABELING_PROMPT_TEMPLATE, SCORE_INSTRUCTIONS_SIMPLE, SCORE_INSTRUCTIONS_REASONING,
22
- SCHEMA_SIMPLE, SCHEMA_REASONING,
23
- FCOT_MACRO_PROMPT, FCOT_MESO_PROMPT, FCOT_SYNTHESIS_PROMPT
24
- )
25
- from toon_parser import parse_veracity_toon
26
-
27
- # Optional local imports
28
  try:
29
  from my_vision_process import process_vision_info, client
30
  except ImportError:
31
  process_vision_info = None
32
  client = None
33
 
 
 
 
 
 
 
 
 
 
34
  # Google GenAI Imports
35
  try:
36
  import google.generativeai as genai_legacy
37
- from google.generativeai.types import generation_types, HarmCategory, HarmBlockThreshold
38
  except ImportError:
39
  genai_legacy = None
40
 
41
  try:
 
42
  from google import genai
43
  from google.genai.types import (
44
  GenerateContentConfig,
@@ -47,8 +49,7 @@ try:
47
  Tool,
48
  VertexAISearch,
49
  GoogleSearch,
50
- Part,
51
- SafetySetting
52
  )
53
  import vertexai
54
  except ImportError:
@@ -62,18 +63,133 @@ peft_model = None
62
  active_model = None
63
  logger = logging.getLogger(__name__)
64
 
65
- TEXT_ONLY_INSTRUCTIONS = """
66
- NOTE: You are operating in TEXT-ONLY mode. The video file could not be analyzed directly.
67
- You must rely entirely on the provided Context (Caption and Transcript) to deduce the veracity.
68
- If the text lacks sufficient detail to score visual or audio integrity, score them as 5 (Neutral/Unknown).
69
- """
70
-
71
- def get_formatted_tag_list():
72
- return "Suggested tags: politics, satire, deepfake, misleading, true, news"
73
-
74
  def load_models():
75
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  def extract_json_from_text(text):
78
  try:
79
  match = re.search(r'\{[\s\S]*\}', text)
@@ -82,39 +198,51 @@ def extract_json_from_text(text):
82
  except:
83
  pass
84
  return {}
85
-
86
def smart_merge(base, new_data):
    """
    Recursively merge new_data into base, filling only values that look
    empty/invalid (falsy, "0" or "n/a") with valid replacements.

    Mutates and returns base when both sides are dicts; otherwise prefers
    whichever argument carries data.
    """
    if not isinstance(new_data, dict): return new_data if new_data else base
    if not isinstance(base, dict): return new_data
    for k, v in new_data.items():
        if k not in base: base[k] = v
        else:
            if isinstance(base[k], dict) and isinstance(v, dict): smart_merge(base[k], v)
            else:
                base_val = base[k]
                new_val = v
                # A value counts as "valid" when truthy and not a placeholder.
                is_base_valid = base_val and str(base_val) != "0" and str(base_val).lower() != "n/a"
                is_new_valid = new_val and str(new_val) != "0" and str(new_val).lower() != "n/a"
                # Only fill gaps; never clobber an existing valid value.
                if not is_base_valid and is_new_valid: base[k] = new_val
    return base
100
 
101
  def validate_parsed_data(data, is_text_only):
102
  missing =[]
 
103
  if not data.get('video_context_summary'): missing.append("summary")
 
104
  final = data.get('final_assessment', {})
105
  if not final.get('reasoning') or len(str(final.get('reasoning', ''))) < 5: missing.append("final:reasoning")
 
106
  vectors = data.get('veracity_vectors', {})
107
- for k in['visual_integrity_score', 'audio_integrity_score', 'source_credibility_score', 'logical_consistency_score', 'emotional_manipulation_score']:
 
108
  if k in['visual_integrity_score', 'audio_integrity_score'] and is_text_only: continue
109
  v = vectors.get(k)
110
  if not v or str(v) == '0' or str(v).lower() == 'n/a': missing.append(f"vector:{k}")
 
111
  mod = data.get('modalities', {})
112
  for k in['video_audio_score', 'video_caption_score', 'audio_caption_score']:
113
  if k in['video_audio_score', 'video_caption_score'] and is_text_only: continue
114
  v = mod.get(k)
115
  if not v or str(v) == '0' or str(v).lower() == 'n/a': missing.append(f"modality:{k}")
 
 
 
 
 
 
 
116
  return missing
117
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  def save_debug_log(request_id, kind, content, attempt, label=""):
119
  if not request_id: return
120
  try:
@@ -128,82 +256,42 @@ def save_debug_log(request_id, kind, content, attempt, label=""):
128
  except Exception as e:
129
  logger.error(f"Failed to save debug log: {e}")
130
 
131
async def attempt_toon_repair(original_text: str, schema: str, client, model_type: str, config: dict):
    """
    Ask a model to reformat malformed output into the strict TOON schema.

    model_type selects the backend: 'gemini' (legacy SDK) or 'vertex'.
    Returns the repaired text, or original_text unchanged when the call fails.
    NOTE(review): an unrecognised model_type returns "" (empty) — confirm intent.
    """
    logger.info("Attempting TOON Repair...")
    repair_prompt = f"SYSTEM: Reformat the following text into strict TOON schema. Infer missing scores as 0.\n\nSCHEMA:\n{schema}\n\nINPUT:\n{original_text}\n"
    try:
        loop = asyncio.get_event_loop()
        repaired_text = ""
        if model_type == 'gemini':
            model = genai_legacy.GenerativeModel("models/gemini-2.0-flash-exp")
            # Blocking SDK call executed off the event loop.
            response = await loop.run_in_executor(None, lambda: model.generate_content(repair_prompt))
            repaired_text = response.text
        elif model_type == 'vertex':
            # Reuse the caller's client when given; otherwise build one from config.
            cl = client if client else genai.Client(vertexai=True, project=config['project_id'], location=config['location'])
            response = await loop.run_in_executor(None, lambda: cl.models.generate_content(model=config['model_name'], contents=repair_prompt))
            repaired_text = response.text
        return repaired_text
    except Exception as e:
        # Best-effort repair: fall back to the unmodified text on any failure.
        logger.error(f"Repair failed: {e}")
        return original_text
149
-
150
  async def run_gemini_labeling_pipeline(video_path: str, caption: str, transcript: str, gemini_config: dict, include_comments: bool, reasoning_method: str = "cot", system_persona: str = "", request_id: str = None):
151
  if genai_legacy is None:
152
  yield "ERROR: Legacy SDK missing.\n"
153
  return
154
-
155
  api_key = gemini_config.get("api_key")
156
- if not api_key:
157
- yield "ERROR: No Gemini API Key provided."
158
- return
159
-
160
  max_retries = int(gemini_config.get("max_retries", 1))
161
-
162
- safety_settings =[
163
- {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
164
- {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
165
- {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
166
- {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
167
- ]
168
-
169
  try:
170
  genai_legacy.configure(api_key=api_key)
171
  loop = asyncio.get_event_loop()
172
  uploaded_file = None
173
  is_text_only = False
174
-
175
  if video_path and os.path.exists(video_path):
176
- yield f"Uploading video to Gemini..."
177
- uploaded_file = await loop.run_in_executor(None, lambda: genai_legacy.upload_file(path=video_path, mime_type="video/mp4"))
178
- wait_start = time.time()
179
- while True:
180
- uploaded_file = await loop.run_in_executor(None, lambda: genai_legacy.get_file(uploaded_file.name))
181
- state_name = uploaded_file.state.name
182
- if state_name == "ACTIVE": break
183
- elif state_name == "FAILED":
184
- yield "ERROR: Google failed to process video."
185
- return
186
- if time.time() - wait_start > 300:
187
- yield "ERROR: Video processing timed out."
188
- return
189
- yield "Processing video on Google servers..."
190
- await asyncio.sleep(5)
191
- else:
192
- is_text_only = True
193
 
194
- model_name = gemini_config.get("model_name") or "models/gemini-2.0-flash-exp"
195
- model = genai_legacy.GenerativeModel(model_name)
 
 
 
 
 
 
 
196
  toon_schema = SCHEMA_REASONING if include_comments else SCHEMA_SIMPLE
197
  score_instructions = SCORE_INSTRUCTIONS_REASONING if include_comments else SCORE_INSTRUCTIONS_SIMPLE
 
198
 
199
- if is_text_only: system_persona += "\n" + TEXT_ONLY_INSTRUCTIONS
200
-
201
- raw_text = ""
202
- prompt_used = ""
203
- gen_config = {"temperature": 0.1}
204
  accumulated_data = {}
 
205
  fcot_trace = {}
206
  full_raw_text = ""
 
207
 
208
  for attempt in range(max_retries + 1):
209
  raw_text = ""
@@ -228,52 +316,46 @@ async def run_gemini_labeling_pipeline(video_path: str, caption: str, transcript
228
  save_debug_log(request_id, 'response', raw_text, attempt, 'reprompt')
229
  else:
230
  if reasoning_method == "fcot":
231
- yield "Starting FCoT (Gemini)..."
232
  chat = model.start_chat(history=[])
233
 
234
  macro_prompt = FCOT_MACRO_PROMPT.format(system_persona=system_persona, caption=caption, transcript=transcript)
235
- if is_text_only: macro_prompt = "NOTE: Text Only Analysis.\n" + macro_prompt
236
  save_debug_log(request_id, 'prompt', macro_prompt, attempt, 'fcot_macro')
237
-
238
- inputs1 =[macro_prompt]
239
  if uploaded_file: inputs1.insert(0, uploaded_file)
240
-
241
- res1 = await loop.run_in_executor(None, lambda: chat.send_message(inputs1, safety_settings=safety_settings))
242
  macro_hypothesis = res1.text
243
  save_debug_log(request_id, 'response', macro_hypothesis, attempt, 'fcot_macro')
244
  fcot_trace['macro'] = macro_hypothesis
245
- yield f"Hypothesis: {macro_hypothesis[:100]}...\n"
246
 
247
  meso_prompt = FCOT_MESO_PROMPT.format(macro_hypothesis=macro_hypothesis)
248
  save_debug_log(request_id, 'prompt', meso_prompt, attempt, 'fcot_meso')
249
- res2 = await loop.run_in_executor(None, lambda: chat.send_message(meso_prompt, safety_settings=safety_settings))
250
  micro_observations = res2.text
251
  save_debug_log(request_id, 'response', micro_observations, attempt, 'fcot_meso')
252
  fcot_trace['meso'] = micro_observations
253
-
254
- synthesis_prompt = FCOT_SYNTHESIS_PROMPT.format(toon_schema=toon_schema, score_instructions=score_instructions, tag_list_text=get_formatted_tag_list())
255
  save_debug_log(request_id, 'prompt', synthesis_prompt, attempt, 'fcot_synthesis')
256
- res3 = await loop.run_in_executor(None, lambda: chat.send_message(synthesis_prompt, safety_settings=safety_settings))
257
  raw_text = res3.text
258
  save_debug_log(request_id, 'response', raw_text, attempt, 'fcot_synthesis')
259
- prompt_used = f"FCoT:\n{macro_prompt}\n..."
260
  else:
261
- prompt_text = LABELING_PROMPT_TEMPLATE.format(system_persona=system_persona, caption=caption, transcript=transcript, toon_schema=toon_schema, score_instructions=score_instructions, tag_list_text=get_formatted_tag_list())
262
- if is_text_only: prompt_text = "NOTE: Text Only Analysis.\n" + prompt_text
 
 
 
263
  prompt_used = prompt_text
264
- save_debug_log(request_id, 'prompt', prompt_text, attempt, 'standard')
265
- yield f"Generating Labels ({model_name})..."
266
-
267
  inputs = [prompt_text]
268
  if uploaded_file: inputs.append(uploaded_file)
269
-
270
- response = await loop.run_in_executor(
271
- None,
272
- lambda: model.generate_content(inputs, generation_config=gen_config, safety_settings=safety_settings)
273
- )
274
  raw_text = response.text
275
- save_debug_log(request_id, 'response', raw_text, attempt, 'standard')
276
-
277
  if raw_text:
278
  full_raw_text += f"\n--- Attempt {attempt} ---\n{raw_text}\n"
279
  parsed_step = parse_veracity_toon(raw_text)
@@ -286,28 +368,20 @@ async def run_gemini_labeling_pipeline(video_path: str, caption: str, transcript
286
  else:
287
  parsed_step[k] = json_data[k]
288
  accumulated_data = smart_merge(accumulated_data, parsed_step)
289
-
290
  missing_fields = validate_parsed_data(accumulated_data, is_text_only)
291
  if not missing_fields:
292
- yield "Validation Passed. All factuality components processed and confidence scores obtained.\n"
293
  yield {"raw_toon": full_raw_text, "parsed_data": accumulated_data, "prompt_used": prompt_used, "fcot_trace": fcot_trace}
294
  break
295
-
296
  if attempt == max_retries:
297
  yield f"Max retries reached. Saving incomplete data.\n"
298
  yield {"raw_toon": full_raw_text, "parsed_data": accumulated_data, "prompt_used": prompt_used, "fcot_trace": fcot_trace}
299
  break
300
 
301
- if uploaded_file:
302
- try:
303
- await loop.run_in_executor(None, lambda: genai_legacy.delete_file(name=uploaded_file.name))
304
- except Exception:
305
- pass
306
-
307
- except Exception as e:
308
- logger.error(f"Gemini Pipeline Error: {e}", exc_info=True)
309
- yield f"ERROR (Gemini): {e}"
310
-
311
 
312
  async def run_vertex_labeling_pipeline(video_path: str, caption: str, transcript: str, vertex_config: dict, include_comments: bool, reasoning_method: str = "cot", system_persona: str = "", request_id: str = None):
313
  if genai is None:
@@ -315,52 +389,55 @@ async def run_vertex_labeling_pipeline(video_path: str, caption: str, transcript
315
  return
316
 
317
  project_id = vertex_config.get("project_id")
318
- if not project_id:
319
- yield "ERROR: No Vertex Project ID."
320
- return
 
321
 
322
- safety_settings =[
323
- SafetySetting(category="HARM_CATEGORY_HATE_SPEECH", threshold="BLOCK_ONLY_HIGH"),
324
- SafetySetting(category="HARM_CATEGORY_DANGEROUS_CONTENT", threshold="BLOCK_ONLY_HIGH"),
325
- SafetySetting(category="HARM_CATEGORY_SEXUALLY_EXPLICIT", threshold="BLOCK_ONLY_HIGH"),
326
- SafetySetting(category="HARM_CATEGORY_HARASSMENT", threshold="BLOCK_ONLY_HIGH"),
327
- ]
328
 
329
  try:
330
- api_key = vertex_config.get("api_key")
331
  if api_key:
332
- client = genai.Client(vertexai=True, project=project_id, location=vertex_config.get("location", "us-central1"), api_key=api_key)
333
  else:
334
- client = genai.Client(vertexai=True, project=project_id, location=vertex_config.get("location", "us-central1"))
335
-
336
  video_part = None
337
  is_text_only = False
338
  if video_path and os.path.exists(video_path):
339
  with open(video_path, 'rb') as f: video_bytes = f.read()
340
  video_part = Part.from_bytes(data=video_bytes, mime_type="video/mp4")
341
- else:
342
- is_text_only = True
 
 
 
 
 
 
 
 
 
 
 
343
 
344
- toon_schema = SCHEMA_REASONING if include_comments else SCHEMA_SIMPLE
345
- score_instructions = SCORE_INSTRUCTIONS_REASONING if include_comments else SCORE_INSTRUCTIONS_SIMPLE
346
- model_name = vertex_config.get("model_name", "gemini-2.5-flash-lite")
347
- max_retries = int(vertex_config.get("max_retries", 1))
348
-
349
- raw_text = ""
350
- prompt_used = ""
351
- loop = asyncio.get_event_loop()
352
  config = GenerateContentConfig(
353
- temperature=0.1,
354
- response_mime_type="text/plain",
355
- tools=[Tool(google_search=GoogleSearch())] if vertex_config.get("use_search", True) else None,
356
- safety_settings=safety_settings
357
  )
358
 
359
- if is_text_only: system_persona += "\n" + TEXT_ONLY_INSTRUCTIONS
360
-
 
 
361
  accumulated_data = {}
 
362
  fcot_trace = {}
363
  full_raw_text = ""
 
 
 
364
 
365
  for attempt in range(max_retries + 1):
366
  raw_text = ""
@@ -368,6 +445,7 @@ async def run_vertex_labeling_pipeline(video_path: str, caption: str, transcript
368
  missing = validate_parsed_data(accumulated_data, is_text_only)
369
  yield f"Validation failed. Missing or incomplete fields: {missing}. Initiating Iterative Reprompt (Attempt {attempt}/{max_retries}) to acquire remaining factuality components...\n"
370
 
 
371
  prompt_text = (
372
  f"SYSTEM: Review the previous attempt which failed validation.\n"
373
  f"CONTEXT: Caption: \"{caption}\"\nTranscript: \"{transcript}\"\n"
@@ -388,12 +466,12 @@ async def run_vertex_labeling_pipeline(video_path: str, caption: str, transcript
388
  save_debug_log(request_id, 'response', raw_text, attempt, 'reprompt')
389
  else:
390
  if reasoning_method == "fcot":
391
- yield "Starting FCoT (Vertex)..."
392
  chat = client.chats.create(model=model_name, config=config)
393
 
394
  macro_prompt = FCOT_MACRO_PROMPT.format(system_persona=system_persona, caption=caption, transcript=transcript)
395
  save_debug_log(request_id, 'prompt', macro_prompt, attempt, 'fcot_macro')
396
- inputs1 =[macro_prompt]
397
  if video_part: inputs1.insert(0, video_part)
398
  else: inputs1[0] = "NOTE: Text Only Analysis.\n" + inputs1[0]
399
 
@@ -401,7 +479,6 @@ async def run_vertex_labeling_pipeline(video_path: str, caption: str, transcript
401
  macro_hypothesis = res1.text
402
  save_debug_log(request_id, 'response', macro_hypothesis, attempt, 'fcot_macro')
403
  fcot_trace['macro'] = macro_hypothesis
404
- yield f"Hypothesis: {macro_hypothesis[:80]}...\n"
405
 
406
  meso_prompt = FCOT_MESO_PROMPT.format(macro_hypothesis=macro_hypothesis)
407
  save_debug_log(request_id, 'prompt', meso_prompt, attempt, 'fcot_meso')
@@ -410,31 +487,27 @@ async def run_vertex_labeling_pipeline(video_path: str, caption: str, transcript
410
  save_debug_log(request_id, 'response', micro_observations, attempt, 'fcot_meso')
411
  fcot_trace['meso'] = micro_observations
412
 
413
- synthesis_prompt = FCOT_SYNTHESIS_PROMPT.format(toon_schema=toon_schema, score_instructions=score_instructions, tag_list_text=get_formatted_tag_list())
414
  save_debug_log(request_id, 'prompt', synthesis_prompt, attempt, 'fcot_synthesis')
415
  res3 = await loop.run_in_executor(None, lambda: chat.send_message(synthesis_prompt))
416
  raw_text = res3.text
417
  save_debug_log(request_id, 'response', raw_text, attempt, 'fcot_synthesis')
418
- prompt_used = f"FCoT (Vertex):\n{macro_prompt}..."
419
-
420
  else:
421
- prompt_text = LABELING_PROMPT_TEMPLATE.format(system_persona=system_persona, caption=caption, transcript=transcript, toon_schema=toon_schema, score_instructions=score_instructions, tag_list_text=get_formatted_tag_list())
422
- contents = []
 
 
 
 
423
  if video_part: contents =[video_part, prompt_text]
424
  else: contents =[f"NOTE: Text Only Analysis (No Video).\n{prompt_text}"]
425
  prompt_used = prompt_text
426
- save_debug_log(request_id, 'prompt', prompt_text, attempt, 'standard')
427
- yield f"Generating Labels ({model_name})..."
428
- response = await loop.run_in_executor(
429
- None,
430
- lambda: client.models.generate_content(model=model_name, contents=contents, config=config)
431
- )
432
  raw_text = response.text
433
- save_debug_log(request_id, 'response', raw_text, attempt, 'standard')
434
-
435
- if not raw_text:
436
- yield {"error": "Empty Response"}
437
- return
438
 
439
  if raw_text:
440
  full_raw_text += f"\n--- Attempt {attempt} ---\n{raw_text}\n"
@@ -451,7 +524,7 @@ async def run_vertex_labeling_pipeline(video_path: str, caption: str, transcript
451
 
452
  missing_fields = validate_parsed_data(accumulated_data, is_text_only)
453
  if not missing_fields:
454
- yield "Validation Passed. All factuality components processed and confidence scores obtained.\n"
455
  yield {"raw_toon": full_raw_text, "parsed_data": accumulated_data, "prompt_used": prompt_used, "fcot_trace": fcot_trace}
456
  break
457
 
@@ -459,12 +532,11 @@ async def run_vertex_labeling_pipeline(video_path: str, caption: str, transcript
459
  yield f"Max retries reached. Saving incomplete data.\n"
460
  yield {"raw_toon": full_raw_text, "parsed_data": accumulated_data, "prompt_used": prompt_used, "fcot_trace": fcot_trace}
461
  break
462
-
463
  except Exception as e:
464
- yield f"ERROR (Vertex): {e}"
465
  logger.error("Vertex Labeling Error", exc_info=True)
466
 
467
-
468
  async def run_nrp_labeling_pipeline(video_path: str, caption: str, transcript: str, nrp_config: dict, include_comments: bool, reasoning_method: str = "cot", system_persona: str = "", request_id: str = None):
469
  api_key = nrp_config.get("api_key")
470
  model_name = nrp_config.get("model_name", "gpt-4")
@@ -482,6 +554,11 @@ async def run_nrp_labeling_pipeline(video_path: str, caption: str, transcript: s
482
 
483
  is_text_only = True
484
  system_persona += "\n" + TEXT_ONLY_INSTRUCTIONS
 
 
 
 
 
485
 
486
  toon_schema = SCHEMA_REASONING if include_comments else SCHEMA_SIMPLE
487
  score_instructions = SCORE_INSTRUCTIONS_REASONING if include_comments else SCORE_INSTRUCTIONS_SIMPLE
@@ -499,11 +576,30 @@ async def run_nrp_labeling_pipeline(video_path: str, caption: str, transcript: s
499
  "messages": messages,
500
  "temperature": 0.1
501
  }
 
 
 
 
 
502
  def do_request():
 
 
 
503
  resp = requests.post(f"{base_url}/chat/completions", headers=headers, json=payload, timeout=600)
 
 
 
 
504
  if resp.status_code != 200:
 
505
  raise Exception(f"API Error {resp.status_code}: {resp.text}")
506
- return resp.json()["choices"][0]["message"]["content"]
 
 
 
 
 
 
507
  return await loop.run_in_executor(None, do_request)
508
 
509
  try:
@@ -511,63 +607,85 @@ async def run_nrp_labeling_pipeline(video_path: str, caption: str, transcript: s
511
  raw_text = ""
512
  if attempt > 0:
513
  missing = validate_parsed_data(accumulated_data, is_text_only)
514
- yield f"Validation failed. Missing fields: {missing}. Initiating Reprompt...\n"
 
515
  prompt_text = (
516
  f"SYSTEM: Review the previous attempt which failed validation.\n"
517
  f"CONTEXT: Caption: \"{caption}\"\nTranscript: \"{transcript}\"\n"
518
  f"PREVIOUS (PARTIAL) DATA: {json.dumps(accumulated_data, indent=2)}\n"
519
  f"MISSING FIELDS: {missing}\n"
520
- f"INSTRUCTION: Generate the missing fields to complete the schema.\n"
 
521
  f"{toon_schema}"
522
  )
 
523
  save_debug_log(request_id, 'prompt', prompt_text, attempt, 'reprompt')
 
 
524
  raw_text = await _call_nrp([
525
  {"role": "system", "content": system_persona},
526
  {"role": "user", "content": prompt_text}
527
- ])
 
 
528
  save_debug_log(request_id, 'response', raw_text, attempt, 'reprompt')
529
  else:
530
  if reasoning_method == "fcot":
531
  yield "Starting Fractal Chain of Thought (NRP FCoT)...\n"
 
532
  macro_prompt = FCOT_MACRO_PROMPT.format(system_persona=system_persona, caption=caption, transcript=transcript)
533
  macro_prompt = "NOTE: Text Only Analysis.\n" + macro_prompt
534
  save_debug_log(request_id, 'prompt', macro_prompt, attempt, 'fcot_macro')
535
 
536
  macro_messages =[{"role": "system", "content": system_persona}, {"role": "user", "content": macro_prompt}]
537
- macro_hypothesis = await _call_nrp(macro_messages)
 
 
 
538
  save_debug_log(request_id, 'response', macro_hypothesis, attempt, 'fcot_macro')
539
  fcot_trace['macro'] = macro_hypothesis
540
 
541
  meso_prompt = FCOT_MESO_PROMPT.format(macro_hypothesis=macro_hypothesis)
542
  save_debug_log(request_id, 'prompt', meso_prompt, attempt, 'fcot_meso')
543
-
544
  meso_messages = macro_messages +[{"role": "assistant", "content": macro_hypothesis}, {"role": "user", "content": meso_prompt}]
545
- micro_observations = await _call_nrp(meso_messages)
 
 
 
 
546
  save_debug_log(request_id, 'response', micro_observations, attempt, 'fcot_meso')
547
  fcot_trace['meso'] = micro_observations
548
 
549
  synthesis_prompt = FCOT_SYNTHESIS_PROMPT.format(toon_schema=toon_schema, score_instructions=score_instructions, tag_list_text=tag_list_text)
550
  save_debug_log(request_id, 'prompt', synthesis_prompt, attempt, 'fcot_synthesis')
551
-
552
  synthesis_messages = meso_messages +[{"role": "assistant", "content": micro_observations}, {"role": "user", "content": synthesis_prompt}]
553
- raw_text = await _call_nrp(synthesis_messages)
 
 
 
 
554
  save_debug_log(request_id, 'response', raw_text, attempt, 'fcot_synthesis')
555
  prompt_used = f"FCoT (NRP):\nMacro: {macro_hypothesis}\nMeso: {micro_observations}"
556
 
557
  else:
558
- prompt_text = LABELING_PROMPT_TEMPLATE.format(
 
559
  system_persona=system_persona, caption=caption, transcript=transcript,
560
  toon_schema=toon_schema, score_instructions=score_instructions, tag_list_text=tag_list_text
561
  )
562
  prompt_text = f"NOTE: Text Only Analysis (No Video).\n{prompt_text}"
563
  prompt_used = prompt_text
564
- save_debug_log(request_id, 'prompt', prompt_text, attempt, 'standard')
565
- yield "Generating Labels (NRP CoT)...\n"
 
 
566
  raw_text = await _call_nrp([
567
  {"role": "system", "content": system_persona},
568
  {"role": "user", "content": prompt_text}
569
- ])
570
- save_debug_log(request_id, 'response', raw_text, attempt, 'standard')
 
 
571
 
572
  if raw_text:
573
  full_raw_text += f"\n--- Attempt {attempt} ---\n{raw_text}\n"
@@ -584,14 +702,10 @@ async def run_nrp_labeling_pipeline(video_path: str, caption: str, transcript: s
584
 
585
  missing_fields = validate_parsed_data(accumulated_data, is_text_only)
586
  if not missing_fields:
587
- yield "Validation Passed.\n"
588
  yield {"raw_toon": full_raw_text, "parsed_data": accumulated_data, "prompt_used": prompt_used, "fcot_trace": fcot_trace}
589
  break
590
 
591
  if attempt == max_retries:
592
- yield {"raw_toon": full_raw_text, "parsed_data": accumulated_data, "prompt_used": prompt_used, "fcot_trace": fcot_trace}
593
- break
594
-
595
- except Exception as e:
596
- yield f"ERROR: {e}\n\n"
597
- logger.error("NRP Labeling Error", exc_info=True)
 
1
+ import torch
2
  import re
3
+ import ast
4
  import sys
5
  import os
 
6
  import logging
7
  import asyncio
8
  import json
 
9
  import datetime
10
+ import requests
11
 
 
12
  try:
13
  from transformers import Qwen3VLForConditionalGeneration, AutoProcessor
14
  from peft import PeftModel
 
17
  AutoProcessor = None
18
  PeftModel = None
19
 
 
 
 
 
 
 
 
 
20
  try:
21
  from my_vision_process import process_vision_info, client
22
  except ImportError:
23
  process_vision_info = None
24
  client = None
25
 
26
+ from labeling_logic import (
27
+ LABELING_PROMPT_TEMPLATE, LABELING_PROMPT_TEMPLATE_NO_COT,
28
+ SCORE_INSTRUCTIONS_SIMPLE, SCORE_INSTRUCTIONS_REASONING,
29
+ SCHEMA_SIMPLE, SCHEMA_REASONING,
30
+ FCOT_MACRO_PROMPT, FCOT_MESO_PROMPT, FCOT_SYNTHESIS_PROMPT, TEXT_ONLY_INSTRUCTIONS,
31
+ get_formatted_tag_list
32
+ )
33
+ from toon_parser import parse_veracity_toon
34
+
35
  # Google GenAI Imports
36
  try:
37
  import google.generativeai as genai_legacy
38
+ from google.generativeai.types import generation_types
39
  except ImportError:
40
  genai_legacy = None
41
 
42
  try:
43
+ # Modern Google GenAI SDK (v1)
44
  from google import genai
45
  from google.genai.types import (
46
  GenerateContentConfig,
 
49
  Tool,
50
  VertexAISearch,
51
  GoogleSearch,
52
+ Part
 
53
  )
54
  import vertexai
55
  except ImportError:
 
63
  active_model = None
64
  logger = logging.getLogger(__name__)
65
 
 
 
 
 
 
 
 
 
 
66
  def load_models():
67
+ global LITE_MODE, processor, base_model, peft_model, active_model
68
+
69
+ if LITE_MODE:
70
+ logger.info("LITE_MODE is enabled. Skipping local model loading.")
71
+ return
72
+
73
+ if base_model is not None: return
74
+
75
+ if not torch.cuda.is_available():
76
+ logger.warning("CUDA is not available. This application requires a GPU for local models. Switching to LITE_MODE.")
77
+ LITE_MODE = True
78
+ return
79
+
80
+ device = torch.device("cuda")
81
+ logger.info(f"CUDA is available. Initializing models on {device}...")
82
+ local_model_path = "/app/local_model"
83
+
84
+ try:
85
+ import flash_attn
86
+ attn_implementation = "flash_attention_2"
87
+ except ImportError:
88
+ attn_implementation = "sdpa"
89
+
90
+ logger.info(f"Loading base model from {local_model_path}...")
91
+ try:
92
+ base_model = Qwen3VLForConditionalGeneration.from_pretrained(
93
+ local_model_path, dtype=torch.bfloat16, device_map="auto", attn_implementation=attn_implementation
94
+ ).eval()
95
+ processor = AutoProcessor.from_pretrained(local_model_path)
96
+ active_model = base_model
97
+ except Exception as e:
98
+ logger.error(f"Failed to load local model: {e}")
99
+ LITE_MODE = True
100
+
101
+ def switch_active_model(model_name: str):
102
+ global active_model, base_model, peft_model
103
+ if model_name == "custom" and peft_model is not None:
104
+ active_model = peft_model
105
+ else:
106
+ active_model = base_model
107
+
108
+ def inference_step(video_path, prompt, generation_kwargs, sampling_fps, pred_glue=None):
109
+ global processor, active_model
110
+ if active_model is None: raise RuntimeError("Models not loaded.")
111
+
112
+ messages =[
113
+ {"role": "user", "content":[
114
+ {"type": "video", "video": video_path, 'key_time': pred_glue, 'fps': sampling_fps,
115
+ "total_pixels": 128*12 * 28 * 28, "min_pixels": 128 * 28 * 28},
116
+ {"type": "text", "text": prompt},
117
+ ]
118
+ },
119
+ ]
120
+ text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
121
+ image_inputs, video_inputs, video_kwargs = process_vision_info(messages, return_video_kwargs=True, client=client)
122
+ fps_inputs = video_kwargs['fps'][0]
123
+ inputs = processor(text=[text], images=image_inputs, videos=video_inputs, fps=fps_inputs, padding=True, return_tensors="pt")
124
+ inputs = {k: v.to(active_model.device) for k, v in inputs.items()}
125
+
126
+ with torch.no_grad():
127
+ output_ids = active_model.generate(**inputs, **generation_kwargs, use_cache=True)
128
 
129
+ generated_ids = [output_ids[i][len(inputs['input_ids'][i]):] for i in range(len(output_ids))]
130
+ output_text = processor.batch_decode(generated_ids, skip_special_tokens=True)
131
+ return output_text[0]
132
+
133
+ async def generate_simple_text(prompt: str, model_type: str, config: dict):
134
+ loop = asyncio.get_event_loop()
135
+ try:
136
+ if model_type == 'gemini':
137
+ if genai_legacy is None: return "Error: Legacy SDK missing."
138
+ genai_legacy.configure(api_key=config.get("api_key"))
139
+ model = genai_legacy.GenerativeModel(config.get("model_name", "models/gemini-2.0-flash-exp"))
140
+ response = await loop.run_in_executor(
141
+ None,
142
+ lambda: model.generate_content(prompt, generation_config={"temperature": 0.0})
143
+ )
144
+ return response.text
145
+
146
+ elif model_type == 'vertex':
147
+ if genai is None: return "Error: Vertex SDK missing."
148
+ api_key = config.get("api_key")
149
+ if api_key:
150
+ cl = genai.Client(vertexai=True, project=config['project_id'], location=config['location'], api_key=api_key)
151
+ else:
152
+ cl = genai.Client(vertexai=True, project=config['project_id'], location=config['location'])
153
+ response = await loop.run_in_executor(
154
+ None,
155
+ lambda: cl.models.generate_content(
156
+ model=config['model_name'],
157
+ contents=prompt,
158
+ config=GenerateContentConfig(temperature=0.0)
159
+ )
160
+ )
161
+ return response.text
162
+
163
+ elif model_type == 'nrp':
164
+ api_key = config.get("api_key")
165
+ model_name = config.get("model_name", "gpt-4")
166
+ base_url = config.get("base_url", "https://api.openai.com/v1").rstrip("/")
167
+ if not api_key: return "Error: NRP API key missing."
168
+ headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
169
+ payload = {"model": model_name, "messages":[{"role": "user", "content": prompt}], "temperature": 0.0}
170
+ def do_request():
171
+ resp = requests.post(f"{base_url}/chat/completions", headers=headers, json=payload, timeout=600)
172
+ if resp.status_code == 200:
173
+ return resp.json()["choices"][0]["message"]["content"]
174
+ return f"Error: {resp.status_code} {resp.text}"
175
+ return await loop.run_in_executor(None, do_request)
176
+
177
+ except Exception as e:
178
+ logger.error(f"Text Gen Error: {e}")
179
+ return f"Error generating text: {e}"
180
+
181
+ async def generate_community_summary(comments: list, model_type: str, config: dict):
182
+ if not comments: return "No comments available."
183
+ c_text = "\n".join([f"- {c.get('author', 'User')}: {c.get('text', '')}" for c in comments[:15]])
184
+ prompt = (
185
+ "You are a Community Context Analyst. Analyze the following user comments regarding a social media post.\n"
186
+ "Your goal is to extract 'Community Notes' - specifically looking for fact-checking, debunking, or additional context provided by users.\n"
187
+ f"COMMENTS:\n{c_text}\n\n"
188
+ "OUTPUT:\n"
189
+ "Provide a concise 1-paragraph summary of the community consensus regarding the veracity of the post."
190
+ )
191
+ return await generate_simple_text(prompt, model_type, config)
192
+
193
  def extract_json_from_text(text):
194
  try:
195
  match = re.search(r'\{[\s\S]*\}', text)
 
198
  except:
199
  pass
200
  return {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
 
202
  def validate_parsed_data(data, is_text_only):
203
  missing =[]
204
+
205
  if not data.get('video_context_summary'): missing.append("summary")
206
+
207
  final = data.get('final_assessment', {})
208
  if not final.get('reasoning') or len(str(final.get('reasoning', ''))) < 5: missing.append("final:reasoning")
209
+
210
  vectors = data.get('veracity_vectors', {})
211
+ required_vectors =['visual_integrity_score', 'audio_integrity_score', 'source_credibility_score', 'logical_consistency_score', 'emotional_manipulation_score']
212
+ for k in required_vectors:
213
  if k in['visual_integrity_score', 'audio_integrity_score'] and is_text_only: continue
214
  v = vectors.get(k)
215
  if not v or str(v) == '0' or str(v).lower() == 'n/a': missing.append(f"vector:{k}")
216
+
217
  mod = data.get('modalities', {})
218
  for k in['video_audio_score', 'video_caption_score', 'audio_caption_score']:
219
  if k in['video_audio_score', 'video_caption_score'] and is_text_only: continue
220
  v = mod.get(k)
221
  if not v or str(v) == '0' or str(v).lower() == 'n/a': missing.append(f"modality:{k}")
222
+
223
+ fact = data.get('factuality_factors', {})
224
+ if not fact.get('claim_accuracy'): missing.append("factuality:claim_accuracy")
225
+
226
+ disinfo = data.get('disinformation_analysis', {})
227
+ if not disinfo.get('classification'): missing.append("disinfo:classification")
228
+
229
  return missing
230
 
231
+ def smart_merge(base, new_data):
232
+ if not isinstance(new_data, dict): return new_data if new_data else base
233
+ if not isinstance(base, dict): return new_data
234
+ for k, v in new_data.items():
235
+ if k not in base: base[k] = v
236
+ else:
237
+ if isinstance(base[k], dict) and isinstance(v, dict): smart_merge(base[k], v)
238
+ else:
239
+ base_val = base[k]
240
+ new_val = v
241
+ is_base_valid = base_val and str(base_val) != "0" and str(base_val).lower() != "n/a"
242
+ is_new_valid = new_val and str(new_val) != "0" and str(new_val).lower() != "n/a"
243
+ if not is_base_valid and is_new_valid: base[k] = new_val
244
+ return base
245
+
246
  def save_debug_log(request_id, kind, content, attempt, label=""):
247
  if not request_id: return
248
  try:
 
256
  except Exception as e:
257
  logger.error(f"Failed to save debug log: {e}")
258
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259
  async def run_gemini_labeling_pipeline(video_path: str, caption: str, transcript: str, gemini_config: dict, include_comments: bool, reasoning_method: str = "cot", system_persona: str = "", request_id: str = None):
260
  if genai_legacy is None:
261
  yield "ERROR: Legacy SDK missing.\n"
262
  return
 
263
  api_key = gemini_config.get("api_key")
264
+ if not api_key: return
 
 
 
265
  max_retries = int(gemini_config.get("max_retries", 1))
266
+
 
 
 
 
 
 
 
267
  try:
268
  genai_legacy.configure(api_key=api_key)
269
  loop = asyncio.get_event_loop()
270
  uploaded_file = None
271
  is_text_only = False
 
272
  if video_path and os.path.exists(video_path):
273
+ uploaded_file = await loop.run_in_executor(None, lambda: genai_legacy.upload_file(path=video_path))
274
+ while uploaded_file.state.name == "PROCESSING": await asyncio.sleep(2)
275
+ else: is_text_only = True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276
 
277
+ active_tools =[]
278
+ if gemini_config.get("use_search", False):
279
+ active_tools.append({"google_search_retrieval": {}})
280
+ system_persona += "\n\n**CRITICAL: AGENTIC TOOLS ENABLED**\n- You MUST use the Web Search tool to fact-check the claims, look up current events, or verify entity backgrounds before concluding."
281
+ if gemini_config.get("use_code", False):
282
+ active_tools.append({"code_execution": {}})
283
+ system_persona += "\n- You MUST use the Code Execution tool for any necessary calculations, data processing, or statistical verifications."
284
+
285
+ model = genai_legacy.GenerativeModel("models/gemini-2.0-flash-exp", tools=active_tools if active_tools else None)
286
  toon_schema = SCHEMA_REASONING if include_comments else SCHEMA_SIMPLE
287
  score_instructions = SCORE_INSTRUCTIONS_REASONING if include_comments else SCORE_INSTRUCTIONS_SIMPLE
288
+ tag_list_text = get_formatted_tag_list()
289
 
 
 
 
 
 
290
  accumulated_data = {}
291
+ prompt_used = ""
292
  fcot_trace = {}
293
  full_raw_text = ""
294
+ if is_text_only: system_persona += "\n" + TEXT_ONLY_INSTRUCTIONS
295
 
296
  for attempt in range(max_retries + 1):
297
  raw_text = ""
 
316
  save_debug_log(request_id, 'response', raw_text, attempt, 'reprompt')
317
  else:
318
  if reasoning_method == "fcot":
319
+ yield "Starting Fractal Chain of Thought (Gemini FCoT)..."
320
  chat = model.start_chat(history=[])
321
 
322
  macro_prompt = FCOT_MACRO_PROMPT.format(system_persona=system_persona, caption=caption, transcript=transcript)
 
323
  save_debug_log(request_id, 'prompt', macro_prompt, attempt, 'fcot_macro')
324
+ inputs1 = [macro_prompt]
 
325
  if uploaded_file: inputs1.insert(0, uploaded_file)
326
+ res1 = await loop.run_in_executor(None, lambda: chat.send_message(inputs1))
 
327
  macro_hypothesis = res1.text
328
  save_debug_log(request_id, 'response', macro_hypothesis, attempt, 'fcot_macro')
329
  fcot_trace['macro'] = macro_hypothesis
 
330
 
331
  meso_prompt = FCOT_MESO_PROMPT.format(macro_hypothesis=macro_hypothesis)
332
  save_debug_log(request_id, 'prompt', meso_prompt, attempt, 'fcot_meso')
333
+ res2 = await loop.run_in_executor(None, lambda: chat.send_message(meso_prompt))
334
  micro_observations = res2.text
335
  save_debug_log(request_id, 'response', micro_observations, attempt, 'fcot_meso')
336
  fcot_trace['meso'] = micro_observations
337
+
338
+ synthesis_prompt = FCOT_SYNTHESIS_PROMPT.format(toon_schema=toon_schema, score_instructions=score_instructions, tag_list_text=tag_list_text)
339
  save_debug_log(request_id, 'prompt', synthesis_prompt, attempt, 'fcot_synthesis')
340
+ res3 = await loop.run_in_executor(None, lambda: chat.send_message(synthesis_prompt))
341
  raw_text = res3.text
342
  save_debug_log(request_id, 'response', raw_text, attempt, 'fcot_synthesis')
343
+ prompt_used = f"FCoT Pipeline:\nMacro: {macro_hypothesis}\nMeso: {micro_observations}"
344
  else:
345
+ template = LABELING_PROMPT_TEMPLATE_NO_COT if reasoning_method == "none" else LABELING_PROMPT_TEMPLATE
346
+ prompt_text = template.format(
347
+ system_persona=system_persona, caption=caption, transcript=transcript,
348
+ toon_schema=toon_schema, score_instructions=score_instructions, tag_list_text=tag_list_text
349
+ )
350
  prompt_used = prompt_text
351
+ if is_text_only: prompt_text = "NOTE: Text Analysis Only.\n" + prompt_text
352
+ save_debug_log(request_id, 'prompt', prompt_text, attempt, f'standard_{reasoning_method}')
 
353
  inputs = [prompt_text]
354
  if uploaded_file: inputs.append(uploaded_file)
355
+ response = await loop.run_in_executor(None, lambda: model.generate_content(inputs, generation_config={"temperature": 0.1}))
 
 
 
 
356
  raw_text = response.text
357
+ save_debug_log(request_id, 'response', raw_text, attempt, f'standard_{reasoning_method}')
358
+
359
  if raw_text:
360
  full_raw_text += f"\n--- Attempt {attempt} ---\n{raw_text}\n"
361
  parsed_step = parse_veracity_toon(raw_text)
 
368
  else:
369
  parsed_step[k] = json_data[k]
370
  accumulated_data = smart_merge(accumulated_data, parsed_step)
371
+
372
  missing_fields = validate_parsed_data(accumulated_data, is_text_only)
373
  if not missing_fields:
374
+ yield f"Validation Passed. All factuality components processed and confidence scores obtained. (Method: {reasoning_method})\n"
375
  yield {"raw_toon": full_raw_text, "parsed_data": accumulated_data, "prompt_used": prompt_used, "fcot_trace": fcot_trace}
376
  break
377
+
378
  if attempt == max_retries:
379
  yield f"Max retries reached. Saving incomplete data.\n"
380
  yield {"raw_toon": full_raw_text, "parsed_data": accumulated_data, "prompt_used": prompt_used, "fcot_trace": fcot_trace}
381
  break
382
 
383
+ if uploaded_file: await loop.run_in_executor(None, lambda: genai_legacy.delete_file(name=uploaded_file.name))
384
+ except Exception as e: yield f"ERROR: {e}"
 
 
 
 
 
 
 
 
385
 
386
  async def run_vertex_labeling_pipeline(video_path: str, caption: str, transcript: str, vertex_config: dict, include_comments: bool, reasoning_method: str = "cot", system_persona: str = "", request_id: str = None):
387
  if genai is None:
 
389
  return
390
 
391
  project_id = vertex_config.get("project_id")
392
+ location = vertex_config.get("location", "us-central1")
393
+ model_name = vertex_config.get("model_name", "gemini-1.5-pro-preview-0409")
394
+ max_retries = int(vertex_config.get("max_retries", 1))
395
+ api_key = vertex_config.get("api_key")
396
 
397
+ if not project_id: return
 
 
 
 
 
398
 
399
  try:
400
+ # Pass api_key directly if available to use API Keys instead of ADC Service Accounts
401
  if api_key:
402
+ client = genai.Client(vertexai=True, project=project_id, location=location, api_key=api_key)
403
  else:
404
+ client = genai.Client(vertexai=True, project=project_id, location=location)
405
+
406
  video_part = None
407
  is_text_only = False
408
  if video_path and os.path.exists(video_path):
409
  with open(video_path, 'rb') as f: video_bytes = f.read()
410
  video_part = Part.from_bytes(data=video_bytes, mime_type="video/mp4")
411
+ else: is_text_only = True
412
+
413
+ active_tools =[]
414
+ if vertex_config.get("use_search", False):
415
+ active_tools.append(Tool(google_search=GoogleSearch()))
416
+ system_persona += "\n\n**CRITICAL: AGENTIC TOOLS ENABLED**\n- You MUST use the Web Search tool to fact-check the claims, look up current events, or verify entity backgrounds before concluding."
417
+ if vertex_config.get("use_code", False):
418
+ try:
419
+ from google.genai.types import CodeExecution
420
+ active_tools.append(Tool(code_execution=CodeExecution()))
421
+ system_persona += "\n- You MUST use the Code Execution tool for any necessary calculations, data processing, or statistical verifications."
422
+ except ImportError:
423
+ pass
424
 
 
 
 
 
 
 
 
 
425
  config = GenerateContentConfig(
426
+ temperature=0.1, response_mime_type="text/plain", max_output_tokens=8192,
427
+ tools=active_tools if active_tools else None
 
 
428
  )
429
 
430
+ toon_schema = SCHEMA_REASONING if include_comments else SCHEMA_SIMPLE
431
+ score_instructions = SCORE_INSTRUCTIONS_REASONING if include_comments else SCORE_INSTRUCTIONS_SIMPLE
432
+ tag_list_text = get_formatted_tag_list()
433
+
434
  accumulated_data = {}
435
+ prompt_used = ""
436
  fcot_trace = {}
437
  full_raw_text = ""
438
+ loop = asyncio.get_event_loop()
439
+
440
+ if is_text_only: system_persona += "\n" + TEXT_ONLY_INSTRUCTIONS
441
 
442
  for attempt in range(max_retries + 1):
443
  raw_text = ""
 
445
  missing = validate_parsed_data(accumulated_data, is_text_only)
446
  yield f"Validation failed. Missing or incomplete fields: {missing}. Initiating Iterative Reprompt (Attempt {attempt}/{max_retries}) to acquire remaining factuality components...\n"
447
 
448
+ # REPROMPT CONSTRUCTION
449
  prompt_text = (
450
  f"SYSTEM: Review the previous attempt which failed validation.\n"
451
  f"CONTEXT: Caption: \"{caption}\"\nTranscript: \"{transcript}\"\n"
 
466
  save_debug_log(request_id, 'response', raw_text, attempt, 'reprompt')
467
  else:
468
  if reasoning_method == "fcot":
469
+ yield "Starting Fractal Chain of Thought (Vertex FCoT)..."
470
  chat = client.chats.create(model=model_name, config=config)
471
 
472
  macro_prompt = FCOT_MACRO_PROMPT.format(system_persona=system_persona, caption=caption, transcript=transcript)
473
  save_debug_log(request_id, 'prompt', macro_prompt, attempt, 'fcot_macro')
474
+ inputs1 = [macro_prompt]
475
  if video_part: inputs1.insert(0, video_part)
476
  else: inputs1[0] = "NOTE: Text Only Analysis.\n" + inputs1[0]
477
 
 
479
  macro_hypothesis = res1.text
480
  save_debug_log(request_id, 'response', macro_hypothesis, attempt, 'fcot_macro')
481
  fcot_trace['macro'] = macro_hypothesis
 
482
 
483
  meso_prompt = FCOT_MESO_PROMPT.format(macro_hypothesis=macro_hypothesis)
484
  save_debug_log(request_id, 'prompt', meso_prompt, attempt, 'fcot_meso')
 
487
  save_debug_log(request_id, 'response', micro_observations, attempt, 'fcot_meso')
488
  fcot_trace['meso'] = micro_observations
489
 
490
+ synthesis_prompt = FCOT_SYNTHESIS_PROMPT.format(toon_schema=toon_schema, score_instructions=score_instructions, tag_list_text=tag_list_text)
491
  save_debug_log(request_id, 'prompt', synthesis_prompt, attempt, 'fcot_synthesis')
492
  res3 = await loop.run_in_executor(None, lambda: chat.send_message(synthesis_prompt))
493
  raw_text = res3.text
494
  save_debug_log(request_id, 'response', raw_text, attempt, 'fcot_synthesis')
495
+ prompt_used = f"FCoT (Vertex):\nMacro: {macro_hypothesis}\nMeso: {micro_observations}"
 
496
  else:
497
+ template = LABELING_PROMPT_TEMPLATE_NO_COT if reasoning_method == "none" else LABELING_PROMPT_TEMPLATE
498
+ prompt_text = template.format(
499
+ system_persona=system_persona, caption=caption, transcript=transcript,
500
+ toon_schema=toon_schema, score_instructions=score_instructions, tag_list_text=tag_list_text
501
+ )
502
+ contents =[]
503
  if video_part: contents =[video_part, prompt_text]
504
  else: contents =[f"NOTE: Text Only Analysis (No Video).\n{prompt_text}"]
505
  prompt_used = prompt_text
506
+ save_debug_log(request_id, 'prompt', prompt_text, attempt, f'standard_{reasoning_method}')
507
+ yield f"Generating Labels (Vertex {reasoning_method.upper()})..."
508
+ response = await loop.run_in_executor(None, lambda: client.models.generate_content(model=model_name, contents=contents, config=config))
 
 
 
509
  raw_text = response.text
510
+ save_debug_log(request_id, 'response', raw_text, attempt, f'standard_{reasoning_method}')
 
 
 
 
511
 
512
  if raw_text:
513
  full_raw_text += f"\n--- Attempt {attempt} ---\n{raw_text}\n"
 
524
 
525
  missing_fields = validate_parsed_data(accumulated_data, is_text_only)
526
  if not missing_fields:
527
+ yield f"Validation Passed. All factuality components processed and confidence scores obtained. (Method: {reasoning_method})\n"
528
  yield {"raw_toon": full_raw_text, "parsed_data": accumulated_data, "prompt_used": prompt_used, "fcot_trace": fcot_trace}
529
  break
530
 
 
532
  yield f"Max retries reached. Saving incomplete data.\n"
533
  yield {"raw_toon": full_raw_text, "parsed_data": accumulated_data, "prompt_used": prompt_used, "fcot_trace": fcot_trace}
534
  break
535
+
536
  except Exception as e:
537
+ yield f"ERROR: {e}"
538
  logger.error("Vertex Labeling Error", exc_info=True)
539
 
 
540
  async def run_nrp_labeling_pipeline(video_path: str, caption: str, transcript: str, nrp_config: dict, include_comments: bool, reasoning_method: str = "cot", system_persona: str = "", request_id: str = None):
541
  api_key = nrp_config.get("api_key")
542
  model_name = nrp_config.get("model_name", "gpt-4")
 
554
 
555
  is_text_only = True
556
  system_persona += "\n" + TEXT_ONLY_INSTRUCTIONS
557
+
558
+ if nrp_config.get("use_search", False):
559
+ system_persona += "\n\n**CRITICAL: AGENTIC TOOLS ENABLED**\n- You MUST use the Web Search tool to fact-check the claims, look up current events, or verify entity backgrounds before concluding."
560
+ if nrp_config.get("use_code", False):
561
+ system_persona += "\n- You MUST use the Code Execution tool for any necessary calculations, data processing, or statistical verifications."
562
 
563
  toon_schema = SCHEMA_REASONING if include_comments else SCHEMA_SIMPLE
564
  score_instructions = SCORE_INSTRUCTIONS_REASONING if include_comments else SCORE_INSTRUCTIONS_SIMPLE
 
576
  "messages": messages,
577
  "temperature": 0.1
578
  }
579
+
580
+ logger.info(f"[{request_id}] NRP API Call ({attempt_label}) - URL: {base_url}/chat/completions")
581
+ logger.info(f"[{request_id}] NRP API Call - Model: {model_name}")
582
+ logger.info(f"[{request_id}] NRP API Call - Messages count: {len(messages)}")
583
+
584
  def do_request():
585
+ start_time = datetime.datetime.now()
586
+ logger.info(f"[{request_id}] Dispatching requests.post (timeout=600s)...")
587
+
588
  resp = requests.post(f"{base_url}/chat/completions", headers=headers, json=payload, timeout=600)
589
+
590
+ elapsed = (datetime.datetime.now() - start_time).total_seconds()
591
+ logger.info(f"[{request_id}] NRP API Response received in {elapsed:.2f}s. Status Code: {resp.status_code}")
592
+
593
  if resp.status_code != 200:
594
+ logger.error(f"[{request_id}] API Error {resp.status_code}: {resp.text}")
595
  raise Exception(f"API Error {resp.status_code}: {resp.text}")
596
+
597
+ resp_json = resp.json()
598
+ usage = resp_json.get("usage", {})
599
+ logger.info(f"[{request_id}] NRP API Usage: {usage}")
600
+
601
+ return resp_json["choices"][0]["message"]["content"]
602
+
603
  return await loop.run_in_executor(None, do_request)
604
 
605
  try:
 
607
  raw_text = ""
608
  if attempt > 0:
609
  missing = validate_parsed_data(accumulated_data, is_text_only)
610
+ yield f"Validation failed. Missing fields: {missing}. Initiating Reprompt (Attempt {attempt}/{max_retries})...\n"
611
+
612
  prompt_text = (
613
  f"SYSTEM: Review the previous attempt which failed validation.\n"
614
  f"CONTEXT: Caption: \"{caption}\"\nTranscript: \"{transcript}\"\n"
615
  f"PREVIOUS (PARTIAL) DATA: {json.dumps(accumulated_data, indent=2)}\n"
616
  f"MISSING FIELDS: {missing}\n"
617
+ f"INSTRUCTION: Generate the missing fields to complete the schema. You MUST provide the missing scores for {missing}.\n"
618
+ f"Output the FULL VALID TOON OBJECT containing all required fields.\n"
619
  f"{toon_schema}"
620
  )
621
+
622
  save_debug_log(request_id, 'prompt', prompt_text, attempt, 'reprompt')
623
+
624
+ yield f" - Sending Reprompt request to NRP API (Model: {model_name}, Timeout: 600s)...\n"
625
  raw_text = await _call_nrp([
626
  {"role": "system", "content": system_persona},
627
  {"role": "user", "content": prompt_text}
628
+ ], attempt_label=f"reprompt_{attempt}")
629
+ yield f" - Received Reprompt response from NRP API.\n\n"
630
+
631
  save_debug_log(request_id, 'response', raw_text, attempt, 'reprompt')
632
  else:
633
  if reasoning_method == "fcot":
634
  yield "Starting Fractal Chain of Thought (NRP FCoT)...\n"
635
+
636
  macro_prompt = FCOT_MACRO_PROMPT.format(system_persona=system_persona, caption=caption, transcript=transcript)
637
  macro_prompt = "NOTE: Text Only Analysis.\n" + macro_prompt
638
  save_debug_log(request_id, 'prompt', macro_prompt, attempt, 'fcot_macro')
639
 
640
  macro_messages =[{"role": "system", "content": system_persona}, {"role": "user", "content": macro_prompt}]
641
+ yield f" - Stage 1: Sending Macro Hypothesis request to NRP API (Timeout: 600s)...\n"
642
+ macro_hypothesis = await _call_nrp(macro_messages, attempt_label="fcot_macro")
643
+ yield f" - Stage 1: Received Macro Hypothesis response.\n"
644
+
645
  save_debug_log(request_id, 'response', macro_hypothesis, attempt, 'fcot_macro')
646
  fcot_trace['macro'] = macro_hypothesis
647
 
648
  meso_prompt = FCOT_MESO_PROMPT.format(macro_hypothesis=macro_hypothesis)
649
  save_debug_log(request_id, 'prompt', meso_prompt, attempt, 'fcot_meso')
 
650
  meso_messages = macro_messages +[{"role": "assistant", "content": macro_hypothesis}, {"role": "user", "content": meso_prompt}]
651
+
652
+ yield f" - Stage 2: Sending Meso Analysis request to NRP API (Timeout: 600s)...\n"
653
+ micro_observations = await _call_nrp(meso_messages, attempt_label="fcot_meso")
654
+ yield f" - Stage 2: Received Meso Analysis response.\n"
655
+
656
  save_debug_log(request_id, 'response', micro_observations, attempt, 'fcot_meso')
657
  fcot_trace['meso'] = micro_observations
658
 
659
  synthesis_prompt = FCOT_SYNTHESIS_PROMPT.format(toon_schema=toon_schema, score_instructions=score_instructions, tag_list_text=tag_list_text)
660
  save_debug_log(request_id, 'prompt', synthesis_prompt, attempt, 'fcot_synthesis')
 
661
  synthesis_messages = meso_messages +[{"role": "assistant", "content": micro_observations}, {"role": "user", "content": synthesis_prompt}]
662
+
663
+ yield f" - Stage 3: Sending Synthesis/Formatting request to NRP API (Timeout: 600s)...\n"
664
+ raw_text = await _call_nrp(synthesis_messages, attempt_label="fcot_synthesis")
665
+ yield f" - Stage 3: Received Synthesis response.\n\n"
666
+
667
  save_debug_log(request_id, 'response', raw_text, attempt, 'fcot_synthesis')
668
  prompt_used = f"FCoT (NRP):\nMacro: {macro_hypothesis}\nMeso: {micro_observations}"
669
 
670
  else:
671
+ template = LABELING_PROMPT_TEMPLATE_NO_COT if reasoning_method == "none" else LABELING_PROMPT_TEMPLATE
672
+ prompt_text = template.format(
673
  system_persona=system_persona, caption=caption, transcript=transcript,
674
  toon_schema=toon_schema, score_instructions=score_instructions, tag_list_text=tag_list_text
675
  )
676
  prompt_text = f"NOTE: Text Only Analysis (No Video).\n{prompt_text}"
677
  prompt_used = prompt_text
678
+ save_debug_log(request_id, 'prompt', prompt_text, attempt, f'standard_{reasoning_method}')
679
+ yield f"Generating Labels (NRP {reasoning_method.upper()})...\n"
680
+ yield f" - Sending Standard request to NRP API (Model: {model_name}, Timeout: 600s)...\n"
681
+
682
  raw_text = await _call_nrp([
683
  {"role": "system", "content": system_persona},
684
  {"role": "user", "content": prompt_text}
685
+ ], attempt_label=f"standard_{reasoning_method}")
686
+
687
+ yield f" - Received response from NRP API.\n\n"
688
+ save_debug_log(request_id, 'response', raw_text, attempt, f'standard_{reasoning_method}')
689
 
690
  if raw_text:
691
  full_raw_text += f"\n--- Attempt {attempt} ---\n{raw_text}\n"
 
702
 
703
  missing_fields = validate_parsed_data(accumulated_data, is_text_only)
704
  if not missing_fields:
705
+ yield f"Validation Passed. All factuality components processed and confidence scores obtained. (Method: {reasoning_method})\n"
706
  yield {"raw_toon": full_raw_text, "parsed_data": accumulated_data, "prompt_used": prompt_used, "fcot_trace": fcot_trace}
707
  break
708
 
709
  if attempt == max_retries:
710
+ yield f"Max retries reached. Saving incomplete data.\n"
711
+ yield {"raw_toon": full_raw_text, "parsed_data": accumulated_data, "prompt_