Spaces:
Sleeping
Sleeping
Commit ·
4b424d6
1
Parent(s): 1c08c4a
final p1
Browse files- frontend/src/App.tsx +121 -125
- src/app.py +205 -72
- src/benchmarking.py +229 -224
- src/common_utils.py +110 -104
- src/factuality_logic.py +12 -23
- src/inference_logic.py +321 -207
frontend/src/App.tsx
CHANGED
|
@@ -10,77 +10,74 @@ import {
|
|
| 10 |
|
| 11 |
function App() {
|
| 12 |
const[activeTab, setActiveTab] = useState('home');
|
| 13 |
-
const
|
| 14 |
-
const[isProcessing, setIsProcessing] = useState(false);
|
| 15 |
const logContainerRef = useRef<HTMLDivElement>(null);
|
| 16 |
|
| 17 |
// Processing Config State
|
| 18 |
const [modelProvider, setModelProvider] = useState('nrp');
|
| 19 |
-
const
|
| 20 |
const[baseUrl, setBaseUrl] = useState('https://ellm.nrp-nautilus.io/v1'); // NRP Default
|
| 21 |
const[modelName, setModelName] = useState('qwen3'); // Default
|
| 22 |
const[projectId, setProjectId] = useState('');
|
| 23 |
const [location, setLocation] = useState('us-central1');
|
| 24 |
-
const
|
| 25 |
const[reasoningMethod, setReasoningMethod] = useState('cot');
|
| 26 |
const [promptTemplate, setPromptTemplate] = useState('standard');
|
| 27 |
-
const
|
| 28 |
-
const[maxRetries, setMaxRetries] = useState(1);
|
| 29 |
const [availablePrompts, setAvailablePrompts] = useState<any[]>([]);
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
const
|
| 33 |
-
const [predictiveResult, setPredictiveResult] = useState<any>(null);
|
| 34 |
|
| 35 |
// Data States
|
| 36 |
-
const
|
| 37 |
-
const[selectedQueueItems, setSelectedQueueItems] = useState<Set<string>>(new Set());
|
| 38 |
-
const[expandedQueueItems, setExpandedQueueItems] = useState<Set<string>>(new Set());
|
| 39 |
const[lastQueueIndex, setLastQueueIndex] = useState<number | null>(null);
|
| 40 |
|
| 41 |
-
const[singleLinkInput, setSingleLinkInput] = useState('');
|
| 42 |
const [profileList, setProfileList] = useState<any[]>([]);
|
| 43 |
-
const
|
| 44 |
-
const[profilePosts, setProfilePosts] = useState<any[]>([]);
|
| 45 |
-
const [
|
| 46 |
-
const [communityAnalysis, setCommunityAnalysis] = useState<any>(null);
|
| 47 |
-
const[integrityBoard, setIntegrityBoard] = useState<any[]>([]);
|
| 48 |
|
| 49 |
const[datasetList, setDatasetList] = useState<any[]>([]);
|
| 50 |
-
const
|
| 51 |
-
const
|
| 52 |
|
| 53 |
const [benchmarks, setBenchmarks] = useState<any>(null);
|
| 54 |
-
const[leaderboard, setLeaderboard] = useState<any[]>([]);
|
| 55 |
-
const
|
| 56 |
|
| 57 |
// Tags
|
| 58 |
-
const
|
| 59 |
|
| 60 |
// Manual Labeling State
|
| 61 |
-
const
|
| 62 |
-
const[manualCaption, setManualCaption] = useState('');
|
| 63 |
const [manualTags, setManualTags] = useState('');
|
| 64 |
const[manualReasoning, setManualReasoning] = useState('');
|
| 65 |
-
const
|
| 66 |
visual: 5, audio: 5, source: 5, logic: 5, emotion: 5,
|
| 67 |
va: 5, vc: 5, ac: 5, final: 50
|
| 68 |
});
|
| 69 |
-
const[showRubric, setShowRubric] = useState(false);
|
| 70 |
-
const
|
| 71 |
-
const
|
| 72 |
-
const
|
| 73 |
|
| 74 |
// Agent Chat State
|
| 75 |
-
const[agentInput, setAgentInput] = useState('');
|
| 76 |
-
const
|
| 77 |
-
const
|
| 78 |
-
const[agentEndpoint, setAgentEndpoint] = useState('/a2a');
|
| 79 |
-
const
|
| 80 |
const [agentConfig, setAgentConfig] = useState({ use_search: true, use_code: false });
|
| 81 |
|
| 82 |
// Resampling configuration
|
| 83 |
-
const[resampleCount, setResampleCount] = useState<number>(1);
|
| 84 |
|
| 85 |
// Drag Selection references
|
| 86 |
const isDraggingQueueRef = useRef(false);
|
|
@@ -89,9 +86,9 @@ function App() {
|
|
| 89 |
// Quick Demo State
|
| 90 |
const[demoLink, setDemoLink] = useState('');
|
| 91 |
const [demoLogs, setDemoLogs] = useState('');
|
| 92 |
-
const
|
| 93 |
const[demoResult, setDemoResult] = useState<any>(null);
|
| 94 |
-
const[showDemoConfig, setShowDemoConfig] = useState(false);
|
| 95 |
const demoLogContainerRef = useRef<HTMLDivElement>(null);
|
| 96 |
|
| 97 |
useEffect(() => {
|
|
@@ -125,7 +122,6 @@ function App() {
|
|
| 125 |
setLastQueueIndex(null);
|
| 126 |
}
|
| 127 |
if (activeTab === 'profiles') load('/profiles/list', setProfileList);
|
| 128 |
-
if (activeTab === 'community') load('/community/list_datasets', setCommunityDatasets);
|
| 129 |
if (activeTab === 'analytics') load('/analytics/account_integrity', setIntegrityBoard);
|
| 130 |
if (activeTab === 'dataset' || activeTab === 'manual' || activeTab === 'groundtruth') load('/dataset/list', setDatasetList);
|
| 131 |
if (activeTab === 'manual') load('/queue/list', setQueueList);
|
|
@@ -409,28 +405,6 @@ function App() {
|
|
| 409 |
} catch(e: any) { alert("Network error: " + e.toString()); }
|
| 410 |
};
|
| 411 |
|
| 412 |
-
const analyzeComments = async (id: string) => {
|
| 413 |
-
setCommunityAnalysis({ verdict: "Analyzing..." });
|
| 414 |
-
const res = await fetch('/community/analyze', {
|
| 415 |
-
method: 'POST', headers: {'Content-Type': 'application/json'},
|
| 416 |
-
body: JSON.stringify({ dataset_id: id })
|
| 417 |
-
});
|
| 418 |
-
setCommunityAnalysis(await res.json());
|
| 419 |
-
};
|
| 420 |
-
|
| 421 |
-
const runPredictiveTraining = async (useVisual: boolean) => {
|
| 422 |
-
setPredictiveResult({ status: 'training' });
|
| 423 |
-
try {
|
| 424 |
-
const res = await fetch('/benchmarks/train_predictive', {
|
| 425 |
-
method: 'POST', headers: {'Content-Type': 'application/json'},
|
| 426 |
-
body: JSON.stringify({ use_visual_meta: useVisual, model_type: predictiveModelType })
|
| 427 |
-
});
|
| 428 |
-
const data = await res.json();
|
| 429 |
-
setPredictiveResult(data);
|
| 430 |
-
setRefreshTrigger(p => p+1);
|
| 431 |
-
} catch (e) { setPredictiveResult({ error: "Failed to train." }); }
|
| 432 |
-
};
|
| 433 |
-
|
| 434 |
const queueUnlabeledPosts = async () => {
|
| 435 |
const unlabeled = profilePosts.filter(p => !p.is_labeled).map(p => p.link);
|
| 436 |
if(unlabeled.length === 0) return alert("All posts already labeled!");
|
|
@@ -547,6 +521,9 @@ function App() {
|
|
| 547 |
fd.append('prompt_template', promptTemplate);
|
| 548 |
fd.append('custom_query', customQuery);
|
| 549 |
fd.append('max_reprompts', maxRetries.toString());
|
|
|
|
|
|
|
|
|
|
| 550 |
|
| 551 |
try {
|
| 552 |
const res = await fetch('/queue/run', { method: 'POST', body: fd });
|
|
@@ -597,6 +574,9 @@ function App() {
|
|
| 597 |
fd.append('prompt_template', promptTemplate);
|
| 598 |
fd.append('custom_query', customQuery);
|
| 599 |
fd.append('max_reprompts', maxRetries.toString());
|
|
|
|
|
|
|
|
|
|
| 600 |
|
| 601 |
setDemoLogs(prev => prev + '[SYSTEM] Sending analysis payload to model server...\n');
|
| 602 |
|
|
@@ -736,13 +716,11 @@ function App() {
|
|
| 736 |
{[
|
| 737 |
{id:'home', l:'Home & Benchmarks', i:Home},
|
| 738 |
{id:'agent', l:'Agent Nexus', i:Bot},
|
| 739 |
-
{id:'predictive', l:'Predictive Sandbox', i:FlaskConical},
|
| 740 |
{id:'queue', l:'Ingest Queue', i:List},
|
| 741 |
{id:'profiles', l:'User Profiles', i:Users},
|
| 742 |
{id:'manual', l:'Labeling Studio', i:PenTool},
|
| 743 |
{id:'dataset', l:'Data Manager', i:Archive},
|
| 744 |
{id:'groundtruth', l:'Ground Truth (Verified)', i:ShieldCheck},
|
| 745 |
-
{id:'community', l:'Community Trust', i:MessageSquare},
|
| 746 |
{id:'analytics', l:'Analytics', i:BarChart2}
|
| 747 |
].map(t => (
|
| 748 |
<button key={t.id} onClick={() => setActiveTab(t.id)}
|
|
@@ -817,6 +795,7 @@ function App() {
|
|
| 817 |
<div className="space-y-3">
|
| 818 |
<label className="text-[10px] text-slate-500 uppercase font-bold block border-b border-slate-800 pb-1">Inference Strategy</label>
|
| 819 |
<select value={reasoningMethod} onChange={e => setReasoningMethod(e.target.value)} className="w-full bg-slate-900 border border-slate-700 rounded p-2 text-xs text-white">
|
|
|
|
| 820 |
<option value="cot">Standard Chain of Thought</option>
|
| 821 |
<option value="fcot">Fractal Chain of Thought</option>
|
| 822 |
</select>
|
|
@@ -825,6 +804,16 @@ function App() {
|
|
| 825 |
<option key={p.id} value={p.id}>{p.name}</option>
|
| 826 |
)) : <option value="standard">Standard</option>}
|
| 827 |
</select>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 828 |
</div>
|
| 829 |
</div>
|
| 830 |
)}
|
|
@@ -1012,11 +1001,11 @@ function App() {
|
|
| 1012 |
<th className="p-3">Model</th>
|
| 1013 |
<th className="p-3">Prompt</th>
|
| 1014 |
<th className="p-3">Reasoning</th>
|
|
|
|
| 1015 |
<th className="p-3 text-center">FCoT Depth</th>
|
| 1016 |
<th className="p-3 text-right text-emerald-400">Accuracy</th>
|
| 1017 |
<th className="p-3 text-right">Comp. MAE</th>
|
| 1018 |
<th className="p-3 text-right">Tag Acc</th>
|
| 1019 |
-
<th className="p-3 text-right">Samples</th>
|
| 1020 |
<th className="p-3"></th>
|
| 1021 |
</tr>
|
| 1022 |
</thead>
|
|
@@ -1027,17 +1016,18 @@ function App() {
|
|
| 1027 |
<td className="p-3 font-mono text-white">{row.model}</td>
|
| 1028 |
<td className="p-3">{row.prompt}</td>
|
| 1029 |
<td className="p-3 uppercase text-[10px]">{row.reasoning}</td>
|
|
|
|
| 1030 |
<td className="p-3 text-center text-slate-400 font-mono">{row.fcot_depth ?? 0}</td>
|
| 1031 |
<td className="p-3 text-right font-bold text-emerald-400">{row.accuracy}%</td>
|
| 1032 |
<td className="p-3 text-right font-mono text-amber-400">{row.comp_mae}</td>
|
| 1033 |
<td className="p-3 text-right">{row.tag_acc}%</td>
|
| 1034 |
-
<td className="p-3 text-
|
| 1035 |
-
<td className="p-3 text-center" title={row.params}>
|
| 1036 |
<div className="group relative">
|
| 1037 |
<HelpCircle className="w-4 h-4 text-slate-600 cursor-help"/>
|
| 1038 |
<div className="absolute right-0 bottom-6 w-64 p-3 bg-black border border-slate-700 rounded shadow-xl hidden group-hover:block z-50 text-[10px] whitespace-pre-wrap text-left">
|
| 1039 |
<div className="font-bold mb-1 text-slate-400">Config Params</div>
|
| 1040 |
-
{row.params}
|
|
|
|
| 1041 |
</div>
|
| 1042 |
</div>
|
| 1043 |
</td>
|
|
@@ -1050,6 +1040,54 @@ function App() {
|
|
| 1050 |
</table>
|
| 1051 |
</div>
|
| 1052 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1053 |
</div>
|
| 1054 |
)}
|
| 1055 |
|
|
@@ -1124,6 +1162,7 @@ function App() {
|
|
| 1124 |
<div className="space-y-1 mt-2">
|
| 1125 |
<label className="text-[10px] text-slate-500">Reasoning Method</label>
|
| 1126 |
<select value={reasoningMethod} onChange={e => setReasoningMethod(e.target.value)} className="w-full bg-slate-950 border border-slate-700 rounded p-2 text-xs text-white">
|
|
|
|
| 1127 |
<option value="cot">Standard Chain of Thought</option>
|
| 1128 |
<option value="fcot">Fractal Chain of Thought</option>
|
| 1129 |
</select>
|
|
@@ -1211,31 +1250,6 @@ function App() {
|
|
| 1211 |
</div>
|
| 1212 |
)}
|
| 1213 |
|
| 1214 |
-
{/* PREDICTIVE SANDBOX */}
|
| 1215 |
-
{activeTab === 'predictive' && (
|
| 1216 |
-
<div className="flex h-full gap-6">
|
| 1217 |
-
<div className="w-1/3 bg-slate-900/50 border border-slate-800 rounded-xl p-6 flex flex-col gap-6">
|
| 1218 |
-
<div>
|
| 1219 |
-
<h2 className="text-lg font-bold text-white flex items-center gap-2"><FlaskConical className="w-5 h-5"/> Model Sandbox</h2>
|
| 1220 |
-
<p className="text-xs text-slate-400">Train models on the text features of the current Ground Truth dataset.</p>
|
| 1221 |
-
</div>
|
| 1222 |
-
<button onClick={() => runPredictiveTraining(false)} className="w-full py-3 bg-indigo-600 hover:bg-indigo-500 text-white rounded font-bold text-xs">Train Baseline</button>
|
| 1223 |
-
</div>
|
| 1224 |
-
<div className="flex-1 bg-slate-900/50 border border-slate-800 rounded-xl p-6 relative overflow-hidden overflow-y-auto">
|
| 1225 |
-
{predictiveResult ? (
|
| 1226 |
-
predictiveResult.status === 'training' ? (
|
| 1227 |
-
<div className="absolute inset-0 flex items-center justify-center text-indigo-400 animate-pulse">Training Model...</div>
|
| 1228 |
-
) : predictiveResult.error ? ( <div className="text-red-400">{predictiveResult.error}</div> ) : (
|
| 1229 |
-
<div className="space-y-6">
|
| 1230 |
-
<div className="text-xl font-mono text-white">Training Complete ({predictiveResult.type})</div>
|
| 1231 |
-
<pre className="text-xs text-slate-400 bg-black p-4 rounded">{JSON.stringify(predictiveResult, null, 2)}</pre>
|
| 1232 |
-
</div>
|
| 1233 |
-
)
|
| 1234 |
-
) : <div className="flex h-full items-center justify-center text-slate-600">Ready to train.</div>}
|
| 1235 |
-
</div>
|
| 1236 |
-
</div>
|
| 1237 |
-
)}
|
| 1238 |
-
|
| 1239 |
{/* QUEUE TAB */}
|
| 1240 |
{activeTab === 'queue' && (
|
| 1241 |
<div className="flex h-full gap-6">
|
|
@@ -1320,6 +1334,7 @@ function App() {
|
|
| 1320 |
<div className="space-y-1 mt-2">
|
| 1321 |
<label className="text-[10px] text-slate-500">Reasoning Method</label>
|
| 1322 |
<select value={reasoningMethod} onChange={e => setReasoningMethod(e.target.value)} className="w-full bg-slate-950 border border-slate-700 rounded p-2 text-xs text-white">
|
|
|
|
| 1323 |
<option value="cot">Standard Chain of Thought</option>
|
| 1324 |
<option value="fcot">Fractal Chain of Thought</option>
|
| 1325 |
</select>
|
|
@@ -1333,6 +1348,18 @@ function App() {
|
|
| 1333 |
</select>
|
| 1334 |
</div>
|
| 1335 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1336 |
{/* Process Controls */}
|
| 1337 |
{isProcessing ? (
|
| 1338 |
<button onClick={stopProcessing} className="w-full py-2 bg-red-600 hover:bg-red-500 text-white rounded font-bold text-xs flex items-center justify-center gap-2 animate-pulse">
|
|
@@ -1721,7 +1748,7 @@ function App() {
|
|
| 1721 |
<span className="capitalize text-slate-300 font-bold">{k}</span>
|
| 1722 |
<span className="text-indigo-400 font-mono font-bold">{(manualScores as any)[k]}/10</span>
|
| 1723 |
</div>
|
| 1724 |
-
<input type="range" min="1" max="10" value={(manualScores as any)[k]} onChange={e => setManualScores({...manualScores,
|
| 1725 |
</div>
|
| 1726 |
))}
|
| 1727 |
</div>
|
|
@@ -1813,37 +1840,6 @@ function App() {
|
|
| 1813 |
</div>
|
| 1814 |
)}
|
| 1815 |
|
| 1816 |
-
{/* COMMUNITY AND ANALYTICS TABS (UNCHANGED) */}
|
| 1817 |
-
{activeTab === 'community' && (
|
| 1818 |
-
<div className="flex h-full gap-6">
|
| 1819 |
-
<div className="w-1/3 bg-slate-900/50 border border-slate-800 rounded-xl overflow-auto">
|
| 1820 |
-
<div className="p-3 bg-slate-950 border-b border-slate-800 text-xs font-bold text-slate-400">Comment Datasets</div>
|
| 1821 |
-
{communityDatasets.map((d, i) => (
|
| 1822 |
-
<div key={i} onClick={() => analyzeComments(d.id)} className="p-4 border-b border-slate-800/50 cursor-pointer hover:bg-white/5">
|
| 1823 |
-
<div className="text-xs font-mono text-indigo-400 mb-1">{d.id}</div>
|
| 1824 |
-
<div className="text-[10px] text-slate-500">{d.count} comments</div>
|
| 1825 |
-
</div>
|
| 1826 |
-
))}
|
| 1827 |
-
</div>
|
| 1828 |
-
<div className="flex-1 flex flex-col justify-center items-center bg-slate-900/20 border border-slate-800 rounded-xl p-8">
|
| 1829 |
-
{communityAnalysis ? (
|
| 1830 |
-
<div className="text-center w-full max-w-md">
|
| 1831 |
-
<div className="text-xs uppercase text-slate-500 mb-2 tracking-widest">Community Quantization</div>
|
| 1832 |
-
<h2 className="text-5xl font-bold text-white mb-2">{communityAnalysis.trust_score?.toFixed(0)}<span className="text-xl text-slate-600">/100</span></h2>
|
| 1833 |
-
<div className={`text-lg font-bold mb-8 px-4 py-1 rounded-full inline-block ${communityAnalysis.trust_score < 40 ? 'bg-red-500/10 text-red-400' : 'bg-emerald-500/10 text-emerald-400'}`}>
|
| 1834 |
-
{communityAnalysis.verdict}
|
| 1835 |
-
</div>
|
| 1836 |
-
</div>
|
| 1837 |
-
) : (
|
| 1838 |
-
<div className="text-slate-600 flex flex-col items-center">
|
| 1839 |
-
<MessageSquare className="w-12 h-12 mb-4 opacity-20"/>
|
| 1840 |
-
<span>Select a dataset to analyze community sentiment.</span>
|
| 1841 |
-
</div>
|
| 1842 |
-
)}
|
| 1843 |
-
</div>
|
| 1844 |
-
</div>
|
| 1845 |
-
)}
|
| 1846 |
-
|
| 1847 |
{activeTab === 'analytics' && (
|
| 1848 |
<div className="h-full overflow-auto">
|
| 1849 |
<div className="flex items-center justify-between mb-4">
|
|
|
|
| 10 |
|
| 11 |
function App() {
|
| 12 |
const[activeTab, setActiveTab] = useState('home');
|
| 13 |
+
const[logs, setLogs] = useState<string>('System Ready.\n');
|
| 14 |
+
const [isProcessing, setIsProcessing] = useState(false);
|
| 15 |
const logContainerRef = useRef<HTMLDivElement>(null);
|
| 16 |
|
| 17 |
// Processing Config State
|
| 18 |
const [modelProvider, setModelProvider] = useState('nrp');
|
| 19 |
+
const[apiKey, setApiKey] = useState('');
|
| 20 |
const[baseUrl, setBaseUrl] = useState('https://ellm.nrp-nautilus.io/v1'); // NRP Default
|
| 21 |
const[modelName, setModelName] = useState('qwen3'); // Default
|
| 22 |
const[projectId, setProjectId] = useState('');
|
| 23 |
const [location, setLocation] = useState('us-central1');
|
| 24 |
+
const[includeComments, setIncludeComments] = useState(false);
|
| 25 |
const[reasoningMethod, setReasoningMethod] = useState('cot');
|
| 26 |
const [promptTemplate, setPromptTemplate] = useState('standard');
|
| 27 |
+
const[customQuery, setCustomQuery] = useState('');
|
| 28 |
+
const [maxRetries, setMaxRetries] = useState(1);
|
| 29 |
const [availablePrompts, setAvailablePrompts] = useState<any[]>([]);
|
| 30 |
+
|
| 31 |
+
const [useSearch, setUseSearch] = useState(false);
|
| 32 |
+
const[useCode, setUseCode] = useState(false);
|
|
|
|
| 33 |
|
| 34 |
// Data States
|
| 35 |
+
const[queueList, setQueueList] = useState<any[]>([]);
|
| 36 |
+
const [selectedQueueItems, setSelectedQueueItems] = useState<Set<string>>(new Set());
|
| 37 |
+
const [expandedQueueItems, setExpandedQueueItems] = useState<Set<string>>(new Set());
|
| 38 |
const[lastQueueIndex, setLastQueueIndex] = useState<number | null>(null);
|
| 39 |
|
| 40 |
+
const [singleLinkInput, setSingleLinkInput] = useState('');
|
| 41 |
const [profileList, setProfileList] = useState<any[]>([]);
|
| 42 |
+
const[selectedProfile, setSelectedProfile] = useState<any>(null);
|
| 43 |
+
const [profilePosts, setProfilePosts] = useState<any[]>([]);
|
| 44 |
+
const [integrityBoard, setIntegrityBoard] = useState<any[]>([]);
|
|
|
|
|
|
|
| 45 |
|
| 46 |
const[datasetList, setDatasetList] = useState<any[]>([]);
|
| 47 |
+
const[selectedItems, setSelectedItems] = useState<Set<string>>(new Set());
|
| 48 |
+
const[lastDatasetIndex, setLastDatasetIndex] = useState<number | null>(null);
|
| 49 |
|
| 50 |
const [benchmarks, setBenchmarks] = useState<any>(null);
|
| 51 |
+
const [leaderboard, setLeaderboard] = useState<any[]>([]);
|
| 52 |
+
const[refreshTrigger, setRefreshTrigger] = useState(0);
|
| 53 |
|
| 54 |
// Tags
|
| 55 |
+
const[configuredTags, setConfiguredTags] = useState<any>({});
|
| 56 |
|
| 57 |
// Manual Labeling State
|
| 58 |
+
const[manualLink, setManualLink] = useState('');
|
| 59 |
+
const [manualCaption, setManualCaption] = useState('');
|
| 60 |
const [manualTags, setManualTags] = useState('');
|
| 61 |
const[manualReasoning, setManualReasoning] = useState('');
|
| 62 |
+
const[manualScores, setManualScores] = useState({
|
| 63 |
visual: 5, audio: 5, source: 5, logic: 5, emotion: 5,
|
| 64 |
va: 5, vc: 5, ac: 5, final: 50
|
| 65 |
});
|
| 66 |
+
const [showRubric, setShowRubric] = useState(false);
|
| 67 |
+
const[aiReference, setAiReference] = useState<any>(null);
|
| 68 |
+
const[labelBrowserMode, setLabelBrowserMode] = useState<'queue' | 'dataset'>('queue');
|
| 69 |
+
const[labelFilter, setLabelFilter] = useState('');
|
| 70 |
|
| 71 |
// Agent Chat State
|
| 72 |
+
const [agentInput, setAgentInput] = useState('');
|
| 73 |
+
const[agentMessages, setAgentMessages] = useState<any[]>([]);
|
| 74 |
+
const[agentThinking, setAgentThinking] = useState(false);
|
| 75 |
+
const [agentEndpoint, setAgentEndpoint] = useState('/a2a');
|
| 76 |
+
const[agentMethod, setAgentMethod] = useState('agent.process');
|
| 77 |
const [agentConfig, setAgentConfig] = useState({ use_search: true, use_code: false });
|
| 78 |
|
| 79 |
// Resampling configuration
|
| 80 |
+
const [resampleCount, setResampleCount] = useState<number>(1);
|
| 81 |
|
| 82 |
// Drag Selection references
|
| 83 |
const isDraggingQueueRef = useRef(false);
|
|
|
|
| 86 |
// Quick Demo State
|
| 87 |
const[demoLink, setDemoLink] = useState('');
|
| 88 |
const [demoLogs, setDemoLogs] = useState('');
|
| 89 |
+
const[demoIsProcessing, setDemoIsProcessing] = useState(false);
|
| 90 |
const[demoResult, setDemoResult] = useState<any>(null);
|
| 91 |
+
const [showDemoConfig, setShowDemoConfig] = useState(false);
|
| 92 |
const demoLogContainerRef = useRef<HTMLDivElement>(null);
|
| 93 |
|
| 94 |
useEffect(() => {
|
|
|
|
| 122 |
setLastQueueIndex(null);
|
| 123 |
}
|
| 124 |
if (activeTab === 'profiles') load('/profiles/list', setProfileList);
|
|
|
|
| 125 |
if (activeTab === 'analytics') load('/analytics/account_integrity', setIntegrityBoard);
|
| 126 |
if (activeTab === 'dataset' || activeTab === 'manual' || activeTab === 'groundtruth') load('/dataset/list', setDatasetList);
|
| 127 |
if (activeTab === 'manual') load('/queue/list', setQueueList);
|
|
|
|
| 405 |
} catch(e: any) { alert("Network error: " + e.toString()); }
|
| 406 |
};
|
| 407 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 408 |
const queueUnlabeledPosts = async () => {
|
| 409 |
const unlabeled = profilePosts.filter(p => !p.is_labeled).map(p => p.link);
|
| 410 |
if(unlabeled.length === 0) return alert("All posts already labeled!");
|
|
|
|
| 521 |
fd.append('prompt_template', promptTemplate);
|
| 522 |
fd.append('custom_query', customQuery);
|
| 523 |
fd.append('max_reprompts', maxRetries.toString());
|
| 524 |
+
|
| 525 |
+
fd.append('use_search', useSearch.toString());
|
| 526 |
+
fd.append('use_code', useCode.toString());
|
| 527 |
|
| 528 |
try {
|
| 529 |
const res = await fetch('/queue/run', { method: 'POST', body: fd });
|
|
|
|
| 574 |
fd.append('prompt_template', promptTemplate);
|
| 575 |
fd.append('custom_query', customQuery);
|
| 576 |
fd.append('max_reprompts', maxRetries.toString());
|
| 577 |
+
|
| 578 |
+
fd.append('use_search', useSearch.toString());
|
| 579 |
+
fd.append('use_code', useCode.toString());
|
| 580 |
|
| 581 |
setDemoLogs(prev => prev + '[SYSTEM] Sending analysis payload to model server...\n');
|
| 582 |
|
|
|
|
| 716 |
{[
|
| 717 |
{id:'home', l:'Home & Benchmarks', i:Home},
|
| 718 |
{id:'agent', l:'Agent Nexus', i:Bot},
|
|
|
|
| 719 |
{id:'queue', l:'Ingest Queue', i:List},
|
| 720 |
{id:'profiles', l:'User Profiles', i:Users},
|
| 721 |
{id:'manual', l:'Labeling Studio', i:PenTool},
|
| 722 |
{id:'dataset', l:'Data Manager', i:Archive},
|
| 723 |
{id:'groundtruth', l:'Ground Truth (Verified)', i:ShieldCheck},
|
|
|
|
| 724 |
{id:'analytics', l:'Analytics', i:BarChart2}
|
| 725 |
].map(t => (
|
| 726 |
<button key={t.id} onClick={() => setActiveTab(t.id)}
|
|
|
|
| 795 |
<div className="space-y-3">
|
| 796 |
<label className="text-[10px] text-slate-500 uppercase font-bold block border-b border-slate-800 pb-1">Inference Strategy</label>
|
| 797 |
<select value={reasoningMethod} onChange={e => setReasoningMethod(e.target.value)} className="w-full bg-slate-900 border border-slate-700 rounded p-2 text-xs text-white">
|
| 798 |
+
<option value="none">Direct (No CoT)</option>
|
| 799 |
<option value="cot">Standard Chain of Thought</option>
|
| 800 |
<option value="fcot">Fractal Chain of Thought</option>
|
| 801 |
</select>
|
|
|
|
| 804 |
<option key={p.id} value={p.id}>{p.name}</option>
|
| 805 |
)) : <option value="standard">Standard</option>}
|
| 806 |
</select>
|
| 807 |
+
|
| 808 |
+
<label className="text-[10px] text-slate-500 uppercase font-bold block border-b border-slate-800 pb-1 mt-3">Agentic Tools</label>
|
| 809 |
+
<label className="flex items-center gap-2 text-xs text-slate-300 cursor-pointer">
|
| 810 |
+
<input type="checkbox" className="accent-indigo-500" checked={useSearch} onChange={e => setUseSearch(e.target.checked)} />
|
| 811 |
+
Enable Web Search Retrieval
|
| 812 |
+
</label>
|
| 813 |
+
<label className="flex items-center gap-2 text-xs text-slate-300 cursor-pointer">
|
| 814 |
+
<input type="checkbox" className="accent-indigo-500" checked={useCode} onChange={e => setUseCode(e.target.checked)} />
|
| 815 |
+
Enable Code Execution
|
| 816 |
+
</label>
|
| 817 |
</div>
|
| 818 |
</div>
|
| 819 |
)}
|
|
|
|
| 1001 |
<th className="p-3">Model</th>
|
| 1002 |
<th className="p-3">Prompt</th>
|
| 1003 |
<th className="p-3">Reasoning</th>
|
| 1004 |
+
<th className="p-3 text-center">Tools</th>
|
| 1005 |
<th className="p-3 text-center">FCoT Depth</th>
|
| 1006 |
<th className="p-3 text-right text-emerald-400">Accuracy</th>
|
| 1007 |
<th className="p-3 text-right">Comp. MAE</th>
|
| 1008 |
<th className="p-3 text-right">Tag Acc</th>
|
|
|
|
| 1009 |
<th className="p-3"></th>
|
| 1010 |
</tr>
|
| 1011 |
</thead>
|
|
|
|
| 1016 |
<td className="p-3 font-mono text-white">{row.model}</td>
|
| 1017 |
<td className="p-3">{row.prompt}</td>
|
| 1018 |
<td className="p-3 uppercase text-[10px]">{row.reasoning}</td>
|
| 1019 |
+
<td className="p-3 text-center text-sky-400 font-mono text-[10px]">{row.tools || 'None'}</td>
|
| 1020 |
<td className="p-3 text-center text-slate-400 font-mono">{row.fcot_depth ?? 0}</td>
|
| 1021 |
<td className="p-3 text-right font-bold text-emerald-400">{row.accuracy}%</td>
|
| 1022 |
<td className="p-3 text-right font-mono text-amber-400">{row.comp_mae}</td>
|
| 1023 |
<td className="p-3 text-right">{row.tag_acc}%</td>
|
| 1024 |
+
<td className="p-3 text-center">
|
|
|
|
| 1025 |
<div className="group relative">
|
| 1026 |
<HelpCircle className="w-4 h-4 text-slate-600 cursor-help"/>
|
| 1027 |
<div className="absolute right-0 bottom-6 w-64 p-3 bg-black border border-slate-700 rounded shadow-xl hidden group-hover:block z-50 text-[10px] whitespace-pre-wrap text-left">
|
| 1028 |
<div className="font-bold mb-1 text-slate-400">Config Params</div>
|
| 1029 |
+
<div>{row.params}</div>
|
| 1030 |
+
<div className="mt-2 pt-2 border-t border-slate-800 text-slate-400 font-bold">Samples: {row.samples}</div>
|
| 1031 |
</div>
|
| 1032 |
</div>
|
| 1033 |
</td>
|
|
|
|
| 1040 |
</table>
|
| 1041 |
</div>
|
| 1042 |
</div>
|
| 1043 |
+
|
| 1044 |
+
{/* Detailed Vector Accuracies */}
|
| 1045 |
+
<div className="bg-slate-900/50 border border-slate-800 rounded-xl p-6 mt-6 mb-8">
|
| 1046 |
+
<h3 className="text-sm font-bold text-white uppercase mb-4 flex items-center gap-2">
|
| 1047 |
+
<BarChart2 className="w-4 h-4 text-sky-400"/> Detailed Vector Error Analysis (MAE)
|
| 1048 |
+
</h3>
|
| 1049 |
+
<div className="overflow-x-auto">
|
| 1050 |
+
<table className="w-full text-left text-xs text-slate-400">
|
| 1051 |
+
<thead className="bg-slate-950 text-slate-500 uppercase">
|
| 1052 |
+
<tr>
|
| 1053 |
+
<th className="p-3">Model</th>
|
| 1054 |
+
<th className="p-3">Prompt</th>
|
| 1055 |
+
<th className="p-3">Reasoning</th>
|
| 1056 |
+
<th className="p-3">Tools / Techniques</th>
|
| 1057 |
+
<th className="p-3 text-right">Vis</th>
|
| 1058 |
+
<th className="p-3 text-right">Aud</th>
|
| 1059 |
+
<th className="p-3 text-right">Src</th>
|
| 1060 |
+
<th className="p-3 text-right">Log</th>
|
| 1061 |
+
<th className="p-3 text-right">Emo</th>
|
| 1062 |
+
<th className="p-3 text-right">V-A</th>
|
| 1063 |
+
<th className="p-3 text-right">V-C</th>
|
| 1064 |
+
<th className="p-3 text-right">A-C</th>
|
| 1065 |
+
</tr>
|
| 1066 |
+
</thead>
|
| 1067 |
+
<tbody className="divide-y divide-slate-800">
|
| 1068 |
+
{leaderboard && leaderboard.map((row, i) => (
|
| 1069 |
+
<tr key={i} className="hover:bg-white/5">
|
| 1070 |
+
<td className="p-3 font-mono text-white">{row.model}</td>
|
| 1071 |
+
<td className="p-3">{row.prompt}</td>
|
| 1072 |
+
<td className="p-3 uppercase text-[10px]">{row.reasoning}</td>
|
| 1073 |
+
<td className="p-3 text-sky-400 font-mono text-[10px]">{row.tools || 'None'}</td>
|
| 1074 |
+
<td className="p-3 text-right font-mono">{row.err_visual_score ?? '-'}</td>
|
| 1075 |
+
<td className="p-3 text-right font-mono">{row.err_audio_score ?? '-'}</td>
|
| 1076 |
+
<td className="p-3 text-right font-mono">{row.err_source_score ?? '-'}</td>
|
| 1077 |
+
<td className="p-3 text-right font-mono">{row.err_logic_score ?? '-'}</td>
|
| 1078 |
+
<td className="p-3 text-right font-mono">{row.err_emotion_score ?? '-'}</td>
|
| 1079 |
+
<td className="p-3 text-right font-mono">{row.err_align_video_audio ?? '-'}</td>
|
| 1080 |
+
<td className="p-3 text-right font-mono">{row.err_align_video_caption ?? '-'}</td>
|
| 1081 |
+
<td className="p-3 text-right font-mono">{row.err_align_audio_caption ?? '-'}</td>
|
| 1082 |
+
</tr>
|
| 1083 |
+
))}
|
| 1084 |
+
{(!leaderboard || leaderboard.length === 0) && (
|
| 1085 |
+
<tr><td colSpan={12} className="p-4 text-center text-slate-600">No detailed benchmark data available.</td></tr>
|
| 1086 |
+
)}
|
| 1087 |
+
</tbody>
|
| 1088 |
+
</table>
|
| 1089 |
+
</div>
|
| 1090 |
+
</div>
|
| 1091 |
</div>
|
| 1092 |
)}
|
| 1093 |
|
|
|
|
| 1162 |
<div className="space-y-1 mt-2">
|
| 1163 |
<label className="text-[10px] text-slate-500">Reasoning Method</label>
|
| 1164 |
<select value={reasoningMethod} onChange={e => setReasoningMethod(e.target.value)} className="w-full bg-slate-950 border border-slate-700 rounded p-2 text-xs text-white">
|
| 1165 |
+
<option value="none">Direct (No CoT)</option>
|
| 1166 |
<option value="cot">Standard Chain of Thought</option>
|
| 1167 |
<option value="fcot">Fractal Chain of Thought</option>
|
| 1168 |
</select>
|
|
|
|
| 1250 |
</div>
|
| 1251 |
)}
|
| 1252 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1253 |
{/* QUEUE TAB */}
|
| 1254 |
{activeTab === 'queue' && (
|
| 1255 |
<div className="flex h-full gap-6">
|
|
|
|
| 1334 |
<div className="space-y-1 mt-2">
|
| 1335 |
<label className="text-[10px] text-slate-500">Reasoning Method</label>
|
| 1336 |
<select value={reasoningMethod} onChange={e => setReasoningMethod(e.target.value)} className="w-full bg-slate-950 border border-slate-700 rounded p-2 text-xs text-white">
|
| 1337 |
+
<option value="none">Direct (No CoT)</option>
|
| 1338 |
<option value="cot">Standard Chain of Thought</option>
|
| 1339 |
<option value="fcot">Fractal Chain of Thought</option>
|
| 1340 |
</select>
|
|
|
|
| 1348 |
</select>
|
| 1349 |
</div>
|
| 1350 |
|
| 1351 |
+
<div className="space-y-2 mt-2">
|
| 1352 |
+
<label className="text-[10px] text-slate-500 uppercase font-bold block border-b border-slate-800 pb-1">Agentic Tools</label>
|
| 1353 |
+
<label className="flex items-center gap-2 text-xs text-slate-300 cursor-pointer">
|
| 1354 |
+
<input type="checkbox" className="accent-indigo-500" checked={useSearch} onChange={e => setUseSearch(e.target.checked)} />
|
| 1355 |
+
Enable Web Search Retrieval
|
| 1356 |
+
</label>
|
| 1357 |
+
<label className="flex items-center gap-2 text-xs text-slate-300 cursor-pointer">
|
| 1358 |
+
<input type="checkbox" className="accent-indigo-500" checked={useCode} onChange={e => setUseCode(e.target.checked)} />
|
| 1359 |
+
Enable Code Execution
|
| 1360 |
+
</label>
|
| 1361 |
+
</div>
|
| 1362 |
+
|
| 1363 |
{/* Process Controls */}
|
| 1364 |
{isProcessing ? (
|
| 1365 |
<button onClick={stopProcessing} className="w-full py-2 bg-red-600 hover:bg-red-500 text-white rounded font-bold text-xs flex items-center justify-center gap-2 animate-pulse">
|
|
|
|
| 1748 |
<span className="capitalize text-slate-300 font-bold">{k}</span>
|
| 1749 |
<span className="text-indigo-400 font-mono font-bold">{(manualScores as any)[k]}/10</span>
|
| 1750 |
</div>
|
| 1751 |
+
<input type="range" min="1" max="10" value={(manualScores as any)[k]} onChange={e => setManualScores({...manualScores,[k]: parseInt(e.target.value)})} className="w-full accent-indigo-500"/>
|
| 1752 |
</div>
|
| 1753 |
))}
|
| 1754 |
</div>
|
|
|
|
| 1840 |
</div>
|
| 1841 |
)}
|
| 1842 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1843 |
{activeTab === 'analytics' && (
|
| 1844 |
<div className="h-full overflow-auto">
|
| 1845 |
<div className="flex items-center justify-between mb-4">
|
src/app.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
import os
|
| 2 |
import sys
|
| 3 |
|
| 4 |
-
#
|
| 5 |
current_dir = os.path.dirname(os.path.abspath(__file__))
|
| 6 |
if current_dir not in sys.path:
|
| 7 |
sys.path.append(current_dir)
|
|
@@ -30,11 +30,12 @@ import agent_logic
|
|
| 30 |
import common_utils
|
| 31 |
|
| 32 |
from toon_parser import parse_veracity_toon
|
| 33 |
-
from labeling_logic import PROMPT_VARIANTS, LABELING_PROMPT_TEMPLATE, FCOT_MACRO_PROMPT
|
| 34 |
import benchmarking
|
| 35 |
|
| 36 |
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
|
| 37 |
logger = logging.getLogger(__name__)
|
|
|
|
| 38 |
LITE_MODE = os.getenv("LITE_MODE", "true").lower() == "true"
|
| 39 |
|
| 40 |
app = FastAPI()
|
|
@@ -78,19 +79,19 @@ except Exception as e:
|
|
| 78 |
agent_mount_status = f"error_{str(e)}"
|
| 79 |
|
| 80 |
# --- Static Files & Frontend ---
|
| 81 |
-
STATIC_DIR = "
|
| 82 |
-
if
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
|
| 91 |
app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
|
| 92 |
|
| 93 |
-
#
|
| 94 |
assets_path = os.path.join(STATIC_DIR, "assets")
|
| 95 |
if os.path.exists(assets_path):
|
| 96 |
app.mount("/assets", StaticFiles(directory=assets_path), name="assets")
|
|
@@ -227,10 +228,6 @@ async def get_benchmark_stats():
|
|
| 227 |
async def get_benchmark_leaderboard():
|
| 228 |
return benchmarking.generate_leaderboard()
|
| 229 |
|
| 230 |
-
@app.post("/benchmarks/train_predictive")
|
| 231 |
-
async def run_predictive_training(config: dict = Body(...)):
|
| 232 |
-
return benchmarking.train_predictive_sandbox(config)
|
| 233 |
-
|
| 234 |
@app.get("/config/prompts")
|
| 235 |
async def list_prompts():
|
| 236 |
return [{"id": k, "name": v['description']} for k, v in PROMPT_VARIANTS.items()]
|
|
@@ -260,7 +257,7 @@ async def list_all_tags():
|
|
| 260 |
t = t.strip()
|
| 261 |
if t: tags_count[t] = tags_count.get(t, 0) + 1
|
| 262 |
sorted_tags = sorted(tags_count.items(), key=lambda x: x[1], reverse=True)
|
| 263 |
-
return
|
| 264 |
|
| 265 |
@app.post("/extension/ingest")
|
| 266 |
async def extension_ingest_link(request: Request):
|
|
@@ -304,7 +301,7 @@ async def promote_to_ground_truth(request: Request):
|
|
| 304 |
try:
|
| 305 |
data = await request.json()
|
| 306 |
target_ids = data.get("ids",[])
|
| 307 |
-
if not target_ids and data.get("id"): target_ids =
|
| 308 |
|
| 309 |
if not target_ids: return JSONResponse({"status": "error", "message": "No IDs provided"}, status_code=400)
|
| 310 |
|
|
@@ -362,7 +359,7 @@ async def delete_ground_truth(request: Request):
|
|
| 362 |
try:
|
| 363 |
data = await request.json()
|
| 364 |
target_ids = data.get("ids",[])
|
| 365 |
-
if not target_ids and data.get("id"): target_ids =
|
| 366 |
if not target_ids: raise HTTPException(status_code=400)
|
| 367 |
|
| 368 |
target_ids =[str(t) for t in target_ids]
|
|
@@ -619,6 +616,29 @@ async def save_manual_label(request: Request):
|
|
| 619 |
writer = csv.DictWriter(f, fieldnames=GROUND_TRUTH_FIELDS, extrasaction='ignore')
|
| 620 |
writer.writeheader()
|
| 621 |
writer.writerows(rows)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 622 |
|
| 623 |
update_queue_status(link, "Processed")
|
| 624 |
return {"status": "success", "id": tweet_id}
|
|
@@ -626,29 +646,6 @@ async def save_manual_label(request: Request):
|
|
| 626 |
logger.error(f"Save Manual Error: {e}")
|
| 627 |
return JSONResponse({"status": "error", "message": str(e)}, status_code=500)
|
| 628 |
|
| 629 |
-
@app.get("/community/list_datasets")
async def list_community_datasets():
    """List the comment datasets stored as CSV files under ``data/comments``.

    Returns:
        A list of ``{"id": <file stem>, "count": <physical lines minus one>}``
        dicts, one per ``*.csv`` file in the directory. The list is empty when
        the directory does not exist.
    """
    path = Path("data/comments")
    files = []
    if path.exists():
        for f in path.glob("*.csv"):
            # Count lines inside a context manager so the handle is closed
            # deterministically instead of being left to the garbage collector.
            with open(f, encoding='utf-8') as fh:
                line_count = sum(1 for _ in fh)
            # One line is assumed to be the header; clamp so that a completely
            # empty file reports 0 rows rather than -1.
            files.append({"id": f.stem, "count": max(0, line_count - 1)})
    return files
|
| 637 |
-
|
| 638 |
-
@app.post("/community/analyze")
async def analyze_community(dataset_id: str = Body(..., embed=True)):
    """Derive a keyword-based community trust score for one comment dataset.

    Reads ``data/comments/<dataset_id>.csv`` and counts the comments whose text
    contains at least one "skeptical" keyword and those containing at least one
    "trusting" keyword. The score starts at 50, gains 2 per trusting comment,
    loses 5 per skeptical comment and is clamped to the range 0..100.

    Args:
        dataset_id: File stem of the dataset, taken from the request body.

    Returns:
        ``{"score": 0, "verdict": "No Data"}`` when the file has no rows,
        otherwise a dict with ``dataset_id``, ``trust_score``, ``verdict`` and
        a ``details`` dict holding the two comment counts.

    Raises:
        HTTPException: 404 when the id is not a bare file stem or the file is missing.
    """
    # dataset_id is caller-controlled and ends up inside a filesystem path, so
    # accept bare file stems only (no separators, no "." / ".." components).
    if (not dataset_id or dataset_id in (".", "..") or "\\" in dataset_id
            or Path(dataset_id).name != dataset_id):
        raise HTTPException(status_code=404)

    path = Path(f"data/comments/{dataset_id}.csv")
    if not path.exists(): raise HTTPException(status_code=404)
    comments = list(common_utils.robust_read_csv(path))
    if not comments: return {"score": 0, "verdict": "No Data"}

    # NOTE(review): matching is by substring, so short keys such as "ai" or "bs"
    # also hit words like "said" or "jobs" -- confirm whether that is intended.
    s_keys = ["fake", "lie", "staged", "bs", "propaganda", "ai", "deepfake"]
    t_keys = ["true", "real", "confirmed", "fact", "source", "proof"]

    # Lower-case each comment once; a missing or empty text column counts as
    # an empty comment instead of raising.
    texts = [str(c.get('text') or '').lower() for c in comments]
    s_count = sum(1 for text in texts if any(k in text for k in s_keys))
    t_count = sum(1 for text in texts if any(k in text for k in t_keys))

    score = max(0, min(100, 50 + (t_count * 2) - (s_count * 5)))
    verdict = "Community Skepticism" if score < 30 else "Community Verification" if score > 70 else "Neutral/Mixed"
    return {"dataset_id": dataset_id, "trust_score": score, "verdict": verdict, "details": {"skeptical_comments": s_count, "trusting_comments": t_count}}
|
| 651 |
-
|
| 652 |
@app.get("/dataset/list")
|
| 653 |
async def get_dataset_list():
|
| 654 |
dataset =[]
|
|
@@ -673,26 +670,77 @@ async def get_dataset_list():
|
|
| 673 |
async def get_account_integrity():
|
| 674 |
id_map = {}
|
| 675 |
prof_dir = Path("data/profiles")
|
|
|
|
|
|
|
|
|
|
| 676 |
if prof_dir.exists():
|
| 677 |
for d in prof_dir.iterdir():
|
| 678 |
-
|
| 679 |
-
|
| 680 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 681 |
|
| 682 |
scores_map = {}
|
| 683 |
for fname in ["data/dataset.csv", "data/manual_dataset.csv"]:
|
| 684 |
-
|
|
|
|
|
|
|
| 685 |
tid = row.get('id')
|
|
|
|
| 686 |
sc = row.get('final_veracity_score', '0')
|
|
|
|
|
|
|
| 687 |
try: val = float(re.sub(r'[^\d.]', '', str(sc)))
|
| 688 |
-
except: val =
|
| 689 |
|
| 690 |
-
|
| 691 |
-
if
|
| 692 |
-
|
| 693 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 694 |
|
| 695 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 696 |
|
| 697 |
@app.post("/queue/add")
|
| 698 |
async def add_queue_item(link: str = Body(..., embed=True)):
|
|
@@ -838,19 +886,6 @@ async def analyze_user_context(request: Request):
|
|
| 838 |
return {"status": "success", "report": rep}
|
| 839 |
except Exception as e: return JSONResponse({"error": str(e)}, status_code=500)
|
| 840 |
|
| 841 |
-
@app.get("/download-dataset")
async def download_dataset():
    """Serve ``data/dataset.csv`` as a CSV download, or a 404 response when it is absent."""
    dataset_csv = Path("data/dataset.csv")
    if not dataset_csv.exists():
        return Response("Dataset not found.", status_code=404)
    return FileResponse(path=dataset_csv, filename="dataset.csv", media_type='text/csv')
|
| 847 |
-
|
| 848 |
-
@app.get("/model-architecture", response_class=PlainTextResponse)
async def get_model_architecture():
    """Return a plain-text description of the loaded base model.

    In LITE mode a fixed notice is returned; otherwise the string form of
    ``inference_logic.base_model``, or a fallback message when it is falsy.
    """
    if LITE_MODE:
        return "Running in LITE mode."
    if not inference_logic.base_model:
        return "Model not loaded."
    return str(inference_logic.base_model)
|
| 853 |
-
|
| 854 |
@app.get("/", response_class=HTMLResponse)
|
| 855 |
async def read_root(request: Request):
|
| 856 |
return templates.TemplateResponse("index.html", {"request": request})
|
|
@@ -896,14 +931,15 @@ async def run_queue_processing(
|
|
| 896 |
vertex_project_id: str = Form(""), vertex_location: str = Form(""), vertex_model_name: str = Form(""), vertex_api_key: str = Form(""),
|
| 897 |
nrp_api_key: str = Form(""), nrp_model_name: str = Form(""), nrp_base_url: str = Form("https://ellm.nrp-nautilus.io/v1"),
|
| 898 |
include_comments: bool = Form(False), reasoning_method: str = Form("cot"), prompt_template: str = Form("standard"),
|
| 899 |
-
custom_query: str = Form(""), max_reprompts: int = Form(1)
|
|
|
|
| 900 |
):
|
| 901 |
global STOP_QUEUE_SIGNAL
|
| 902 |
STOP_QUEUE_SIGNAL = False
|
| 903 |
|
| 904 |
-
gemini_config = {"api_key": gemini_api_key, "model_name": gemini_model_name, "max_retries": max_reprompts}
|
| 905 |
-
vertex_config = {"project_id": vertex_project_id, "location": vertex_location, "model_name": vertex_model_name, "api_key": vertex_api_key, "max_retries": max_reprompts, "use_search":
|
| 906 |
-
nrp_config = {"api_key": nrp_api_key, "model_name": nrp_model_name, "base_url": nrp_base_url, "max_retries": max_reprompts}
|
| 907 |
|
| 908 |
sel_p = PROMPT_VARIANTS.get(prompt_template, PROMPT_VARIANTS['standard'])
|
| 909 |
system_persona_txt = sel_p['instruction']
|
|
@@ -922,7 +958,9 @@ async def run_queue_processing(
|
|
| 922 |
config_params_dict = {
|
| 923 |
"reprompts": max_reprompts,
|
| 924 |
"include_comments": include_comments,
|
| 925 |
-
"agent_active": False
|
|
|
|
|
|
|
| 926 |
}
|
| 927 |
config_params_str = json.dumps(config_params_dict)
|
| 928 |
|
|
@@ -953,7 +991,7 @@ async def run_queue_processing(
|
|
| 953 |
gt_data = row
|
| 954 |
break
|
| 955 |
|
| 956 |
-
yield f"data:
|
| 957 |
tid = common_utils.extract_tweet_id(link) or hashlib.md5(link.encode()).hexdigest()[:10]
|
| 958 |
assets = await common_utils.prepare_video_assets(link, tid)
|
| 959 |
|
|
@@ -1029,4 +1067,99 @@ async def run_queue_processing(
|
|
| 1029 |
yield f"data: Emotional Manipul. : AI {s_float(vec_ai.get('emotional_manipulation_score'))} | GT {s_float(gt_data.get('emotional_manipulation_score'))}\n"
|
| 1030 |
yield f"data: Video-Audio Align : AI {s_float(mod_ai.get('video_audio_score'))} | GT {s_float(gt_data.get('video_audio_score'))}\n"
|
| 1031 |
yield f"data: Video-Caption Align: AI {s_float(mod_ai.get('video_caption_score'))} | GT {s_float(gt_data.get('video_caption_score'))}\n"
|
| 1032 |
-
yield f"data: Audio-Caption Align: AI {s_float(mod_ai.get('audio_caption_score'))} | GT {s_float(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
import sys
|
| 3 |
|
| 4 |
+
# Ensure 'src' is in sys.path so sibling imports work
|
| 5 |
current_dir = os.path.dirname(os.path.abspath(__file__))
|
| 6 |
if current_dir not in sys.path:
|
| 7 |
sys.path.append(current_dir)
|
|
|
|
| 30 |
import common_utils
|
| 31 |
|
| 32 |
from toon_parser import parse_veracity_toon
|
| 33 |
+
from labeling_logic import PROMPT_VARIANTS, LABELING_PROMPT_TEMPLATE, LABELING_PROMPT_TEMPLATE_NO_COT, FCOT_MACRO_PROMPT
|
| 34 |
import benchmarking
|
| 35 |
|
| 36 |
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
|
| 37 |
logger = logging.getLogger(__name__)
|
| 38 |
+
|
| 39 |
LITE_MODE = os.getenv("LITE_MODE", "true").lower() == "true"
|
| 40 |
|
| 41 |
app = FastAPI()
|
|
|
|
| 79 |
agent_mount_status = f"error_{str(e)}"
|
| 80 |
|
| 81 |
# --- Static Files & Frontend ---
|
| 82 |
+
STATIC_DIR = "static"
|
| 83 |
+
if os.path.isdir("/app/static"):
|
| 84 |
+
STATIC_DIR = "/app/static"
|
| 85 |
+
elif os.path.isdir("/usr/share/vchat/static"):
|
| 86 |
+
STATIC_DIR = "/usr/share/vchat/static"
|
| 87 |
+
elif os.path.isdir("frontend/dist"):
|
| 88 |
+
STATIC_DIR = "frontend/dist"
|
| 89 |
+
elif not os.path.isdir(STATIC_DIR):
|
| 90 |
+
os.makedirs(STATIC_DIR, exist_ok=True)
|
| 91 |
|
| 92 |
app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
|
| 93 |
|
| 94 |
+
# Explicitly mount assets for Vite support
|
| 95 |
assets_path = os.path.join(STATIC_DIR, "assets")
|
| 96 |
if os.path.exists(assets_path):
|
| 97 |
app.mount("/assets", StaticFiles(directory=assets_path), name="assets")
|
|
|
|
| 228 |
async def get_benchmark_leaderboard():
|
| 229 |
return benchmarking.generate_leaderboard()
|
| 230 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
@app.get("/config/prompts")
|
| 232 |
async def list_prompts():
|
| 233 |
return [{"id": k, "name": v['description']} for k, v in PROMPT_VARIANTS.items()]
|
|
|
|
| 257 |
t = t.strip()
|
| 258 |
if t: tags_count[t] = tags_count.get(t, 0) + 1
|
| 259 |
sorted_tags = sorted(tags_count.items(), key=lambda x: x[1], reverse=True)
|
| 260 |
+
return[{"name": k, "count": v} for k, v in sorted_tags]
|
| 261 |
|
| 262 |
@app.post("/extension/ingest")
|
| 263 |
async def extension_ingest_link(request: Request):
|
|
|
|
| 301 |
try:
|
| 302 |
data = await request.json()
|
| 303 |
target_ids = data.get("ids",[])
|
| 304 |
+
if not target_ids and data.get("id"): target_ids =[data.get("id")]
|
| 305 |
|
| 306 |
if not target_ids: return JSONResponse({"status": "error", "message": "No IDs provided"}, status_code=400)
|
| 307 |
|
|
|
|
| 359 |
try:
|
| 360 |
data = await request.json()
|
| 361 |
target_ids = data.get("ids",[])
|
| 362 |
+
if not target_ids and data.get("id"): target_ids =[data.get("id")]
|
| 363 |
if not target_ids: raise HTTPException(status_code=400)
|
| 364 |
|
| 365 |
target_ids =[str(t) for t in target_ids]
|
|
|
|
| 616 |
writer = csv.DictWriter(f, fieldnames=GROUND_TRUTH_FIELDS, extrasaction='ignore')
|
| 617 |
writer.writeheader()
|
| 618 |
writer.writerows(rows)
|
| 619 |
+
|
| 620 |
+
# Add to User Profiles Catalog
|
| 621 |
+
author = common_utils.extract_twitter_username(link)
|
| 622 |
+
if author:
|
| 623 |
+
prof_dir = Path(f"data/profiles/{author}")
|
| 624 |
+
prof_dir.mkdir(parents=True, exist_ok=True)
|
| 625 |
+
hist_path = prof_dir / "history.csv"
|
| 626 |
+
hist_exists = hist_path.exists()
|
| 627 |
+
existing_links = set()
|
| 628 |
+
if hist_exists:
|
| 629 |
+
for r in common_utils.robust_read_csv(hist_path):
|
| 630 |
+
existing_links.add(r.get('link'))
|
| 631 |
+
if link not in existing_links:
|
| 632 |
+
with open(hist_path, 'a', newline='', encoding='utf-8') as hf:
|
| 633 |
+
fieldnames =["link", "timestamp", "text", "is_reply", "metric_replies", "metric_reposts", "metric_likes", "metric_views", "ingested_at"]
|
| 634 |
+
hwriter = csv.DictWriter(hf, fieldnames=fieldnames, extrasaction='ignore')
|
| 635 |
+
if not hist_exists: hwriter.writeheader()
|
| 636 |
+
hwriter.writerow({
|
| 637 |
+
"link": link,
|
| 638 |
+
"timestamp": row["timestamp"],
|
| 639 |
+
"text": row["caption"],
|
| 640 |
+
"ingested_at": row["timestamp"]
|
| 641 |
+
})
|
| 642 |
|
| 643 |
update_queue_status(link, "Processed")
|
| 644 |
return {"status": "success", "id": tweet_id}
|
|
|
|
| 646 |
logger.error(f"Save Manual Error: {e}")
|
| 647 |
return JSONResponse({"status": "error", "message": str(e)}, status_code=500)
|
| 648 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 649 |
@app.get("/dataset/list")
|
| 650 |
async def get_dataset_list():
|
| 651 |
dataset =[]
|
|
|
|
| 670 |
async def get_account_integrity():
    """Rank accounts by a recency-weighted average of their veracity scores.

    Steps:
      1. Scan ``data/profiles/<user>/history.csv`` to record which links each
         profile already lists and to map tweet IDs to profile names.
      2. Read ``data/dataset.csv`` and ``data/manual_dataset.csv`` and keep the
         rows whose ``final_veracity_score`` parses to a number in 0..100 and
         whose author can be resolved from the link or from the ID map.
      3. Side effect: every kept link that is missing from its author's
         ``history.csv`` is appended there (the profile directory is created
         when needed).
      4. Per author, order the scores by timestamp string (newest first) and
         average them with weights ``0.9 ** i``.

    Returns:
        A list of ``{"username", "avg_veracity", "posts_labeled"}`` dicts sorted
        by ``avg_veracity`` in descending order.
    """
    # NOTE(review): the block nesting below was reconstructed from a flattened
    # listing; in particular the "auto-add" section is assumed to sit inside the
    # resolved-author branch -- confirm against the repository.
    history_fields = ["link", "timestamp", "text", "is_reply", "metric_replies", "metric_reposts", "metric_likes", "metric_views", "ingested_at"]

    id_map = {}
    prof_dir = Path("data/profiles")
    # mkdir(exist_ok=True) guarantees the directory exists, so no separate
    # exists() check is needed before iterating it.
    prof_dir.mkdir(parents=True, exist_ok=True)

    existing_links_per_user = {}
    for d in prof_dir.iterdir():
        if not d.is_dir():
            continue
        known_links = existing_links_per_user[d.name] = set()
        hist_file = d / "history.csv"
        if hist_file.exists():
            for row in common_utils.robust_read_csv(hist_file):
                link = row.get('link', '')
                tid = common_utils.extract_tweet_id(link)
                if tid: id_map[tid] = d.name
                known_links.add(link)

    scores_map = {}
    for fname in ["data/dataset.csv", "data/manual_dataset.csv"]:
        path = Path(fname)
        if not path.exists(): continue
        for row in common_utils.robust_read_csv(path):
            tid = row.get('id')
            link = row.get('link', '')
            sc = row.get('final_veracity_score', '0')
            # Fall back to '' so the timestamp sort below never compares None with str.
            ts = row.get('timestamp') or ''
            caption = row.get('caption', '')
            # Keep digits and dots only, then parse; anything unparseable is
            # mapped to -1 so that the range check below rejects it.
            try: val = float(re.sub(r'[^\d.]', '', str(sc)))
            except ValueError: val = -1

            # Require scores to be between 0 and 100
            if not 0 <= val <= 100:
                continue

            auth = common_utils.extract_twitter_username(link) or id_map.get(tid, "Unknown")
            if not auth or auth == "Unknown":
                continue

            scores_map.setdefault(auth, []).append({'val': val, 'ts': ts})

            # Auto-add missing accounts/links to the Profile catalog
            if auth not in existing_links_per_user:
                existing_links_per_user[auth] = set()
                (prof_dir / auth).mkdir(parents=True, exist_ok=True)

            if link not in existing_links_per_user[auth]:
                existing_links_per_user[auth].add(link)
                hist_path = prof_dir / auth / "history.csv"
                # Decide about the header before opening: append mode creates the file.
                hist_exists = hist_path.exists()
                with open(hist_path, 'a', newline='', encoding='utf-8') as hf:
                    hwriter = csv.DictWriter(hf, fieldnames=history_fields, extrasaction='ignore')
                    if not hist_exists: hwriter.writeheader()
                    hwriter.writerow({
                        "link": link,
                        "timestamp": ts,
                        "text": caption,
                        "ingested_at": ts
                    })

    results = []
    decay_factor = 0.9
    for k, v in scores_map.items():
        v_sorted = sorted(v, key=lambda x: x['ts'], reverse=True)
        total_weight = 0
        weighted_sum = 0

        # The newest entry gets weight 1, each older one 0.9x the previous weight.
        for i, item in enumerate(v_sorted):
            weight = decay_factor ** i
            weighted_sum += item['val'] * weight
            total_weight += weight

        avg_veracity = round(weighted_sum / total_weight, 1) if total_weight > 0 else 0
        results.append({"username": k, "avg_veracity": avg_veracity, "posts_labeled": len(v)})

    return sorted(results, key=lambda x: x['avg_veracity'], reverse=True)
|
| 744 |
|
| 745 |
@app.post("/queue/add")
|
| 746 |
async def add_queue_item(link: str = Body(..., embed=True)):
|
|
|
|
| 886 |
return {"status": "success", "report": rep}
|
| 887 |
except Exception as e: return JSONResponse({"error": str(e)}, status_code=500)
|
| 888 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 889 |
@app.get("/", response_class=HTMLResponse)
|
| 890 |
async def read_root(request: Request):
|
| 891 |
return templates.TemplateResponse("index.html", {"request": request})
|
|
|
|
| 931 |
vertex_project_id: str = Form(""), vertex_location: str = Form(""), vertex_model_name: str = Form(""), vertex_api_key: str = Form(""),
|
| 932 |
nrp_api_key: str = Form(""), nrp_model_name: str = Form(""), nrp_base_url: str = Form("https://ellm.nrp-nautilus.io/v1"),
|
| 933 |
include_comments: bool = Form(False), reasoning_method: str = Form("cot"), prompt_template: str = Form("standard"),
|
| 934 |
+
custom_query: str = Form(""), max_reprompts: int = Form(1),
|
| 935 |
+
use_search: bool = Form(False), use_code: bool = Form(False)
|
| 936 |
):
|
| 937 |
global STOP_QUEUE_SIGNAL
|
| 938 |
STOP_QUEUE_SIGNAL = False
|
| 939 |
|
| 940 |
+
gemini_config = {"api_key": gemini_api_key, "model_name": gemini_model_name, "max_retries": max_reprompts, "use_search": use_search, "use_code": use_code}
|
| 941 |
+
vertex_config = {"project_id": vertex_project_id, "location": vertex_location, "model_name": vertex_model_name, "api_key": vertex_api_key, "max_retries": max_reprompts, "use_search": use_search, "use_code": use_code}
|
| 942 |
+
nrp_config = {"api_key": nrp_api_key, "model_name": nrp_model_name, "base_url": nrp_base_url, "max_retries": max_reprompts, "use_search": use_search, "use_code": use_code}
|
| 943 |
|
| 944 |
sel_p = PROMPT_VARIANTS.get(prompt_template, PROMPT_VARIANTS['standard'])
|
| 945 |
system_persona_txt = sel_p['instruction']
|
|
|
|
| 958 |
config_params_dict = {
|
| 959 |
"reprompts": max_reprompts,
|
| 960 |
"include_comments": include_comments,
|
| 961 |
+
"agent_active": False,
|
| 962 |
+
"use_search": use_search,
|
| 963 |
+
"use_code": use_code
|
| 964 |
}
|
| 965 |
config_params_str = json.dumps(config_params_dict)
|
| 966 |
|
|
|
|
| 991 |
gt_data = row
|
| 992 |
break
|
| 993 |
|
| 994 |
+
yield f"data:[START] {link} (Type: {task_type})\n\n"
|
| 995 |
tid = common_utils.extract_tweet_id(link) or hashlib.md5(link.encode()).hexdigest()[:10]
|
| 996 |
assets = await common_utils.prepare_video_assets(link, tid)
|
| 997 |
|
|
|
|
| 1067 |
yield f"data: Emotional Manipul. : AI {s_float(vec_ai.get('emotional_manipulation_score'))} | GT {s_float(gt_data.get('emotional_manipulation_score'))}\n"
|
| 1068 |
yield f"data: Video-Audio Align : AI {s_float(mod_ai.get('video_audio_score'))} | GT {s_float(gt_data.get('video_audio_score'))}\n"
|
| 1069 |
yield f"data: Video-Caption Align: AI {s_float(mod_ai.get('video_caption_score'))} | GT {s_float(gt_data.get('video_caption_score'))}\n"
|
| 1070 |
+
yield f"data: Audio-Caption Align: AI {s_float(mod_ai.get('audio_caption_score'))} | GT {s_float(gt_data.get('audio_caption_score'))}\n"
|
| 1071 |
+
yield f"data: FINAL VERACITY : AI {ai_score} | GT {gt_final} | Delta: {delta}\n\n"
|
| 1072 |
+
|
| 1073 |
+
comp_path = Path("data/comparison.csv")
|
| 1074 |
+
comp_exists = comp_path.exists()
|
| 1075 |
+
with open(comp_path, 'a', newline='', encoding='utf-8') as cf:
|
| 1076 |
+
cw = csv.DictWriter(cf, fieldnames=["id", "link", "timestamp", "gt_score", "ai_score", "delta", "model", "prompt", "reasoning_method"])
|
| 1077 |
+
if not comp_exists: cw.writeheader()
|
| 1078 |
+
cw.writerow({
|
| 1079 |
+
"id": tid, "link": link, "timestamp": datetime.datetime.now().isoformat(),
|
| 1080 |
+
"gt_score": gt_final, "ai_score": ai_score, "delta": delta,
|
| 1081 |
+
"model": active_model_name, "prompt": prompt_template, "reasoning_method": reasoning_method
|
| 1082 |
+
})
|
| 1083 |
+
|
| 1084 |
+
try:
|
| 1085 |
+
with open(d_path, 'a', newline='', encoding='utf-8') as f:
|
| 1086 |
+
row = {
|
| 1087 |
+
"id": tid, "link": link, "timestamp": datetime.datetime.now().isoformat(),
|
| 1088 |
+
"caption": assets['caption'],
|
| 1089 |
+
"final_veracity_score": ai_score,
|
| 1090 |
+
"visual_score": parsed['veracity_vectors'].get('visual_integrity_score', 0),
|
| 1091 |
+
"audio_score": parsed['veracity_vectors'].get('audio_integrity_score', 0),
|
| 1092 |
+
"source_score": parsed['veracity_vectors'].get('source_credibility_score', 0),
|
| 1093 |
+
"logic_score": parsed['veracity_vectors'].get('logical_consistency_score', 0),
|
| 1094 |
+
"emotion_score": parsed['veracity_vectors'].get('emotional_manipulation_score', 0),
|
| 1095 |
+
"align_video_audio": parsed['modalities'].get('video_audio_score', 0),
|
| 1096 |
+
"align_video_caption": parsed['modalities'].get('video_caption_score', 0),
|
| 1097 |
+
"align_audio_caption": parsed['modalities'].get('audio_caption_score', 0),
|
| 1098 |
+
"classification": parsed['disinformation_analysis'].get('classification', 'None'),
|
| 1099 |
+
"reasoning": parsed['final_assessment'].get('reasoning', ''),
|
| 1100 |
+
"tags": ",".join(parsed.get('tags',[])),
|
| 1101 |
+
"raw_toon": res_data.get("raw_toon", ""),
|
| 1102 |
+
"config_type": "GenAI",
|
| 1103 |
+
"config_model": active_model_name,
|
| 1104 |
+
"config_prompt": prompt_template,
|
| 1105 |
+
"config_reasoning": reasoning_method,
|
| 1106 |
+
"config_params": config_params_str
|
| 1107 |
+
}
|
| 1108 |
+
writer = csv.DictWriter(f, fieldnames=DATASET_COLUMNS, extrasaction='ignore')
|
| 1109 |
+
if not exists: writer.writeheader()
|
| 1110 |
+
writer.writerow(row)
|
| 1111 |
+
except Exception as csv_err: logger.error(f"CSV Write Failed: {csv_err}")
|
| 1112 |
+
|
| 1113 |
+
try:
|
| 1114 |
+
ts = datetime.datetime.now().isoformat()
|
| 1115 |
+
ts_clean = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 1116 |
+
flat_parsed = parsed.copy()
|
| 1117 |
+
flat_parsed["raw_toon"] = res_data.get("raw_toon", "")
|
| 1118 |
+
flat_parsed["meta_info"] = {
|
| 1119 |
+
"id": tid, "timestamp": ts, "link": link,
|
| 1120 |
+
"prompt_used": res_data.get("prompt_used", ""),
|
| 1121 |
+
"model_selection": model_selection,
|
| 1122 |
+
"config_type": "GenAI",
|
| 1123 |
+
"config_model": active_model_name,
|
| 1124 |
+
"config_prompt": prompt_template,
|
| 1125 |
+
"config_reasoning": reasoning_method,
|
| 1126 |
+
"config_params": config_params_dict
|
| 1127 |
+
}
|
| 1128 |
+
with open(Path(f"data/labels/{tid}_{ts_clean}.json"), 'w', encoding='utf-8') as f: json.dump(flat_parsed, f, indent=2, ensure_ascii=False)
|
| 1129 |
+
except Exception as e: logger.error(f"Sidecar Error: {e}")
|
| 1130 |
+
|
| 1131 |
+
# Add to User Profiles Catalog
|
| 1132 |
+
author = common_utils.extract_twitter_username(link)
|
| 1133 |
+
if author:
|
| 1134 |
+
prof_dir = Path(f"data/profiles/{author}")
|
| 1135 |
+
prof_dir.mkdir(parents=True, exist_ok=True)
|
| 1136 |
+
hist_path = prof_dir / "history.csv"
|
| 1137 |
+
hist_exists = hist_path.exists()
|
| 1138 |
+
existing_links = set()
|
| 1139 |
+
if hist_exists:
|
| 1140 |
+
for r in common_utils.robust_read_csv(hist_path):
|
| 1141 |
+
existing_links.add(r.get('link'))
|
| 1142 |
+
if link not in existing_links:
|
| 1143 |
+
with open(hist_path, 'a', newline='', encoding='utf-8') as hf:
|
| 1144 |
+
fieldnames =["link", "timestamp", "text", "is_reply", "metric_replies", "metric_reposts", "metric_likes", "metric_views", "ingested_at"]
|
| 1145 |
+
hwriter = csv.DictWriter(hf, fieldnames=fieldnames, extrasaction='ignore')
|
| 1146 |
+
if not hist_exists: hwriter.writeheader()
|
| 1147 |
+
hwriter.writerow({
|
| 1148 |
+
"link": link,
|
| 1149 |
+
"timestamp": datetime.datetime.now().isoformat(),
|
| 1150 |
+
"text": assets['caption'],
|
| 1151 |
+
"ingested_at": datetime.datetime.now().isoformat()
|
| 1152 |
+
})
|
| 1153 |
+
|
| 1154 |
+
p_ids.add(tid)
|
| 1155 |
+
p_links.add(common_utils.normalize_link(link))
|
| 1156 |
+
update_queue_status(link, "Processed", task_type)
|
| 1157 |
+
yield f"data:[SUCCESS] Saved.\n\n"
|
| 1158 |
+
else:
|
| 1159 |
+
err_msg = res_data.get('error') if isinstance(res_data, dict) else "Inference failed"
|
| 1160 |
+
log_queue_error(link, err_msg, task_type)
|
| 1161 |
+
yield f"data: [FAIL] {err_msg}.\n\n"
|
| 1162 |
+
await asyncio.sleep(0.5)
|
| 1163 |
+
yield "event: close\ndata: Done\n\n"
|
| 1164 |
+
|
| 1165 |
+
return StreamingResponse(queue_stream(), media_type="text/event-stream")
|
src/benchmarking.py
CHANGED
|
@@ -1,224 +1,229 @@
|
|
| 1 |
-
import pandas as pd
|
| 2 |
-
import numpy as np
|
| 3 |
-
import shutil
|
| 4 |
-
import json
|
| 5 |
-
import math
|
| 6 |
-
from pathlib import Path
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
AUTOGLUON_AVAILABLE =
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
return obj
|
| 25 |
-
elif isinstance(obj,
|
| 26 |
-
return
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
"""
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
merged['
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
('
|
| 76 |
-
('
|
| 77 |
-
('
|
| 78 |
-
('
|
| 79 |
-
('
|
| 80 |
-
('
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
]
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
merged[
|
| 91 |
-
err_c
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
p =
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
return
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
merged['bin_ai'] = merged['
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
"
|
| 140 |
-
"
|
| 141 |
-
"
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
"""
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
merged = merged
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
"
|
| 219 |
-
"
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import numpy as np
|
| 3 |
+
import shutil
|
| 4 |
+
import json
|
| 5 |
+
import math
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
|
| 8 |
+
# Lazy import to avoid startup overhead
|
| 9 |
+
try:
|
| 10 |
+
from autogluon.tabular import TabularPredictor
|
| 11 |
+
AUTOGLUON_AVAILABLE = True
|
| 12 |
+
except ImportError:
|
| 13 |
+
AUTOGLUON_AVAILABLE = False
|
| 14 |
+
|
| 15 |
+
DATA_AI = Path("data/dataset.csv")
|
| 16 |
+
DATA_MANUAL = Path("data/manual_dataset.csv")
|
| 17 |
+
|
| 18 |
+
def sanitize_for_json(obj):
    """Recursively replace non-finite floats with ``None`` so the value is JSON-safe.

    Dicts, lists and tuples are rebuilt with their contents sanitized. NumPy
    scalars are unwrapped to the matching Python scalar and NumPy arrays are
    converted to nested lists before the same rules are applied. Any other
    object is returned unchanged.

    Args:
        obj: Arbitrary (possibly nested) value.

    Returns:
        The sanitized value; NaN and +/-infinity become ``None``.
    """
    # NumPy scalars such as np.float32 / np.int64 are not ``float`` subclasses
    # and are rejected by json.dumps, so unwrap them before the checks below.
    if isinstance(obj, np.generic):
        obj = obj.item()
    elif isinstance(obj, np.ndarray):
        obj = obj.tolist()

    if isinstance(obj, float):
        return obj if math.isfinite(obj) else None
    if isinstance(obj, dict):
        return {k: sanitize_for_json(v) for k, v in obj.items()}
    if isinstance(obj, list):
        return [sanitize_for_json(v) for v in obj]
    if isinstance(obj, tuple):
        return tuple(sanitize_for_json(v) for v in obj)
    return obj
|
| 28 |
+
|
| 29 |
+
def calculate_tag_accuracy(tags_ai, tags_man):
    """Return the Jaccard similarity between the AI tags and the manual tags.

    Each argument may be a comma-separated string, a missing value (``None`` or
    NaN), or a list/tuple/set of tag strings. Tags are compared after trimming
    whitespace and lower-casing; empty tags are ignored.

    Args:
        tags_ai: Tags produced by the model.
        tags_man: Tags from the manual ground truth.

    Returns:
        1.0 when both sides have no tags, 0.0 when exactly one side has none,
        otherwise ``|intersection| / |union|`` of the two tag sets.
    """
    def _to_tag_set(tags):
        # Sequences are taken as-is: pd.isna() on a multi-element list returns
        # an array whose truth value is ambiguous and would raise.
        if isinstance(tags, (list, tuple, set, frozenset)):
            items = tags
        elif pd.isna(tags):
            return set()
        else:
            items = str(tags).split(',')
        return {str(t).strip().lower() for t in items if str(t).strip()}

    set_ai = _to_tag_set(tags_ai)
    set_man = _to_tag_set(tags_man)
    if not set_man and not set_ai: return 1.0
    if not set_man or not set_ai: return 0.0
    # Jaccard Similarity
    return len(set_ai & set_man) / len(set_ai | set_man)
|
| 38 |
+
|
| 39 |
+
def get_combined_dataset():
    """
    Joins AI predictions with Manual Ground Truth on ID and calculates comprehensive vector differences.

    Returns:
        A DataFrame holding the inner join of DATA_AI and DATA_MANUAL on 'id',
        with these columns added: 'abs_error' (final-score error),
        'err_<ai column>' for every sub-score pair present in both files,
        'composite_mae' (row-wise mean of all error columns) and 'tag_accuracy'.
        Returns None when either CSV is missing or any step raises.
    """
    if not DATA_AI.exists() or not DATA_MANUAL.exists():
        return None

    try:
        # Load datasets. The join key is read as text: if pandas infers a numeric
        # dtype, a column with a single missing ID becomes float ("123" -> "123.0",
        # and long numeric IDs lose precision), so the two files stop matching.
        df_ai = pd.read_csv(DATA_AI, dtype={'id': str})
        df_manual = pd.read_csv(DATA_MANUAL, dtype={'id': str})

        # Normalize IDs (Trim spaces, ensure string)
        df_ai['id'] = df_ai['id'].astype(str).str.strip()
        df_manual['id'] = df_manual['id'].astype(str).str.strip()

        df_manual_cols = ['id', 'final_veracity_score', 'visual_integrity_score', 'audio_integrity_score', 'source_credibility_score', 'logical_consistency_score', 'emotional_manipulation_score', 'video_audio_score', 'video_caption_score', 'audio_caption_score', 'tags', 'classification']

        # Merge on ID. Columns present in both frames get the '_ai' / '_manual' suffix.
        merged = pd.merge(
            df_ai,
            df_manual[[c for c in df_manual_cols if c in df_manual.columns]],
            on='id',
            suffixes=('_ai', '_manual'),
            how='inner'
        )

        # 1. Final Score Error
        # NOTE(review): the suffixed names only exist when BOTH files contain
        # 'final_veracity_score'; otherwise the KeyError is caught below and the
        # function returns None.
        merged['final_veracity_score_ai'] = pd.to_numeric(merged['final_veracity_score_ai'], errors='coerce').fillna(0)
        merged['final_veracity_score_manual'] = pd.to_numeric(merged['final_veracity_score_manual'], errors='coerce').fillna(0)
        merged['abs_error'] = (merged['final_veracity_score_ai'] - merged['final_veracity_score_manual']).abs()

        # 2. Sophisticated Vector Calculations: (AI column, manual column) pairs
        vector_pairs = [
            ('visual_score', 'visual_integrity_score'),
            ('audio_score', 'audio_integrity_score'),
            ('source_score', 'source_credibility_score'),
            ('logic_score', 'logical_consistency_score'),
            ('emotion_score', 'emotional_manipulation_score'),
            ('align_video_audio', 'video_audio_score'),
            ('align_video_caption', 'video_caption_score'),
            ('align_audio_caption', 'audio_caption_score'),
        ]

        error_cols = ['abs_error']
        for ai_c, man_c in vector_pairs:
            if ai_c in merged.columns and man_c in merged.columns:
                # Multiply 1-10 scores by 10 to put them on the same 0-100 scale as final score.
                # Unparseable or missing sub-scores are replaced by 5 before scaling.
                merged[ai_c] = pd.to_numeric(merged[ai_c], errors='coerce').fillna(5) * 10
                merged[man_c] = pd.to_numeric(merged[man_c], errors='coerce').fillna(5) * 10
                err_c = f"err_{ai_c}"
                merged[err_c] = (merged[ai_c] - merged[man_c]).abs()
                error_cols.append(err_c)

        # Composite MAE represents the mean absolute error across the final score AND all available sub-vectors
        merged['composite_mae'] = merged[error_cols].mean(axis=1)

        # 3. Tag Accuracy Calculation
        merged['tag_accuracy'] = merged.apply(lambda row: calculate_tag_accuracy(row.get('tags_ai', ''), row.get('tags_manual', '')), axis=1)

        return merged
    except Exception as e:
        print(f"Error merging datasets: {e}")
        return None
|
| 103 |
+
|
| 104 |
+
def format_config_params(params_raw):
    """Parses the config_params JSON string into a readable format for the leaderboard.

    Args:
        params_raw: A JSON string, an already-decoded dict, or a missing value
            (None / NaN / empty).

    Returns:
        "Defaults" when the value is missing or empty,
        "Retries:<n> | Context:<Yes|No>" for a dict-shaped config, and
        "Legacy/Unknown" when the value cannot be decoded or is not a dict.
    """
    # pd.isna() returns an array for containers, whose truth value cannot be
    # used in a condition, so containers are checked for emptiness only.
    if isinstance(params_raw, (dict, list, tuple)):
        is_missing = not params_raw
    else:
        is_missing = bool(pd.isna(params_raw)) or not params_raw
    if is_missing:
        return "Defaults"

    try:
        if isinstance(params_raw, str):
            p = json.loads(params_raw)
        else:
            p = params_raw

        # Valid JSON that is not an object (list, number, ...) carries no settings.
        if not isinstance(p, dict):
            return "Legacy/Unknown"

        reprompts = p.get('reprompts', 0)
        comments = "Yes" if p.get('include_comments') == 'true' or p.get('include_comments') is True else "No"
        return f"Retries:{reprompts} | Context:{comments}"
    except Exception:
        # Best effort: undecodable configs are labelled rather than raised.
        return "Legacy/Unknown"
|
| 119 |
+
|
| 120 |
+
def calculate_benchmarks():
    """Global stats (All AI models vs Ground Truth).

    Returns:
        {"status": "no_data"} when there is no merged dataset; otherwise a dict
        with the row count, mean composite MAE, binary agreement percentage
        (both scores on the same side of 50), mean tag accuracy percentage and
        the last five merged rows, passed through sanitize_for_json.
    """
    frame = get_combined_dataset()
    if frame is None or len(frame) == 0:
        return {"status": "no_data"}

    # Binary Accuracy (Threshold 50)
    frame['bin_ai'] = frame['final_veracity_score_ai'] >= 50
    frame['bin_manual'] = frame['final_veracity_score_manual'] >= 50
    agreement_rate = (frame['bin_ai'] == frame['bin_manual']).mean()

    sample_columns = ['id', 'composite_mae', 'final_veracity_score_ai', 'final_veracity_score_manual']
    latest_rows = frame.tail(5)[sample_columns].to_dict(orient='records')

    return sanitize_for_json({
        "count": int(len(frame)),
        # Composite MAE is exposed as the main MAE metric
        "mae": round(frame['composite_mae'].mean(), 2),
        "accuracy_percent": round(agreement_rate * 100, 1),
        "tag_accuracy_percent": round(frame['tag_accuracy'].mean() * 100, 1),
        "recent_samples": latest_rows,
    })
|
| 144 |
+
|
| 145 |
+
def generate_leaderboard():
    """
    Groups results by Configuration to rank models/prompts using sophisticated distance measurements.

    Returns:
        [] when there is no merged dataset; otherwise a list with one dict per
        configuration group (model, prompt, reasoning, readable params, tools,
        fcot depth), holding mean composite MAE, tag accuracy %, binary accuracy %,
        sample count and any available per-vector mean errors. The list is sorted
        by accuracy (desc), tag accuracy (desc), composite MAE (asc) and passed
        through sanitize_for_json.
    """
    merged = get_combined_dataset()
    if merged is None or len(merged) == 0:
        return []

    # Make sure every grouping column exists before filling / grouping.
    for col in ['config_model', 'config_prompt', 'config_reasoning', 'config_params']:
        if col not in merged.columns: merged[col] = "Unknown"

    merged = merged.fillna({'config_model': 'Unknown', 'config_prompt': 'Standard', 'config_reasoning': 'None'})

    merged['params_readable'] = merged['config_params'].apply(format_config_params)

    def extract_tools(p_raw):
        """Turn the tool flags of a config_params value into a comma-separated label."""
        try:
            if isinstance(p_raw, str): p = json.loads(p_raw)
            else: p = p_raw
            if not isinstance(p, dict): return "None"
            tools = []
            if p.get('agent_active'): tools.append("Agent")
            if p.get('use_search'): tools.append("Search")
            if p.get('use_code'): tools.append("Code")
            if p.get('few_shot') or p.get('multi_shot'): tools.append("Few-Shot")
            return ", ".join(tools) if tools else "None"
        except Exception:
            # Best effort: an undecodable config simply reports no tools.
            # (Narrowed from a bare except so KeyboardInterrupt/SystemExit propagate.)
            return "None"

    merged['tools'] = merged['config_params'].apply(extract_tools)

    # Binary agreement: both scores fall on the same side of the 50 threshold.
    merged['bin_ai'] = merged['final_veracity_score_ai'] >= 50
    merged['bin_manual'] = merged['final_veracity_score_manual'] >= 50
    merged['is_correct'] = (merged['bin_ai'] == merged['bin_manual']).astype(int)

    def get_fcot_depth(row):
        """Map the reasoning label to a depth: 2 for 'fcot', 1 for 'cot', else 0."""
        r = str(row['config_reasoning']).lower()
        # 'fcot' must be tested first because it also contains 'cot'.
        if 'fcot' in r: return 2
        elif 'cot' in r: return 1
        return 0
    merged['fcot_depth'] = merged.apply(get_fcot_depth, axis=1)

    agg_dict = {
        'comp_mae': ('composite_mae', 'mean'),
        'tag_accuracy': ('tag_accuracy', 'mean'),
        'accuracy': ('is_correct', 'mean'),
        'count': ('id', 'count')
    }

    err_cols = [
        'err_visual_score', 'err_audio_score', 'err_source_score',
        'err_logic_score', 'err_emotion_score', 'err_align_video_audio',
        'err_align_video_caption', 'err_align_audio_caption'
    ]
    # Only aggregate the per-vector error columns that the merge actually produced.
    for col in err_cols:
        if col in merged.columns:
            agg_dict[col] = (col, 'mean')

    # Group By Configuration using Composite MAE and Tag Accuracy
    grouped = merged.groupby(['config_model', 'config_prompt', 'config_reasoning', 'params_readable', 'tools', 'fcot_depth']).agg(**agg_dict).reset_index()

    leaderboard = []
    for _, row in grouped.iterrows():
        entry = {
            "type": "GenAI",
            "model": row['config_model'],
            "prompt": row['config_prompt'],
            "reasoning": row['config_reasoning'],
            "params": row['params_readable'],
            "tools": row['tools'],
            "fcot_depth": int(row['fcot_depth']),
            "comp_mae": round(row['comp_mae'], 2),
            "tag_acc": round(row['tag_accuracy'] * 100, 1),
            "accuracy": round(row['accuracy'] * 100, 1),
            "samples": int(row['count'])
        }
        for col in err_cols:
            if col in row:
                entry[col] = round(row[col], 2)
        leaderboard.append(entry)

    # Sort: Highest Accuracy, Highest Tag Accuracy, then Lowest Composite MAE
    leaderboard.sort(key=lambda x: (-x['accuracy'], -x['tag_acc'], x['comp_mae']))

    return sanitize_for_json(leaderboard)
|
src/common_utils.py
CHANGED
|
@@ -1,104 +1,110 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import re
|
| 3 |
-
import csv
|
| 4 |
-
import logging
|
| 5 |
-
import datetime
|
| 6 |
-
import subprocess
|
| 7 |
-
import hashlib
|
| 8 |
-
from pathlib import Path
|
| 9 |
-
import yt_dlp
|
| 10 |
-
import transcription
|
| 11 |
-
|
| 12 |
-
logger = logging.getLogger(__name__)
|
| 13 |
-
|
| 14 |
-
def robust_read_csv(file_path: Path):
    """Yield the rows of a CSV file as dicts, tolerating damaged input.

    Undecodable bytes are replaced and NUL characters are stripped before
    parsing. Nothing is yielded when the file does not exist. If reading or
    parsing fails, the error is logged and iteration stops.

    Args:
        file_path: Location of the CSV file; the first row is the header.

    Yields:
        One dict per non-empty data row, keyed by the header names.
    """
    if not file_path.exists():
        return

    try:
        # newline='' is required by the csv module: without it, line endings
        # embedded in quoted fields are rewritten before the parser sees them.
        with open(file_path, 'r', encoding='utf-8', errors='replace', newline='') as f:
            clean_lines = (line.replace('\0', '') for line in f)
            for row in csv.DictReader(clean_lines):
                if row:
                    yield row
    except Exception as e:
        logger.error(f"Error reading CSV {file_path}: {e}")
        return
|
| 28 |
-
|
| 29 |
-
def extract_tweet_id(url: str) -> str | None:
|
| 30 |
-
if not url: return None
|
| 31 |
-
match = re.search(r"(?:twitter|x)\.com/[^/]+/status/(\d+)", url)
|
| 32 |
-
if match: return match.group(1)
|
| 33 |
-
return None
|
| 34 |
-
|
| 35 |
-
def
|
| 36 |
-
if not
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import re
|
| 3 |
+
import csv
|
| 4 |
+
import logging
|
| 5 |
+
import datetime
|
| 6 |
+
import subprocess
|
| 7 |
+
import hashlib
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
import yt_dlp
|
| 10 |
+
import transcription
|
| 11 |
+
|
| 12 |
+
logger = logging.getLogger(__name__)
|
| 13 |
+
|
| 14 |
+
def robust_read_csv(file_path: Path):
    """Yield the rows of a CSV file as dicts, tolerating damaged input.

    Undecodable bytes are replaced and NUL characters are stripped before
    parsing. Nothing is yielded when the file does not exist. If reading or
    parsing fails, the error is logged and iteration stops.

    Args:
        file_path: Location of the CSV file; the first row is the header.

    Yields:
        One dict per non-empty data row, keyed by the header names.
    """
    if not file_path.exists():
        return

    try:
        # newline='' is required by the csv module: without it, line endings
        # embedded in quoted fields are rewritten before the parser sees them.
        with open(file_path, 'r', encoding='utf-8', errors='replace', newline='') as f:
            clean_lines = (line.replace('\0', '') for line in f)
            for row in csv.DictReader(clean_lines):
                if row:
                    yield row
    except Exception as e:
        logger.error(f"Error reading CSV {file_path}: {e}")
        return
|
| 28 |
+
|
| 29 |
+
def extract_tweet_id(url: str) -> str | None:
|
| 30 |
+
if not url: return None
|
| 31 |
+
match = re.search(r"(?:twitter|x)\.com/[^/]+/status/(\d+)", url)
|
| 32 |
+
if match: return match.group(1)
|
| 33 |
+
return None
|
| 34 |
+
|
| 35 |
+
def extract_twitter_username(url: str) -> str | None:
|
| 36 |
+
if not url: return None
|
| 37 |
+
match = re.search(r"(?:twitter|x)\.com/([^/]+)/status/\d+", url)
|
| 38 |
+
if match: return match.group(1).lower()
|
| 39 |
+
return None
|
| 40 |
+
|
| 41 |
+
def normalize_link(link: str) -> str:
    """Reduce a URL to a comparable form.

    Drops everything from the first '?', trims surrounding whitespace and
    trailing slashes, then removes every occurrence of 'http://', 'https://'
    and 'www.'. An empty or missing link yields "".
    """
    if not link:
        return ""
    without_query = link.split('?')[0]
    trimmed = without_query.strip().rstrip('/')
    for fragment in ('http://', 'https://', 'www.'):
        trimmed = trimmed.replace(fragment, '')
    return trimmed
|
| 44 |
+
|
| 45 |
+
def parse_vtt(file_path: str) -> str:
    """Parses a .vtt subtitle file and returns the clean text content.

    The 'WEBVTT' header, cue timing lines (containing '-->') and purely numeric
    lines are skipped; inline tags such as <c> are removed and consecutive
    duplicate lines are collapsed.

    Args:
        file_path: Path of the .vtt file.

    Returns:
        The caption text joined by newlines, or one of the placeholder strings
        "Transcript file not found." / "No speech found in transcript." /
        "Error reading transcript: <error>".
    """
    try:
        if not os.path.exists(file_path):
            return "Transcript file not found."

        # 'utf-8-sig' strips an optional byte-order mark; with plain 'utf-8' the
        # first line would be '\ufeffWEBVTT', slip past the header filter below
        # and end up in the transcript text.
        with open(file_path, 'r', encoding='utf-8-sig') as f:
            lines = f.readlines()

        text_lines = []
        for line in lines:
            line = line.strip()
            if line and not line.startswith('WEBVTT') and '-->' not in line and not line.isdigit():
                clean_line = re.sub(r'<[^>]+>', '', line)
                # Skip a line that repeats the previous one.
                if clean_line and (not text_lines or clean_line != text_lines[-1]):
                    text_lines.append(clean_line)

        return "\n".join(text_lines) if text_lines else "No speech found in transcript."
    except Exception as e:
        logger.error(f"Error parsing VTT file {file_path}: {e}")
        return f"Error reading transcript: {e}"
|
| 66 |
+
|
| 67 |
+
async def prepare_video_assets(link: str, output_id: str) -> dict:
    """Download a link's video into data/videos and derive audio and transcript files.

    Existing files named after `output_id` are reused instead of regenerated.
    The caption is taken from the extracted metadata (description, else title).

    NOTE(review): the yt_dlp, ffmpeg and transcription calls below are not
    awaited, so they run inline inside this coroutine.

    Args:
        link: URL handed to yt_dlp.
        output_id: Base file name (without extension) for the generated assets.

    Returns:
        A dict with "video" (path or None), "transcript" (path or None) and
        "caption" (possibly empty string).
    """
    assets_dir = Path("data/videos")
    if not assets_dir.exists():
        assets_dir.mkdir(parents=True, exist_ok=True)

    video_path = assets_dir / f"{output_id}.mp4"
    audio_path = assets_dir / f"{output_id}.wav"
    transcript_path = assets_dir / f"{output_id}.vtt"

    caption = ""

    ydl_opts = {
        'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/mp4',
        'outtmpl': str(video_path),
        'quiet': True,
        'ignoreerrors': True,
        'no_warnings': True,
        'skip_download': False,
    }

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            metadata = ydl.extract_info(link, download=False)
            if metadata:
                caption = metadata.get('description', '') or metadata.get('title', '')
                has_stream = bool(metadata.get('formats', [])) or bool(metadata.get('url'))
                if not has_stream:
                    logger.info(f"No video formats found for {link}. Treating as text-only.")
                elif not video_path.exists():
                    ydl.download([link])
    except Exception as e:
        logger.error(f"Download error for {link}: {e}")

    # A zero-byte file counts as "no video".
    video_downloaded = video_path.exists() and video_path.stat().st_size > 0
    if video_downloaded:
        if not audio_path.exists():
            # Extract a mono 16 kHz 16-bit PCM track; ffmpeg output is discarded.
            ffmpeg_cmd = ["ffmpeg", "-y", "-i", str(video_path), "-vn", "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1", str(audio_path)]
            subprocess.run(ffmpeg_cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        if audio_path.exists() and not transcript_path.exists():
            transcription.load_model()
            transcription.generate_transcript(str(audio_path))

    has_transcript = video_downloaded and transcript_path.exists()
    return {
        "video": str(video_path) if video_downloaded else None,
        "transcript": str(transcript_path) if has_transcript else None,
        "caption": caption,
    }
|
src/factuality_logic.py
CHANGED
|
@@ -7,9 +7,13 @@ import asyncio
|
|
| 7 |
from pathlib import Path
|
| 8 |
import inference_logic
|
| 9 |
from toon_parser import parse_toon_line
|
|
|
|
| 10 |
|
| 11 |
logger = logging.getLogger(__name__)
|
| 12 |
|
|
|
|
|
|
|
|
|
|
| 13 |
PROMPT_VISUAL_ARTIFACTS = (
|
| 14 |
"Analyze the video for visual manipulation (Deepfakes, editing anomalies).\n"
|
| 15 |
"Steps inside <thinking>: 1. Scan for artifacts. 2. Check cuts.\n"
|
|
@@ -36,29 +40,10 @@ PROMPT_AUDIO_ANALYSIS = (
|
|
| 36 |
"Score(1-10),\"Justification text\""
|
| 37 |
)
|
| 38 |
|
| 39 |
-
|
| 40 |
-
def parse_vtt(file_path: str) -> str:
    """Parses a .vtt subtitle file and returns the clean text content.

    The 'WEBVTT' header, cue timing lines (containing '-->') and purely numeric
    lines are skipped; inline tags such as <c> are removed and consecutive
    duplicate lines are collapsed.

    Args:
        file_path: Path of the .vtt file.

    Returns:
        The caption text joined by newlines, or one of the placeholder strings
        "Transcript file not found." / "No speech found in transcript." /
        "Error reading transcript: <error>".
    """
    try:
        if not os.path.exists(file_path):
            return "Transcript file not found."

        # 'utf-8-sig' strips an optional byte-order mark; with plain 'utf-8' the
        # first line would be '\ufeffWEBVTT', slip past the header filter below
        # and end up in the transcript text.
        with open(file_path, 'r', encoding='utf-8-sig') as f:
            lines = f.readlines()

        text_lines = []
        for line in lines:
            line = line.strip()
            if line and not line.startswith('WEBVTT') and '-->' not in line and not line.isdigit():
                clean_line = re.sub(r'<[^>]+>', '', line)
                # Skip a line that repeats the previous one.
                if clean_line and (not text_lines or clean_line != text_lines[-1]):
                    text_lines.append(clean_line)

        return "\n".join(text_lines) if text_lines else "No speech found in transcript."
    except Exception as e:
        logger.error(f"Error parsing VTT file {file_path}: {e}")
        return f"Error reading transcript: {e}"
|
| 60 |
-
|
| 61 |
async def run_factuality_pipeline(paths: dict, checks: dict, generation_config: dict):
|
|
|
|
|
|
|
|
|
|
| 62 |
video_path = paths.get("video")
|
| 63 |
transcript_path = paths.get("transcript")
|
| 64 |
|
|
@@ -78,7 +63,7 @@ async def run_factuality_pipeline(paths: dict, checks: dict, generation_config:
|
|
| 78 |
yield f"\n--- Extracted Transcript ---\n{transcript}\n--------------------------\n\n"
|
| 79 |
await asyncio.sleep(0.1)
|
| 80 |
|
| 81 |
-
analysis_steps =
|
| 82 |
if checks.get("visuals"):
|
| 83 |
analysis_steps.append(("Visual Integrity", PROMPT_VISUAL_ARTIFACTS))
|
| 84 |
if checks.get("content"):
|
|
@@ -96,6 +81,7 @@ async def run_factuality_pipeline(paths: dict, checks: dict, generation_config:
|
|
| 96 |
sampling_fps = current_gen_config.pop("sampling_fps", 2.0)
|
| 97 |
current_gen_config.pop("num_perceptions", None)
|
| 98 |
|
|
|
|
| 99 |
current_gen_config["temperature"] = 0.1
|
| 100 |
current_gen_config["do_sample"] = True
|
| 101 |
|
|
@@ -109,7 +95,9 @@ async def run_factuality_pipeline(paths: dict, checks: dict, generation_config:
|
|
| 109 |
|
| 110 |
yield f" - Analysis Complete for '{title}'. Parsing TOON...\n\n"
|
| 111 |
|
|
|
|
| 112 |
parsed_result = {}
|
|
|
|
| 113 |
match = re.search(r'(\w+_analysis): result\[2\]\{score,justification\}:\s*\n(.+)', ans, re.MULTILINE)
|
| 114 |
|
| 115 |
thinking = "No thinking block found."
|
|
@@ -125,6 +113,7 @@ async def run_factuality_pipeline(paths: dict, checks: dict, generation_config:
|
|
| 125 |
yield f"Warning: Model did not return valid TOON. Raw output:\n{ans}\n"
|
| 126 |
continue
|
| 127 |
|
|
|
|
| 128 |
score = parsed_result.get('score', 'N/A')
|
| 129 |
justification = parsed_result.get('justification', 'No justification provided.')
|
| 130 |
|
|
|
|
| 7 |
from pathlib import Path
|
| 8 |
import inference_logic
|
| 9 |
from toon_parser import parse_toon_line
|
| 10 |
+
from common_utils import parse_vtt
|
| 11 |
|
| 12 |
logger = logging.getLogger(__name__)
|
| 13 |
|
| 14 |
+
# --- Enhanced TOON Prompts for Individual Checks ---
|
| 15 |
+
# Using TOON reduces output tokens significantly compared to JSON.
|
| 16 |
+
|
| 17 |
PROMPT_VISUAL_ARTIFACTS = (
|
| 18 |
"Analyze the video for visual manipulation (Deepfakes, editing anomalies).\n"
|
| 19 |
"Steps inside <thinking>: 1. Scan for artifacts. 2. Check cuts.\n"
|
|
|
|
| 40 |
"Score(1-10),\"Justification text\""
|
| 41 |
)
|
| 42 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
async def run_factuality_pipeline(paths: dict, checks: dict, generation_config: dict):
|
| 44 |
+
"""
|
| 45 |
+
Asynchronously runs a pipeline of factuality checks, parses TOON scores, and yields results.
|
| 46 |
+
"""
|
| 47 |
video_path = paths.get("video")
|
| 48 |
transcript_path = paths.get("transcript")
|
| 49 |
|
|
|
|
| 63 |
yield f"\n--- Extracted Transcript ---\n{transcript}\n--------------------------\n\n"
|
| 64 |
await asyncio.sleep(0.1)
|
| 65 |
|
| 66 |
+
analysis_steps =[]
|
| 67 |
if checks.get("visuals"):
|
| 68 |
analysis_steps.append(("Visual Integrity", PROMPT_VISUAL_ARTIFACTS))
|
| 69 |
if checks.get("content"):
|
|
|
|
| 81 |
sampling_fps = current_gen_config.pop("sampling_fps", 2.0)
|
| 82 |
current_gen_config.pop("num_perceptions", None)
|
| 83 |
|
| 84 |
+
# FORCE LOW TEMP for structured TOON analysis
|
| 85 |
current_gen_config["temperature"] = 0.1
|
| 86 |
current_gen_config["do_sample"] = True
|
| 87 |
|
|
|
|
| 95 |
|
| 96 |
yield f" - Analysis Complete for '{title}'. Parsing TOON...\n\n"
|
| 97 |
|
| 98 |
+
# --- Attempt to parse TOON from the model's response ---
|
| 99 |
parsed_result = {}
|
| 100 |
+
# Regex to find the TOON data line: key: type[count]{headers}:\nVALUE
|
| 101 |
match = re.search(r'(\w+_analysis): result\[2\]\{score,justification\}:\s*\n(.+)', ans, re.MULTILINE)
|
| 102 |
|
| 103 |
thinking = "No thinking block found."
|
|
|
|
| 113 |
yield f"Warning: Model did not return valid TOON. Raw output:\n{ans}\n"
|
| 114 |
continue
|
| 115 |
|
| 116 |
+
# --- Display the parsed, structured result ---
|
| 117 |
score = parsed_result.get('score', 'N/A')
|
| 118 |
justification = parsed_result.get('justification', 'No justification provided.')
|
| 119 |
|
src/inference_logic.py
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
|
|
| 1 |
import re
|
|
|
|
| 2 |
import sys
|
| 3 |
import os
|
| 4 |
-
import time
|
| 5 |
import logging
|
| 6 |
import asyncio
|
| 7 |
import json
|
| 8 |
-
import requests
|
| 9 |
import datetime
|
|
|
|
| 10 |
|
| 11 |
-
# Safe imports for Lite Mode (API only)
|
| 12 |
try:
|
| 13 |
from transformers import Qwen3VLForConditionalGeneration, AutoProcessor
|
| 14 |
from peft import PeftModel
|
|
@@ -17,28 +17,30 @@ except ImportError:
|
|
| 17 |
AutoProcessor = None
|
| 18 |
PeftModel = None
|
| 19 |
|
| 20 |
-
from labeling_logic import (
|
| 21 |
-
LABELING_PROMPT_TEMPLATE, SCORE_INSTRUCTIONS_SIMPLE, SCORE_INSTRUCTIONS_REASONING,
|
| 22 |
-
SCHEMA_SIMPLE, SCHEMA_REASONING,
|
| 23 |
-
FCOT_MACRO_PROMPT, FCOT_MESO_PROMPT, FCOT_SYNTHESIS_PROMPT
|
| 24 |
-
)
|
| 25 |
-
from toon_parser import parse_veracity_toon
|
| 26 |
-
|
| 27 |
-
# Optional local imports
|
| 28 |
try:
|
| 29 |
from my_vision_process import process_vision_info, client
|
| 30 |
except ImportError:
|
| 31 |
process_vision_info = None
|
| 32 |
client = None
|
| 33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
# Google GenAI Imports
|
| 35 |
try:
|
| 36 |
import google.generativeai as genai_legacy
|
| 37 |
-
from google.generativeai.types import generation_types
|
| 38 |
except ImportError:
|
| 39 |
genai_legacy = None
|
| 40 |
|
| 41 |
try:
|
|
|
|
| 42 |
from google import genai
|
| 43 |
from google.genai.types import (
|
| 44 |
GenerateContentConfig,
|
|
@@ -47,8 +49,7 @@ try:
|
|
| 47 |
Tool,
|
| 48 |
VertexAISearch,
|
| 49 |
GoogleSearch,
|
| 50 |
-
Part
|
| 51 |
-
SafetySetting
|
| 52 |
)
|
| 53 |
import vertexai
|
| 54 |
except ImportError:
|
|
@@ -62,18 +63,133 @@ peft_model = None
|
|
| 62 |
active_model = None
|
| 63 |
logger = logging.getLogger(__name__)
|
| 64 |
|
| 65 |
-
TEXT_ONLY_INSTRUCTIONS = """
|
| 66 |
-
NOTE: You are operating in TEXT-ONLY mode. The video file could not be analyzed directly.
|
| 67 |
-
You must rely entirely on the provided Context (Caption and Transcript) to deduce the veracity.
|
| 68 |
-
If the text lacks sufficient detail to score visual or audio integrity, score them as 5 (Neutral/Unknown).
|
| 69 |
-
"""
|
| 70 |
-
|
| 71 |
-
def get_formatted_tag_list():
|
| 72 |
-
return "Suggested tags: politics, satire, deepfake, misleading, true, news"
|
| 73 |
-
|
| 74 |
def load_models():
|
| 75 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
def extract_json_from_text(text):
|
| 78 |
try:
|
| 79 |
match = re.search(r'\{[\s\S]*\}', text)
|
|
@@ -82,39 +198,51 @@ def extract_json_from_text(text):
|
|
| 82 |
except:
|
| 83 |
pass
|
| 84 |
return {}
|
| 85 |
-
|
| 86 |
-
def smart_merge(base, new_data):
|
| 87 |
-
if not isinstance(new_data, dict): return new_data if new_data else base
|
| 88 |
-
if not isinstance(base, dict): return new_data
|
| 89 |
-
for k, v in new_data.items():
|
| 90 |
-
if k not in base: base[k] = v
|
| 91 |
-
else:
|
| 92 |
-
if isinstance(base[k], dict) and isinstance(v, dict): smart_merge(base[k], v)
|
| 93 |
-
else:
|
| 94 |
-
base_val = base[k]
|
| 95 |
-
new_val = v
|
| 96 |
-
is_base_valid = base_val and str(base_val) != "0" and str(base_val).lower() != "n/a"
|
| 97 |
-
is_new_valid = new_val and str(new_val) != "0" and str(new_val).lower() != "n/a"
|
| 98 |
-
if not is_base_valid and is_new_valid: base[k] = new_val
|
| 99 |
-
return base
|
| 100 |
|
| 101 |
def validate_parsed_data(data, is_text_only):
|
| 102 |
missing =[]
|
|
|
|
| 103 |
if not data.get('video_context_summary'): missing.append("summary")
|
|
|
|
| 104 |
final = data.get('final_assessment', {})
|
| 105 |
if not final.get('reasoning') or len(str(final.get('reasoning', ''))) < 5: missing.append("final:reasoning")
|
|
|
|
| 106 |
vectors = data.get('veracity_vectors', {})
|
| 107 |
-
|
|
|
|
| 108 |
if k in['visual_integrity_score', 'audio_integrity_score'] and is_text_only: continue
|
| 109 |
v = vectors.get(k)
|
| 110 |
if not v or str(v) == '0' or str(v).lower() == 'n/a': missing.append(f"vector:{k}")
|
|
|
|
| 111 |
mod = data.get('modalities', {})
|
| 112 |
for k in['video_audio_score', 'video_caption_score', 'audio_caption_score']:
|
| 113 |
if k in['video_audio_score', 'video_caption_score'] and is_text_only: continue
|
| 114 |
v = mod.get(k)
|
| 115 |
if not v or str(v) == '0' or str(v).lower() == 'n/a': missing.append(f"modality:{k}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
return missing
|
| 117 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
def save_debug_log(request_id, kind, content, attempt, label=""):
|
| 119 |
if not request_id: return
|
| 120 |
try:
|
|
@@ -128,82 +256,42 @@ def save_debug_log(request_id, kind, content, attempt, label=""):
|
|
| 128 |
except Exception as e:
|
| 129 |
logger.error(f"Failed to save debug log: {e}")
|
| 130 |
|
| 131 |
-
async def attempt_toon_repair(original_text: str, schema: str, client, model_type: str, config: dict):
|
| 132 |
-
logger.info("Attempting TOON Repair...")
|
| 133 |
-
repair_prompt = f"SYSTEM: Reformat the following text into strict TOON schema. Infer missing scores as 0.\n\nSCHEMA:\n{schema}\n\nINPUT:\n{original_text}\n"
|
| 134 |
-
try:
|
| 135 |
-
loop = asyncio.get_event_loop()
|
| 136 |
-
repaired_text = ""
|
| 137 |
-
if model_type == 'gemini':
|
| 138 |
-
model = genai_legacy.GenerativeModel("models/gemini-2.0-flash-exp")
|
| 139 |
-
response = await loop.run_in_executor(None, lambda: model.generate_content(repair_prompt))
|
| 140 |
-
repaired_text = response.text
|
| 141 |
-
elif model_type == 'vertex':
|
| 142 |
-
cl = client if client else genai.Client(vertexai=True, project=config['project_id'], location=config['location'])
|
| 143 |
-
response = await loop.run_in_executor(None, lambda: cl.models.generate_content(model=config['model_name'], contents=repair_prompt))
|
| 144 |
-
repaired_text = response.text
|
| 145 |
-
return repaired_text
|
| 146 |
-
except Exception as e:
|
| 147 |
-
logger.error(f"Repair failed: {e}")
|
| 148 |
-
return original_text
|
| 149 |
-
|
| 150 |
async def run_gemini_labeling_pipeline(video_path: str, caption: str, transcript: str, gemini_config: dict, include_comments: bool, reasoning_method: str = "cot", system_persona: str = "", request_id: str = None):
|
| 151 |
if genai_legacy is None:
|
| 152 |
yield "ERROR: Legacy SDK missing.\n"
|
| 153 |
return
|
| 154 |
-
|
| 155 |
api_key = gemini_config.get("api_key")
|
| 156 |
-
if not api_key:
|
| 157 |
-
yield "ERROR: No Gemini API Key provided."
|
| 158 |
-
return
|
| 159 |
-
|
| 160 |
max_retries = int(gemini_config.get("max_retries", 1))
|
| 161 |
-
|
| 162 |
-
safety_settings =[
|
| 163 |
-
{"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
|
| 164 |
-
{"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
|
| 165 |
-
{"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
|
| 166 |
-
{"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
|
| 167 |
-
]
|
| 168 |
-
|
| 169 |
try:
|
| 170 |
genai_legacy.configure(api_key=api_key)
|
| 171 |
loop = asyncio.get_event_loop()
|
| 172 |
uploaded_file = None
|
| 173 |
is_text_only = False
|
| 174 |
-
|
| 175 |
if video_path and os.path.exists(video_path):
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
while True:
|
| 180 |
-
uploaded_file = await loop.run_in_executor(None, lambda: genai_legacy.get_file(uploaded_file.name))
|
| 181 |
-
state_name = uploaded_file.state.name
|
| 182 |
-
if state_name == "ACTIVE": break
|
| 183 |
-
elif state_name == "FAILED":
|
| 184 |
-
yield "ERROR: Google failed to process video."
|
| 185 |
-
return
|
| 186 |
-
if time.time() - wait_start > 300:
|
| 187 |
-
yield "ERROR: Video processing timed out."
|
| 188 |
-
return
|
| 189 |
-
yield "Processing video on Google servers..."
|
| 190 |
-
await asyncio.sleep(5)
|
| 191 |
-
else:
|
| 192 |
-
is_text_only = True
|
| 193 |
|
| 194 |
-
|
| 195 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 196 |
toon_schema = SCHEMA_REASONING if include_comments else SCHEMA_SIMPLE
|
| 197 |
score_instructions = SCORE_INSTRUCTIONS_REASONING if include_comments else SCORE_INSTRUCTIONS_SIMPLE
|
|
|
|
| 198 |
|
| 199 |
-
if is_text_only: system_persona += "\n" + TEXT_ONLY_INSTRUCTIONS
|
| 200 |
-
|
| 201 |
-
raw_text = ""
|
| 202 |
-
prompt_used = ""
|
| 203 |
-
gen_config = {"temperature": 0.1}
|
| 204 |
accumulated_data = {}
|
|
|
|
| 205 |
fcot_trace = {}
|
| 206 |
full_raw_text = ""
|
|
|
|
| 207 |
|
| 208 |
for attempt in range(max_retries + 1):
|
| 209 |
raw_text = ""
|
|
@@ -228,52 +316,46 @@ async def run_gemini_labeling_pipeline(video_path: str, caption: str, transcript
|
|
| 228 |
save_debug_log(request_id, 'response', raw_text, attempt, 'reprompt')
|
| 229 |
else:
|
| 230 |
if reasoning_method == "fcot":
|
| 231 |
-
yield "Starting
|
| 232 |
chat = model.start_chat(history=[])
|
| 233 |
|
| 234 |
macro_prompt = FCOT_MACRO_PROMPT.format(system_persona=system_persona, caption=caption, transcript=transcript)
|
| 235 |
-
if is_text_only: macro_prompt = "NOTE: Text Only Analysis.\n" + macro_prompt
|
| 236 |
save_debug_log(request_id, 'prompt', macro_prompt, attempt, 'fcot_macro')
|
| 237 |
-
|
| 238 |
-
inputs1 =[macro_prompt]
|
| 239 |
if uploaded_file: inputs1.insert(0, uploaded_file)
|
| 240 |
-
|
| 241 |
-
res1 = await loop.run_in_executor(None, lambda: chat.send_message(inputs1, safety_settings=safety_settings))
|
| 242 |
macro_hypothesis = res1.text
|
| 243 |
save_debug_log(request_id, 'response', macro_hypothesis, attempt, 'fcot_macro')
|
| 244 |
fcot_trace['macro'] = macro_hypothesis
|
| 245 |
-
yield f"Hypothesis: {macro_hypothesis[:100]}...\n"
|
| 246 |
|
| 247 |
meso_prompt = FCOT_MESO_PROMPT.format(macro_hypothesis=macro_hypothesis)
|
| 248 |
save_debug_log(request_id, 'prompt', meso_prompt, attempt, 'fcot_meso')
|
| 249 |
-
res2 = await loop.run_in_executor(None, lambda: chat.send_message(meso_prompt
|
| 250 |
micro_observations = res2.text
|
| 251 |
save_debug_log(request_id, 'response', micro_observations, attempt, 'fcot_meso')
|
| 252 |
fcot_trace['meso'] = micro_observations
|
| 253 |
-
|
| 254 |
-
synthesis_prompt = FCOT_SYNTHESIS_PROMPT.format(toon_schema=toon_schema, score_instructions=score_instructions, tag_list_text=
|
| 255 |
save_debug_log(request_id, 'prompt', synthesis_prompt, attempt, 'fcot_synthesis')
|
| 256 |
-
res3 = await loop.run_in_executor(None, lambda: chat.send_message(synthesis_prompt
|
| 257 |
raw_text = res3.text
|
| 258 |
save_debug_log(request_id, 'response', raw_text, attempt, 'fcot_synthesis')
|
| 259 |
-
prompt_used = f"FCoT:\
|
| 260 |
else:
|
| 261 |
-
|
| 262 |
-
|
|
|
|
|
|
|
|
|
|
| 263 |
prompt_used = prompt_text
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
inputs = [prompt_text]
|
| 268 |
if uploaded_file: inputs.append(uploaded_file)
|
| 269 |
-
|
| 270 |
-
response = await loop.run_in_executor(
|
| 271 |
-
None,
|
| 272 |
-
lambda: model.generate_content(inputs, generation_config=gen_config, safety_settings=safety_settings)
|
| 273 |
-
)
|
| 274 |
raw_text = response.text
|
| 275 |
-
save_debug_log(request_id, 'response', raw_text, attempt, '
|
| 276 |
-
|
| 277 |
if raw_text:
|
| 278 |
full_raw_text += f"\n--- Attempt {attempt} ---\n{raw_text}\n"
|
| 279 |
parsed_step = parse_veracity_toon(raw_text)
|
|
@@ -286,28 +368,20 @@ async def run_gemini_labeling_pipeline(video_path: str, caption: str, transcript
|
|
| 286 |
else:
|
| 287 |
parsed_step[k] = json_data[k]
|
| 288 |
accumulated_data = smart_merge(accumulated_data, parsed_step)
|
| 289 |
-
|
| 290 |
missing_fields = validate_parsed_data(accumulated_data, is_text_only)
|
| 291 |
if not missing_fields:
|
| 292 |
-
yield "Validation Passed. All factuality components processed and confidence scores obtained.\n"
|
| 293 |
yield {"raw_toon": full_raw_text, "parsed_data": accumulated_data, "prompt_used": prompt_used, "fcot_trace": fcot_trace}
|
| 294 |
break
|
| 295 |
-
|
| 296 |
if attempt == max_retries:
|
| 297 |
yield f"Max retries reached. Saving incomplete data.\n"
|
| 298 |
yield {"raw_toon": full_raw_text, "parsed_data": accumulated_data, "prompt_used": prompt_used, "fcot_trace": fcot_trace}
|
| 299 |
break
|
| 300 |
|
| 301 |
-
if uploaded_file:
|
| 302 |
-
|
| 303 |
-
await loop.run_in_executor(None, lambda: genai_legacy.delete_file(name=uploaded_file.name))
|
| 304 |
-
except Exception:
|
| 305 |
-
pass
|
| 306 |
-
|
| 307 |
-
except Exception as e:
|
| 308 |
-
logger.error(f"Gemini Pipeline Error: {e}", exc_info=True)
|
| 309 |
-
yield f"ERROR (Gemini): {e}"
|
| 310 |
-
|
| 311 |
|
| 312 |
async def run_vertex_labeling_pipeline(video_path: str, caption: str, transcript: str, vertex_config: dict, include_comments: bool, reasoning_method: str = "cot", system_persona: str = "", request_id: str = None):
|
| 313 |
if genai is None:
|
|
@@ -315,52 +389,55 @@ async def run_vertex_labeling_pipeline(video_path: str, caption: str, transcript
|
|
| 315 |
return
|
| 316 |
|
| 317 |
project_id = vertex_config.get("project_id")
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
|
|
|
| 321 |
|
| 322 |
-
|
| 323 |
-
SafetySetting(category="HARM_CATEGORY_HATE_SPEECH", threshold="BLOCK_ONLY_HIGH"),
|
| 324 |
-
SafetySetting(category="HARM_CATEGORY_DANGEROUS_CONTENT", threshold="BLOCK_ONLY_HIGH"),
|
| 325 |
-
SafetySetting(category="HARM_CATEGORY_SEXUALLY_EXPLICIT", threshold="BLOCK_ONLY_HIGH"),
|
| 326 |
-
SafetySetting(category="HARM_CATEGORY_HARASSMENT", threshold="BLOCK_ONLY_HIGH"),
|
| 327 |
-
]
|
| 328 |
|
| 329 |
try:
|
| 330 |
-
|
| 331 |
if api_key:
|
| 332 |
-
client = genai.Client(vertexai=True, project=project_id, location=
|
| 333 |
else:
|
| 334 |
-
client = genai.Client(vertexai=True, project=project_id, location=
|
| 335 |
-
|
| 336 |
video_part = None
|
| 337 |
is_text_only = False
|
| 338 |
if video_path and os.path.exists(video_path):
|
| 339 |
with open(video_path, 'rb') as f: video_bytes = f.read()
|
| 340 |
video_part = Part.from_bytes(data=video_bytes, mime_type="video/mp4")
|
| 341 |
-
else:
|
| 342 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 343 |
|
| 344 |
-
toon_schema = SCHEMA_REASONING if include_comments else SCHEMA_SIMPLE
|
| 345 |
-
score_instructions = SCORE_INSTRUCTIONS_REASONING if include_comments else SCORE_INSTRUCTIONS_SIMPLE
|
| 346 |
-
model_name = vertex_config.get("model_name", "gemini-2.5-flash-lite")
|
| 347 |
-
max_retries = int(vertex_config.get("max_retries", 1))
|
| 348 |
-
|
| 349 |
-
raw_text = ""
|
| 350 |
-
prompt_used = ""
|
| 351 |
-
loop = asyncio.get_event_loop()
|
| 352 |
config = GenerateContentConfig(
|
| 353 |
-
temperature=0.1,
|
| 354 |
-
|
| 355 |
-
tools=[Tool(google_search=GoogleSearch())] if vertex_config.get("use_search", True) else None,
|
| 356 |
-
safety_settings=safety_settings
|
| 357 |
)
|
| 358 |
|
| 359 |
-
|
| 360 |
-
|
|
|
|
|
|
|
| 361 |
accumulated_data = {}
|
|
|
|
| 362 |
fcot_trace = {}
|
| 363 |
full_raw_text = ""
|
|
|
|
|
|
|
|
|
|
| 364 |
|
| 365 |
for attempt in range(max_retries + 1):
|
| 366 |
raw_text = ""
|
|
@@ -368,6 +445,7 @@ async def run_vertex_labeling_pipeline(video_path: str, caption: str, transcript
|
|
| 368 |
missing = validate_parsed_data(accumulated_data, is_text_only)
|
| 369 |
yield f"Validation failed. Missing or incomplete fields: {missing}. Initiating Iterative Reprompt (Attempt {attempt}/{max_retries}) to acquire remaining factuality components...\n"
|
| 370 |
|
|
|
|
| 371 |
prompt_text = (
|
| 372 |
f"SYSTEM: Review the previous attempt which failed validation.\n"
|
| 373 |
f"CONTEXT: Caption: \"{caption}\"\nTranscript: \"{transcript}\"\n"
|
|
@@ -388,12 +466,12 @@ async def run_vertex_labeling_pipeline(video_path: str, caption: str, transcript
|
|
| 388 |
save_debug_log(request_id, 'response', raw_text, attempt, 'reprompt')
|
| 389 |
else:
|
| 390 |
if reasoning_method == "fcot":
|
| 391 |
-
yield "Starting
|
| 392 |
chat = client.chats.create(model=model_name, config=config)
|
| 393 |
|
| 394 |
macro_prompt = FCOT_MACRO_PROMPT.format(system_persona=system_persona, caption=caption, transcript=transcript)
|
| 395 |
save_debug_log(request_id, 'prompt', macro_prompt, attempt, 'fcot_macro')
|
| 396 |
-
inputs1 =[macro_prompt]
|
| 397 |
if video_part: inputs1.insert(0, video_part)
|
| 398 |
else: inputs1[0] = "NOTE: Text Only Analysis.\n" + inputs1[0]
|
| 399 |
|
|
@@ -401,7 +479,6 @@ async def run_vertex_labeling_pipeline(video_path: str, caption: str, transcript
|
|
| 401 |
macro_hypothesis = res1.text
|
| 402 |
save_debug_log(request_id, 'response', macro_hypothesis, attempt, 'fcot_macro')
|
| 403 |
fcot_trace['macro'] = macro_hypothesis
|
| 404 |
-
yield f"Hypothesis: {macro_hypothesis[:80]}...\n"
|
| 405 |
|
| 406 |
meso_prompt = FCOT_MESO_PROMPT.format(macro_hypothesis=macro_hypothesis)
|
| 407 |
save_debug_log(request_id, 'prompt', meso_prompt, attempt, 'fcot_meso')
|
|
@@ -410,31 +487,27 @@ async def run_vertex_labeling_pipeline(video_path: str, caption: str, transcript
|
|
| 410 |
save_debug_log(request_id, 'response', micro_observations, attempt, 'fcot_meso')
|
| 411 |
fcot_trace['meso'] = micro_observations
|
| 412 |
|
| 413 |
-
synthesis_prompt = FCOT_SYNTHESIS_PROMPT.format(toon_schema=toon_schema, score_instructions=score_instructions, tag_list_text=
|
| 414 |
save_debug_log(request_id, 'prompt', synthesis_prompt, attempt, 'fcot_synthesis')
|
| 415 |
res3 = await loop.run_in_executor(None, lambda: chat.send_message(synthesis_prompt))
|
| 416 |
raw_text = res3.text
|
| 417 |
save_debug_log(request_id, 'response', raw_text, attempt, 'fcot_synthesis')
|
| 418 |
-
prompt_used = f"FCoT (Vertex):\
|
| 419 |
-
|
| 420 |
else:
|
| 421 |
-
|
| 422 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 423 |
if video_part: contents =[video_part, prompt_text]
|
| 424 |
else: contents =[f"NOTE: Text Only Analysis (No Video).\n{prompt_text}"]
|
| 425 |
prompt_used = prompt_text
|
| 426 |
-
save_debug_log(request_id, 'prompt', prompt_text, attempt, '
|
| 427 |
-
yield f"Generating Labels ({
|
| 428 |
-
response = await loop.run_in_executor(
|
| 429 |
-
None,
|
| 430 |
-
lambda: client.models.generate_content(model=model_name, contents=contents, config=config)
|
| 431 |
-
)
|
| 432 |
raw_text = response.text
|
| 433 |
-
save_debug_log(request_id, 'response', raw_text, attempt, '
|
| 434 |
-
|
| 435 |
-
if not raw_text:
|
| 436 |
-
yield {"error": "Empty Response"}
|
| 437 |
-
return
|
| 438 |
|
| 439 |
if raw_text:
|
| 440 |
full_raw_text += f"\n--- Attempt {attempt} ---\n{raw_text}\n"
|
|
@@ -451,7 +524,7 @@ async def run_vertex_labeling_pipeline(video_path: str, caption: str, transcript
|
|
| 451 |
|
| 452 |
missing_fields = validate_parsed_data(accumulated_data, is_text_only)
|
| 453 |
if not missing_fields:
|
| 454 |
-
yield "Validation Passed. All factuality components processed and confidence scores obtained.\n"
|
| 455 |
yield {"raw_toon": full_raw_text, "parsed_data": accumulated_data, "prompt_used": prompt_used, "fcot_trace": fcot_trace}
|
| 456 |
break
|
| 457 |
|
|
@@ -459,12 +532,11 @@ async def run_vertex_labeling_pipeline(video_path: str, caption: str, transcript
|
|
| 459 |
yield f"Max retries reached. Saving incomplete data.\n"
|
| 460 |
yield {"raw_toon": full_raw_text, "parsed_data": accumulated_data, "prompt_used": prompt_used, "fcot_trace": fcot_trace}
|
| 461 |
break
|
| 462 |
-
|
| 463 |
except Exception as e:
|
| 464 |
-
yield f"ERROR
|
| 465 |
logger.error("Vertex Labeling Error", exc_info=True)
|
| 466 |
|
| 467 |
-
|
| 468 |
async def run_nrp_labeling_pipeline(video_path: str, caption: str, transcript: str, nrp_config: dict, include_comments: bool, reasoning_method: str = "cot", system_persona: str = "", request_id: str = None):
|
| 469 |
api_key = nrp_config.get("api_key")
|
| 470 |
model_name = nrp_config.get("model_name", "gpt-4")
|
|
@@ -482,6 +554,11 @@ async def run_nrp_labeling_pipeline(video_path: str, caption: str, transcript: s
|
|
| 482 |
|
| 483 |
is_text_only = True
|
| 484 |
system_persona += "\n" + TEXT_ONLY_INSTRUCTIONS
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 485 |
|
| 486 |
toon_schema = SCHEMA_REASONING if include_comments else SCHEMA_SIMPLE
|
| 487 |
score_instructions = SCORE_INSTRUCTIONS_REASONING if include_comments else SCORE_INSTRUCTIONS_SIMPLE
|
|
@@ -499,11 +576,30 @@ async def run_nrp_labeling_pipeline(video_path: str, caption: str, transcript: s
|
|
| 499 |
"messages": messages,
|
| 500 |
"temperature": 0.1
|
| 501 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 502 |
def do_request():
|
|
|
|
|
|
|
|
|
|
| 503 |
resp = requests.post(f"{base_url}/chat/completions", headers=headers, json=payload, timeout=600)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 504 |
if resp.status_code != 200:
|
|
|
|
| 505 |
raise Exception(f"API Error {resp.status_code}: {resp.text}")
|
| 506 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 507 |
return await loop.run_in_executor(None, do_request)
|
| 508 |
|
| 509 |
try:
|
|
@@ -511,63 +607,85 @@ async def run_nrp_labeling_pipeline(video_path: str, caption: str, transcript: s
|
|
| 511 |
raw_text = ""
|
| 512 |
if attempt > 0:
|
| 513 |
missing = validate_parsed_data(accumulated_data, is_text_only)
|
| 514 |
-
yield f"Validation failed. Missing fields: {missing}. Initiating Reprompt...\n"
|
|
|
|
| 515 |
prompt_text = (
|
| 516 |
f"SYSTEM: Review the previous attempt which failed validation.\n"
|
| 517 |
f"CONTEXT: Caption: \"{caption}\"\nTranscript: \"{transcript}\"\n"
|
| 518 |
f"PREVIOUS (PARTIAL) DATA: {json.dumps(accumulated_data, indent=2)}\n"
|
| 519 |
f"MISSING FIELDS: {missing}\n"
|
| 520 |
-
f"INSTRUCTION: Generate the missing fields to complete the schema.\n"
|
|
|
|
| 521 |
f"{toon_schema}"
|
| 522 |
)
|
|
|
|
| 523 |
save_debug_log(request_id, 'prompt', prompt_text, attempt, 'reprompt')
|
|
|
|
|
|
|
| 524 |
raw_text = await _call_nrp([
|
| 525 |
{"role": "system", "content": system_persona},
|
| 526 |
{"role": "user", "content": prompt_text}
|
| 527 |
-
])
|
|
|
|
|
|
|
| 528 |
save_debug_log(request_id, 'response', raw_text, attempt, 'reprompt')
|
| 529 |
else:
|
| 530 |
if reasoning_method == "fcot":
|
| 531 |
yield "Starting Fractal Chain of Thought (NRP FCoT)...\n"
|
|
|
|
| 532 |
macro_prompt = FCOT_MACRO_PROMPT.format(system_persona=system_persona, caption=caption, transcript=transcript)
|
| 533 |
macro_prompt = "NOTE: Text Only Analysis.\n" + macro_prompt
|
| 534 |
save_debug_log(request_id, 'prompt', macro_prompt, attempt, 'fcot_macro')
|
| 535 |
|
| 536 |
macro_messages =[{"role": "system", "content": system_persona}, {"role": "user", "content": macro_prompt}]
|
| 537 |
-
|
|
|
|
|
|
|
|
|
|
| 538 |
save_debug_log(request_id, 'response', macro_hypothesis, attempt, 'fcot_macro')
|
| 539 |
fcot_trace['macro'] = macro_hypothesis
|
| 540 |
|
| 541 |
meso_prompt = FCOT_MESO_PROMPT.format(macro_hypothesis=macro_hypothesis)
|
| 542 |
save_debug_log(request_id, 'prompt', meso_prompt, attempt, 'fcot_meso')
|
| 543 |
-
|
| 544 |
meso_messages = macro_messages +[{"role": "assistant", "content": macro_hypothesis}, {"role": "user", "content": meso_prompt}]
|
| 545 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 546 |
save_debug_log(request_id, 'response', micro_observations, attempt, 'fcot_meso')
|
| 547 |
fcot_trace['meso'] = micro_observations
|
| 548 |
|
| 549 |
synthesis_prompt = FCOT_SYNTHESIS_PROMPT.format(toon_schema=toon_schema, score_instructions=score_instructions, tag_list_text=tag_list_text)
|
| 550 |
save_debug_log(request_id, 'prompt', synthesis_prompt, attempt, 'fcot_synthesis')
|
| 551 |
-
|
| 552 |
synthesis_messages = meso_messages +[{"role": "assistant", "content": micro_observations}, {"role": "user", "content": synthesis_prompt}]
|
| 553 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 554 |
save_debug_log(request_id, 'response', raw_text, attempt, 'fcot_synthesis')
|
| 555 |
prompt_used = f"FCoT (NRP):\nMacro: {macro_hypothesis}\nMeso: {micro_observations}"
|
| 556 |
|
| 557 |
else:
|
| 558 |
-
|
|
|
|
| 559 |
system_persona=system_persona, caption=caption, transcript=transcript,
|
| 560 |
toon_schema=toon_schema, score_instructions=score_instructions, tag_list_text=tag_list_text
|
| 561 |
)
|
| 562 |
prompt_text = f"NOTE: Text Only Analysis (No Video).\n{prompt_text}"
|
| 563 |
prompt_used = prompt_text
|
| 564 |
-
save_debug_log(request_id, 'prompt', prompt_text, attempt, '
|
| 565 |
-
yield "Generating Labels (NRP
|
|
|
|
|
|
|
| 566 |
raw_text = await _call_nrp([
|
| 567 |
{"role": "system", "content": system_persona},
|
| 568 |
{"role": "user", "content": prompt_text}
|
| 569 |
-
])
|
| 570 |
-
|
|
|
|
|
|
|
| 571 |
|
| 572 |
if raw_text:
|
| 573 |
full_raw_text += f"\n--- Attempt {attempt} ---\n{raw_text}\n"
|
|
@@ -584,14 +702,10 @@ async def run_nrp_labeling_pipeline(video_path: str, caption: str, transcript: s
|
|
| 584 |
|
| 585 |
missing_fields = validate_parsed_data(accumulated_data, is_text_only)
|
| 586 |
if not missing_fields:
|
| 587 |
-
yield "Validation Passed.\n"
|
| 588 |
yield {"raw_toon": full_raw_text, "parsed_data": accumulated_data, "prompt_used": prompt_used, "fcot_trace": fcot_trace}
|
| 589 |
break
|
| 590 |
|
| 591 |
if attempt == max_retries:
|
| 592 |
-
yield
|
| 593 |
-
|
| 594 |
-
|
| 595 |
-
except Exception as e:
|
| 596 |
-
yield f"ERROR: {e}\n\n"
|
| 597 |
-
logger.error("NRP Labeling Error", exc_info=True)
|
|
|
|
| 1 |
+
import torch
|
| 2 |
import re
|
| 3 |
+
import ast
|
| 4 |
import sys
|
| 5 |
import os
|
|
|
|
| 6 |
import logging
|
| 7 |
import asyncio
|
| 8 |
import json
|
|
|
|
| 9 |
import datetime
|
| 10 |
+
import requests
|
| 11 |
|
|
|
|
| 12 |
try:
|
| 13 |
from transformers import Qwen3VLForConditionalGeneration, AutoProcessor
|
| 14 |
from peft import PeftModel
|
|
|
|
| 17 |
AutoProcessor = None
|
| 18 |
PeftModel = None
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
try:
|
| 21 |
from my_vision_process import process_vision_info, client
|
| 22 |
except ImportError:
|
| 23 |
process_vision_info = None
|
| 24 |
client = None
|
| 25 |
|
| 26 |
+
from labeling_logic import (
|
| 27 |
+
LABELING_PROMPT_TEMPLATE, LABELING_PROMPT_TEMPLATE_NO_COT,
|
| 28 |
+
SCORE_INSTRUCTIONS_SIMPLE, SCORE_INSTRUCTIONS_REASONING,
|
| 29 |
+
SCHEMA_SIMPLE, SCHEMA_REASONING,
|
| 30 |
+
FCOT_MACRO_PROMPT, FCOT_MESO_PROMPT, FCOT_SYNTHESIS_PROMPT, TEXT_ONLY_INSTRUCTIONS,
|
| 31 |
+
get_formatted_tag_list
|
| 32 |
+
)
|
| 33 |
+
from toon_parser import parse_veracity_toon
|
| 34 |
+
|
| 35 |
# Google GenAI Imports
|
| 36 |
try:
|
| 37 |
import google.generativeai as genai_legacy
|
| 38 |
+
from google.generativeai.types import generation_types
|
| 39 |
except ImportError:
|
| 40 |
genai_legacy = None
|
| 41 |
|
| 42 |
try:
|
| 43 |
+
# Modern Google GenAI SDK (v1)
|
| 44 |
from google import genai
|
| 45 |
from google.genai.types import (
|
| 46 |
GenerateContentConfig,
|
|
|
|
| 49 |
Tool,
|
| 50 |
VertexAISearch,
|
| 51 |
GoogleSearch,
|
| 52 |
+
Part
|
|
|
|
| 53 |
)
|
| 54 |
import vertexai
|
| 55 |
except ImportError:
|
|
|
|
| 63 |
active_model = None
|
| 64 |
logger = logging.getLogger(__name__)
|
| 65 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
def load_models():
    """Load the local base model and its processor, once, onto the GPU.

    The function is a no-op when ``LITE_MODE`` is already set or when
    ``base_model`` has already been populated by an earlier call. When CUDA
    is unavailable, or when loading fails for any reason, ``LITE_MODE`` is
    switched on instead of raising.

    Side effects: may assign the module-level ``LITE_MODE``, ``processor``,
    ``base_model`` and ``active_model``.
    """
    global LITE_MODE, processor, base_model, peft_model, active_model

    if LITE_MODE:
        logger.info("LITE_MODE is enabled. Skipping local model loading.")
        return

    # Already initialised by a previous call.
    if base_model is not None:
        return

    if not torch.cuda.is_available():
        logger.warning("CUDA is not available. This application requires a GPU for local models. Switching to LITE_MODE.")
        LITE_MODE = True
        return

    device = torch.device("cuda")
    logger.info(f"CUDA is available. Initializing models on {device}...")
    local_model_path = "/app/local_model"

    # flash_attn is imported only to probe whether it is installed.
    try:
        import flash_attn  # noqa: F401
        attn_implementation = "flash_attention_2"
    except ImportError:
        attn_implementation = "sdpa"

    logger.info(f"Loading base model from {local_model_path}...")
    try:
        # Load into locals first: the globals are published only after BOTH
        # the model and the processor loaded, so a processor failure can no
        # longer leave base_model set while processor/active_model are not.
        # NOTE(review): the model class is presumably None when the optional
        # transformers import failed; that case also lands in the except below.
        loaded_model = Qwen3VLForConditionalGeneration.from_pretrained(
            local_model_path, dtype=torch.bfloat16, device_map="auto", attn_implementation=attn_implementation
        ).eval()
        loaded_processor = AutoProcessor.from_pretrained(local_model_path)
    except Exception as e:
        logger.error(f"Failed to load local model: {e}", exc_info=True)
        LITE_MODE = True
        return

    base_model = loaded_model
    processor = loaded_processor
    active_model = base_model
|
| 100 |
+
|
| 101 |
+
def switch_active_model(model_name: str):
    """Point ``active_model`` at the PEFT model or at the base model.

    The PEFT model is selected only when ``model_name`` is ``"custom"`` and a
    PEFT model is present; every other case selects ``base_model``.
    """
    global active_model, base_model, peft_model
    wants_custom = model_name == "custom" and peft_model is not None
    active_model = peft_model if wants_custom else base_model
|
| 107 |
+
|
| 108 |
+
def inference_step(video_path, prompt, generation_kwargs, sampling_fps, pred_glue=None):
    """Run one video+text generation pass on the active local model.

    Args:
        video_path: Value placed in the ``"video"`` field of the chat message.
        prompt: Text placed in the ``"text"`` part of the chat message.
        generation_kwargs: Extra keyword arguments forwarded to
            ``active_model.generate``.
        sampling_fps: Value placed in the ``'fps'`` field of the video entry.
        pred_glue: Value placed in the ``'key_time'`` field of the video entry.

    Returns:
        The first decoded string produced by ``processor.batch_decode``.

    Raises:
        RuntimeError: If the model, the processor, or the vision
            pre-processing helper is not available.
    """
    global processor, active_model
    if active_model is None:
        raise RuntimeError("Models not loaded.")
    # Fail with a clear message instead of an opaque AttributeError/TypeError
    # when the processor was never loaded or the optional vision helper
    # import fell back to None.
    if processor is None or process_vision_info is None:
        raise RuntimeError("Processor or vision preprocessing is not available.")

    video_entry = {
        "type": "video", "video": video_path, 'key_time': pred_glue, 'fps': sampling_fps,
        "total_pixels": 128*12 * 28 * 28, "min_pixels": 128 * 28 * 28,
    }
    messages = [
        {"role": "user", "content": [video_entry, {"type": "text", "text": prompt}]},
    ]

    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    image_inputs, video_inputs, video_kwargs = process_vision_info(messages, return_video_kwargs=True, client=client)
    fps_inputs = video_kwargs['fps'][0]
    inputs = processor(text=[text], images=image_inputs, videos=video_inputs, fps=fps_inputs, padding=True, return_tensors="pt")
    inputs = {k: v.to(active_model.device) for k, v in inputs.items()}

    with torch.no_grad():
        output_ids = active_model.generate(**inputs, **generation_kwargs, use_cache=True)

    # Drop the prompt tokens so that only newly generated tokens are decoded.
    generated_ids = [output_ids[i][len(inputs['input_ids'][i]):] for i in range(len(output_ids))]
    output_text = processor.batch_decode(generated_ids, skip_special_tokens=True)
    return output_text[0]
|
| 132 |
+
|
| 133 |
+
async def generate_simple_text(prompt: str, model_type: str, config: dict):
    """Send a single text prompt to the selected provider and return its reply.

    Args:
        prompt: The user prompt to send.
        model_type: One of ``'gemini'``, ``'vertex'`` or ``'nrp'``.
        config: Provider settings. Keys read here: ``api_key``,
            ``model_name``, ``project_id`` and ``location`` (vertex), and
            ``base_url`` (nrp).

    Returns:
        The generated text, or a string starting with ``"Error"`` when the
        SDK or key is missing, the provider is not recognised, the HTTP call
        is rejected, or any exception is raised. This function does not raise.
    """
    # We are inside a coroutine, so the running loop is the one to use.
    loop = asyncio.get_running_loop()
    try:
        if model_type == 'gemini':
            if genai_legacy is None:
                return "Error: Legacy SDK missing."
            genai_legacy.configure(api_key=config.get("api_key"))
            model = genai_legacy.GenerativeModel(config.get("model_name", "models/gemini-2.0-flash-exp"))
            # The SDK call is blocking; run it off the event loop.
            response = await loop.run_in_executor(
                None,
                lambda: model.generate_content(prompt, generation_config={"temperature": 0.0})
            )
            return response.text

        elif model_type == 'vertex':
            if genai is None:
                return "Error: Vertex SDK missing."
            api_key = config.get("api_key")
            if api_key:
                cl = genai.Client(vertexai=True, project=config['project_id'], location=config['location'], api_key=api_key)
            else:
                cl = genai.Client(vertexai=True, project=config['project_id'], location=config['location'])
            response = await loop.run_in_executor(
                None,
                lambda: cl.models.generate_content(
                    model=config['model_name'],
                    contents=prompt,
                    config=GenerateContentConfig(temperature=0.0)
                )
            )
            return response.text

        elif model_type == 'nrp':
            api_key = config.get("api_key")
            model_name = config.get("model_name", "gpt-4")
            base_url = config.get("base_url", "https://api.openai.com/v1").rstrip("/")
            if not api_key:
                return "Error: NRP API key missing."
            headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
            payload = {"model": model_name, "messages": [{"role": "user", "content": prompt}], "temperature": 0.0}

            def do_request():
                resp = requests.post(f"{base_url}/chat/completions", headers=headers, json=payload, timeout=600)
                if resp.status_code == 200:
                    return resp.json()["choices"][0]["message"]["content"]
                return f"Error: {resp.status_code} {resp.text}"

            return await loop.run_in_executor(None, do_request)

        # Previously an unknown provider fell through and returned None;
        # report it in the same string form as every other failure path.
        return f"Error: Unsupported model type '{model_type}'."

    except Exception as e:
        logger.error(f"Text Gen Error: {e}")
        return f"Error generating text: {e}"
|
| 180 |
+
|
| 181 |
+
async def generate_community_summary(comments: list, model_type: str, config: dict):
    """Summarise what commenters say about a post's veracity.

    Only the first 15 comments are included in the prompt. Each comment is
    read through ``.get('author', 'User')`` and ``.get('text', '')``. The
    assembled prompt is passed to ``generate_simple_text`` and its result is
    returned unchanged. An empty ``comments`` short-circuits with a fixed
    message and no model call.
    """
    if not comments:
        return "No comments available."

    bullet_lines = []
    for entry in comments[:15]:
        bullet_lines.append(f"- {entry.get('author', 'User')}: {entry.get('text', '')}")
    c_text = "\n".join(bullet_lines)

    prompt_parts = [
        "You are a Community Context Analyst. Analyze the following user comments regarding a social media post.\n",
        "Your goal is to extract 'Community Notes' - specifically looking for fact-checking, debunking, or additional context provided by users.\n",
        f"COMMENTS:\n{c_text}\n\n",
        "OUTPUT:\n",
        "Provide a concise 1-paragraph summary of the community consensus regarding the veracity of the post.",
    ]
    prompt = "".join(prompt_parts)

    summary = await generate_simple_text(prompt, model_type, config)
    return summary
|
| 192 |
+
|
| 193 |
def extract_json_from_text(text):
|
| 194 |
try:
|
| 195 |
match = re.search(r'\{[\s\S]*\}', text)
|
|
|
|
| 198 |
except:
|
| 199 |
pass
|
| 200 |
return {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
|
| 202 |
def validate_parsed_data(data, is_text_only):
    """List the required fields that are absent or blank in ``data``.

    A score counts as blank when it is falsy, or when its string form is
    ``'0'`` or (case-insensitively) ``'n/a'``. In text-only mode the
    visual/audio integrity vectors and the two video modality scores are not
    required.

    Args:
        data: Parsed labeling result (a dict of sections). A non-dict value,
            or a section whose value is not a dict, is treated as empty
            rather than raising.
        is_text_only: True when no video was analysed.

    Returns:
        A list of labels such as ``"summary"``, ``"vector:<name>"`` or
        ``"modality:<name>"``; empty when nothing is missing.
    """
    if not isinstance(data, dict):
        data = {}

    def _section(name):
        # A parser may emit None or a bare string for a section; calling
        # .get on that would raise AttributeError, so fall back to {}.
        value = data.get(name)
        return value if isinstance(value, dict) else {}

    def _is_blank(value):
        return not value or str(value) == '0' or str(value).lower() == 'n/a'

    missing = []

    if not data.get('video_context_summary'):
        missing.append("summary")

    reasoning = _section('final_assessment').get('reasoning')
    # Very short reasoning (under 5 characters) is treated as missing.
    if not reasoning or len(str(reasoning)) < 5:
        missing.append("final:reasoning")

    vectors = _section('veracity_vectors')
    required_vectors = ['visual_integrity_score', 'audio_integrity_score', 'source_credibility_score', 'logical_consistency_score', 'emotional_manipulation_score']
    for k in required_vectors:
        if is_text_only and k in ['visual_integrity_score', 'audio_integrity_score']:
            continue
        if _is_blank(vectors.get(k)):
            missing.append(f"vector:{k}")

    mod = _section('modalities')
    for k in ['video_audio_score', 'video_caption_score', 'audio_caption_score']:
        if is_text_only and k in ['video_audio_score', 'video_caption_score']:
            continue
        if _is_blank(mod.get(k)):
            missing.append(f"modality:{k}")

    if not _section('factuality_factors').get('claim_accuracy'):
        missing.append("factuality:claim_accuracy")

    if not _section('disinformation_analysis').get('classification'):
        missing.append("disinfo:classification")

    return missing
|
| 230 |
|
| 231 |
+
def smart_merge(base, new_data):
    """Fold ``new_data`` into ``base`` without overwriting usable values.

    Rules, applied key by key:
      * a key absent from ``base`` is copied over as-is;
      * two dict values are merged recursively, in place;
      * otherwise the existing value is replaced only when it is a
        placeholder (falsy, ``"0"`` or ``"n/a"`` in any case) and the
        incoming value is not.

    ``base`` is mutated and returned. When ``new_data`` is not a dict it is
    returned if truthy, else ``base``; when ``base`` is not a dict,
    ``new_data`` is returned.
    """
    def _is_real(value):
        return bool(value) and str(value) != "0" and str(value).lower() != "n/a"

    if not isinstance(new_data, dict):
        return new_data if new_data else base
    if not isinstance(base, dict):
        return new_data

    for key, incoming in new_data.items():
        if key not in base:
            base[key] = incoming
            continue

        current = base[key]
        if isinstance(current, dict) and isinstance(incoming, dict):
            smart_merge(current, incoming)
            continue

        if _is_real(incoming) and not _is_real(current):
            base[key] = incoming

    return base
|
| 245 |
+
|
| 246 |
def save_debug_log(request_id, kind, content, attempt, label=""):
|
| 247 |
if not request_id: return
|
| 248 |
try:
|
|
|
|
| 256 |
except Exception as e:
|
| 257 |
logger.error(f"Failed to save debug log: {e}")
|
| 258 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 259 |
async def run_gemini_labeling_pipeline(video_path: str, caption: str, transcript: str, gemini_config: dict, include_comments: bool, reasoning_method: str = "cot", system_persona: str = "", request_id: str = None):
|
| 260 |
if genai_legacy is None:
|
| 261 |
yield "ERROR: Legacy SDK missing.\n"
|
| 262 |
return
|
|
|
|
| 263 |
api_key = gemini_config.get("api_key")
|
| 264 |
+
if not api_key: return
|
|
|
|
|
|
|
|
|
|
| 265 |
max_retries = int(gemini_config.get("max_retries", 1))
|
| 266 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 267 |
try:
|
| 268 |
genai_legacy.configure(api_key=api_key)
|
| 269 |
loop = asyncio.get_event_loop()
|
| 270 |
uploaded_file = None
|
| 271 |
is_text_only = False
|
|
|
|
| 272 |
if video_path and os.path.exists(video_path):
|
| 273 |
+
uploaded_file = await loop.run_in_executor(None, lambda: genai_legacy.upload_file(path=video_path))
|
| 274 |
+
while uploaded_file.state.name == "PROCESSING": await asyncio.sleep(2)
|
| 275 |
+
else: is_text_only = True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 276 |
|
| 277 |
+
active_tools =[]
|
| 278 |
+
if gemini_config.get("use_search", False):
|
| 279 |
+
active_tools.append({"google_search_retrieval": {}})
|
| 280 |
+
system_persona += "\n\n**CRITICAL: AGENTIC TOOLS ENABLED**\n- You MUST use the Web Search tool to fact-check the claims, look up current events, or verify entity backgrounds before concluding."
|
| 281 |
+
if gemini_config.get("use_code", False):
|
| 282 |
+
active_tools.append({"code_execution": {}})
|
| 283 |
+
system_persona += "\n- You MUST use the Code Execution tool for any necessary calculations, data processing, or statistical verifications."
|
| 284 |
+
|
| 285 |
+
model = genai_legacy.GenerativeModel("models/gemini-2.0-flash-exp", tools=active_tools if active_tools else None)
|
| 286 |
toon_schema = SCHEMA_REASONING if include_comments else SCHEMA_SIMPLE
|
| 287 |
score_instructions = SCORE_INSTRUCTIONS_REASONING if include_comments else SCORE_INSTRUCTIONS_SIMPLE
|
| 288 |
+
tag_list_text = get_formatted_tag_list()
|
| 289 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 290 |
accumulated_data = {}
|
| 291 |
+
prompt_used = ""
|
| 292 |
fcot_trace = {}
|
| 293 |
full_raw_text = ""
|
| 294 |
+
if is_text_only: system_persona += "\n" + TEXT_ONLY_INSTRUCTIONS
|
| 295 |
|
| 296 |
for attempt in range(max_retries + 1):
|
| 297 |
raw_text = ""
|
|
|
|
| 316 |
save_debug_log(request_id, 'response', raw_text, attempt, 'reprompt')
|
| 317 |
else:
|
| 318 |
if reasoning_method == "fcot":
|
| 319 |
+
yield "Starting Fractal Chain of Thought (Gemini FCoT)..."
|
| 320 |
chat = model.start_chat(history=[])
|
| 321 |
|
| 322 |
macro_prompt = FCOT_MACRO_PROMPT.format(system_persona=system_persona, caption=caption, transcript=transcript)
|
|
|
|
| 323 |
save_debug_log(request_id, 'prompt', macro_prompt, attempt, 'fcot_macro')
|
| 324 |
+
inputs1 = [macro_prompt]
|
|
|
|
| 325 |
if uploaded_file: inputs1.insert(0, uploaded_file)
|
| 326 |
+
res1 = await loop.run_in_executor(None, lambda: chat.send_message(inputs1))
|
|
|
|
| 327 |
macro_hypothesis = res1.text
|
| 328 |
save_debug_log(request_id, 'response', macro_hypothesis, attempt, 'fcot_macro')
|
| 329 |
fcot_trace['macro'] = macro_hypothesis
|
|
|
|
| 330 |
|
| 331 |
meso_prompt = FCOT_MESO_PROMPT.format(macro_hypothesis=macro_hypothesis)
|
| 332 |
save_debug_log(request_id, 'prompt', meso_prompt, attempt, 'fcot_meso')
|
| 333 |
+
res2 = await loop.run_in_executor(None, lambda: chat.send_message(meso_prompt))
|
| 334 |
micro_observations = res2.text
|
| 335 |
save_debug_log(request_id, 'response', micro_observations, attempt, 'fcot_meso')
|
| 336 |
fcot_trace['meso'] = micro_observations
|
| 337 |
+
|
| 338 |
+
synthesis_prompt = FCOT_SYNTHESIS_PROMPT.format(toon_schema=toon_schema, score_instructions=score_instructions, tag_list_text=tag_list_text)
|
| 339 |
save_debug_log(request_id, 'prompt', synthesis_prompt, attempt, 'fcot_synthesis')
|
| 340 |
+
res3 = await loop.run_in_executor(None, lambda: chat.send_message(synthesis_prompt))
|
| 341 |
raw_text = res3.text
|
| 342 |
save_debug_log(request_id, 'response', raw_text, attempt, 'fcot_synthesis')
|
| 343 |
+
prompt_used = f"FCoT Pipeline:\nMacro: {macro_hypothesis}\nMeso: {micro_observations}"
|
| 344 |
else:
|
| 345 |
+
template = LABELING_PROMPT_TEMPLATE_NO_COT if reasoning_method == "none" else LABELING_PROMPT_TEMPLATE
|
| 346 |
+
prompt_text = template.format(
|
| 347 |
+
system_persona=system_persona, caption=caption, transcript=transcript,
|
| 348 |
+
toon_schema=toon_schema, score_instructions=score_instructions, tag_list_text=tag_list_text
|
| 349 |
+
)
|
| 350 |
prompt_used = prompt_text
|
| 351 |
+
if is_text_only: prompt_text = "NOTE: Text Analysis Only.\n" + prompt_text
|
| 352 |
+
save_debug_log(request_id, 'prompt', prompt_text, attempt, f'standard_{reasoning_method}')
|
|
|
|
| 353 |
inputs = [prompt_text]
|
| 354 |
if uploaded_file: inputs.append(uploaded_file)
|
| 355 |
+
response = await loop.run_in_executor(None, lambda: model.generate_content(inputs, generation_config={"temperature": 0.1}))
|
|
|
|
|
|
|
|
|
|
|
|
|
| 356 |
raw_text = response.text
|
| 357 |
+
save_debug_log(request_id, 'response', raw_text, attempt, f'standard_{reasoning_method}')
|
| 358 |
+
|
| 359 |
if raw_text:
|
| 360 |
full_raw_text += f"\n--- Attempt {attempt} ---\n{raw_text}\n"
|
| 361 |
parsed_step = parse_veracity_toon(raw_text)
|
|
|
|
| 368 |
else:
|
| 369 |
parsed_step[k] = json_data[k]
|
| 370 |
accumulated_data = smart_merge(accumulated_data, parsed_step)
|
| 371 |
+
|
| 372 |
missing_fields = validate_parsed_data(accumulated_data, is_text_only)
|
| 373 |
if not missing_fields:
|
| 374 |
+
yield f"Validation Passed. All factuality components processed and confidence scores obtained. (Method: {reasoning_method})\n"
|
| 375 |
yield {"raw_toon": full_raw_text, "parsed_data": accumulated_data, "prompt_used": prompt_used, "fcot_trace": fcot_trace}
|
| 376 |
break
|
| 377 |
+
|
| 378 |
if attempt == max_retries:
|
| 379 |
yield f"Max retries reached. Saving incomplete data.\n"
|
| 380 |
yield {"raw_toon": full_raw_text, "parsed_data": accumulated_data, "prompt_used": prompt_used, "fcot_trace": fcot_trace}
|
| 381 |
break
|
| 382 |
|
| 383 |
+
if uploaded_file: await loop.run_in_executor(None, lambda: genai_legacy.delete_file(name=uploaded_file.name))
|
| 384 |
+
except Exception as e: yield f"ERROR: {e}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 385 |
|
| 386 |
async def run_vertex_labeling_pipeline(video_path: str, caption: str, transcript: str, vertex_config: dict, include_comments: bool, reasoning_method: str = "cot", system_persona: str = "", request_id: str = None):
|
| 387 |
if genai is None:
|
|
|
|
| 389 |
return
|
| 390 |
|
| 391 |
project_id = vertex_config.get("project_id")
|
| 392 |
+
location = vertex_config.get("location", "us-central1")
|
| 393 |
+
model_name = vertex_config.get("model_name", "gemini-1.5-pro-preview-0409")
|
| 394 |
+
max_retries = int(vertex_config.get("max_retries", 1))
|
| 395 |
+
api_key = vertex_config.get("api_key")
|
| 396 |
|
| 397 |
+
if not project_id: return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 398 |
|
| 399 |
try:
|
| 400 |
+
# Pass api_key directly if available to use API Keys instead of ADC Service Accounts
|
| 401 |
if api_key:
|
| 402 |
+
client = genai.Client(vertexai=True, project=project_id, location=location, api_key=api_key)
|
| 403 |
else:
|
| 404 |
+
client = genai.Client(vertexai=True, project=project_id, location=location)
|
| 405 |
+
|
| 406 |
video_part = None
|
| 407 |
is_text_only = False
|
| 408 |
if video_path and os.path.exists(video_path):
|
| 409 |
with open(video_path, 'rb') as f: video_bytes = f.read()
|
| 410 |
video_part = Part.from_bytes(data=video_bytes, mime_type="video/mp4")
|
| 411 |
+
else: is_text_only = True
|
| 412 |
+
|
| 413 |
+
active_tools =[]
|
| 414 |
+
if vertex_config.get("use_search", False):
|
| 415 |
+
active_tools.append(Tool(google_search=GoogleSearch()))
|
| 416 |
+
system_persona += "\n\n**CRITICAL: AGENTIC TOOLS ENABLED**\n- You MUST use the Web Search tool to fact-check the claims, look up current events, or verify entity backgrounds before concluding."
|
| 417 |
+
if vertex_config.get("use_code", False):
|
| 418 |
+
try:
|
| 419 |
+
from google.genai.types import CodeExecution
|
| 420 |
+
active_tools.append(Tool(code_execution=CodeExecution()))
|
| 421 |
+
system_persona += "\n- You MUST use the Code Execution tool for any necessary calculations, data processing, or statistical verifications."
|
| 422 |
+
except ImportError:
|
| 423 |
+
pass
|
| 424 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 425 |
config = GenerateContentConfig(
|
| 426 |
+
temperature=0.1, response_mime_type="text/plain", max_output_tokens=8192,
|
| 427 |
+
tools=active_tools if active_tools else None
|
|
|
|
|
|
|
| 428 |
)
|
| 429 |
|
| 430 |
+
toon_schema = SCHEMA_REASONING if include_comments else SCHEMA_SIMPLE
|
| 431 |
+
score_instructions = SCORE_INSTRUCTIONS_REASONING if include_comments else SCORE_INSTRUCTIONS_SIMPLE
|
| 432 |
+
tag_list_text = get_formatted_tag_list()
|
| 433 |
+
|
| 434 |
accumulated_data = {}
|
| 435 |
+
prompt_used = ""
|
| 436 |
fcot_trace = {}
|
| 437 |
full_raw_text = ""
|
| 438 |
+
loop = asyncio.get_event_loop()
|
| 439 |
+
|
| 440 |
+
if is_text_only: system_persona += "\n" + TEXT_ONLY_INSTRUCTIONS
|
| 441 |
|
| 442 |
for attempt in range(max_retries + 1):
|
| 443 |
raw_text = ""
|
|
|
|
| 445 |
missing = validate_parsed_data(accumulated_data, is_text_only)
|
| 446 |
yield f"Validation failed. Missing or incomplete fields: {missing}. Initiating Iterative Reprompt (Attempt {attempt}/{max_retries}) to acquire remaining factuality components...\n"
|
| 447 |
|
| 448 |
+
# REPROMPT CONSTRUCTION
|
| 449 |
prompt_text = (
|
| 450 |
f"SYSTEM: Review the previous attempt which failed validation.\n"
|
| 451 |
f"CONTEXT: Caption: \"{caption}\"\nTranscript: \"{transcript}\"\n"
|
|
|
|
| 466 |
save_debug_log(request_id, 'response', raw_text, attempt, 'reprompt')
|
| 467 |
else:
|
| 468 |
if reasoning_method == "fcot":
|
| 469 |
+
yield "Starting Fractal Chain of Thought (Vertex FCoT)..."
|
| 470 |
chat = client.chats.create(model=model_name, config=config)
|
| 471 |
|
| 472 |
macro_prompt = FCOT_MACRO_PROMPT.format(system_persona=system_persona, caption=caption, transcript=transcript)
|
| 473 |
save_debug_log(request_id, 'prompt', macro_prompt, attempt, 'fcot_macro')
|
| 474 |
+
inputs1 = [macro_prompt]
|
| 475 |
if video_part: inputs1.insert(0, video_part)
|
| 476 |
else: inputs1[0] = "NOTE: Text Only Analysis.\n" + inputs1[0]
|
| 477 |
|
|
|
|
| 479 |
macro_hypothesis = res1.text
|
| 480 |
save_debug_log(request_id, 'response', macro_hypothesis, attempt, 'fcot_macro')
|
| 481 |
fcot_trace['macro'] = macro_hypothesis
|
|
|
|
| 482 |
|
| 483 |
meso_prompt = FCOT_MESO_PROMPT.format(macro_hypothesis=macro_hypothesis)
|
| 484 |
save_debug_log(request_id, 'prompt', meso_prompt, attempt, 'fcot_meso')
|
|
|
|
| 487 |
save_debug_log(request_id, 'response', micro_observations, attempt, 'fcot_meso')
|
| 488 |
fcot_trace['meso'] = micro_observations
|
| 489 |
|
| 490 |
+
synthesis_prompt = FCOT_SYNTHESIS_PROMPT.format(toon_schema=toon_schema, score_instructions=score_instructions, tag_list_text=tag_list_text)
|
| 491 |
save_debug_log(request_id, 'prompt', synthesis_prompt, attempt, 'fcot_synthesis')
|
| 492 |
res3 = await loop.run_in_executor(None, lambda: chat.send_message(synthesis_prompt))
|
| 493 |
raw_text = res3.text
|
| 494 |
save_debug_log(request_id, 'response', raw_text, attempt, 'fcot_synthesis')
|
| 495 |
+
prompt_used = f"FCoT (Vertex):\nMacro: {macro_hypothesis}\nMeso: {micro_observations}"
|
|
|
|
| 496 |
else:
|
| 497 |
+
template = LABELING_PROMPT_TEMPLATE_NO_COT if reasoning_method == "none" else LABELING_PROMPT_TEMPLATE
|
| 498 |
+
prompt_text = template.format(
|
| 499 |
+
system_persona=system_persona, caption=caption, transcript=transcript,
|
| 500 |
+
toon_schema=toon_schema, score_instructions=score_instructions, tag_list_text=tag_list_text
|
| 501 |
+
)
|
| 502 |
+
contents =[]
|
| 503 |
if video_part: contents =[video_part, prompt_text]
|
| 504 |
else: contents =[f"NOTE: Text Only Analysis (No Video).\n{prompt_text}"]
|
| 505 |
prompt_used = prompt_text
|
| 506 |
+
save_debug_log(request_id, 'prompt', prompt_text, attempt, f'standard_{reasoning_method}')
|
| 507 |
+
yield f"Generating Labels (Vertex {reasoning_method.upper()})..."
|
| 508 |
+
response = await loop.run_in_executor(None, lambda: client.models.generate_content(model=model_name, contents=contents, config=config))
|
|
|
|
|
|
|
|
|
|
| 509 |
raw_text = response.text
|
| 510 |
+
save_debug_log(request_id, 'response', raw_text, attempt, f'standard_{reasoning_method}')
|
|
|
|
|
|
|
|
|
|
|
|
|
| 511 |
|
| 512 |
if raw_text:
|
| 513 |
full_raw_text += f"\n--- Attempt {attempt} ---\n{raw_text}\n"
|
|
|
|
| 524 |
|
| 525 |
missing_fields = validate_parsed_data(accumulated_data, is_text_only)
|
| 526 |
if not missing_fields:
|
| 527 |
+
yield f"Validation Passed. All factuality components processed and confidence scores obtained. (Method: {reasoning_method})\n"
|
| 528 |
yield {"raw_toon": full_raw_text, "parsed_data": accumulated_data, "prompt_used": prompt_used, "fcot_trace": fcot_trace}
|
| 529 |
break
|
| 530 |
|
|
|
|
| 532 |
yield f"Max retries reached. Saving incomplete data.\n"
|
| 533 |
yield {"raw_toon": full_raw_text, "parsed_data": accumulated_data, "prompt_used": prompt_used, "fcot_trace": fcot_trace}
|
| 534 |
break
|
| 535 |
+
|
| 536 |
except Exception as e:
|
| 537 |
+
yield f"ERROR: {e}"
|
| 538 |
logger.error("Vertex Labeling Error", exc_info=True)
|
| 539 |
|
|
|
|
| 540 |
async def run_nrp_labeling_pipeline(video_path: str, caption: str, transcript: str, nrp_config: dict, include_comments: bool, reasoning_method: str = "cot", system_persona: str = "", request_id: str = None):
|
| 541 |
api_key = nrp_config.get("api_key")
|
| 542 |
model_name = nrp_config.get("model_name", "gpt-4")
|
|
|
|
| 554 |
|
| 555 |
is_text_only = True
|
| 556 |
system_persona += "\n" + TEXT_ONLY_INSTRUCTIONS
|
| 557 |
+
|
| 558 |
+
if nrp_config.get("use_search", False):
|
| 559 |
+
system_persona += "\n\n**CRITICAL: AGENTIC TOOLS ENABLED**\n- You MUST use the Web Search tool to fact-check the claims, look up current events, or verify entity backgrounds before concluding."
|
| 560 |
+
if nrp_config.get("use_code", False):
|
| 561 |
+
system_persona += "\n- You MUST use the Code Execution tool for any necessary calculations, data processing, or statistical verifications."
|
| 562 |
|
| 563 |
toon_schema = SCHEMA_REASONING if include_comments else SCHEMA_SIMPLE
|
| 564 |
score_instructions = SCORE_INSTRUCTIONS_REASONING if include_comments else SCORE_INSTRUCTIONS_SIMPLE
|
|
|
|
| 576 |
"messages": messages,
|
| 577 |
"temperature": 0.1
|
| 578 |
}
|
| 579 |
+
|
| 580 |
+
logger.info(f"[{request_id}] NRP API Call ({attempt_label}) - URL: {base_url}/chat/completions")
|
| 581 |
+
logger.info(f"[{request_id}] NRP API Call - Model: {model_name}")
|
| 582 |
+
logger.info(f"[{request_id}] NRP API Call - Messages count: {len(messages)}")
|
| 583 |
+
|
| 584 |
def do_request():
    """POST the prepared chat-completions payload and return the reply text.

    This is a blocking helper: the enclosing coroutine hands it to
    ``loop.run_in_executor``. It reads ``request_id``, ``base_url``,
    ``headers`` and ``payload`` from the surrounding scope.

    Returns:
        The ``content`` value of the first choice's message in the JSON reply.

    Raises:
        Exception: if the HTTP status is not 200, if a 200 reply is not valid
            JSON, or if the JSON lacks ``choices[0].message.content``.
    """
    # Local import: the file's top-level import block is not visible from here.
    import time

    # Monotonic clock so the logged duration cannot be skewed by wall-clock adjustments.
    started = time.monotonic()
    logger.info(f"[{request_id}] Dispatching requests.post (timeout=600s)...")

    resp = requests.post(f"{base_url}/chat/completions", headers=headers, json=payload, timeout=600)

    elapsed = time.monotonic() - started
    logger.info(f"[{request_id}] NRP API Response received in {elapsed:.2f}s. Status Code: {resp.status_code}")

    if resp.status_code != 200:
        logger.error(f"[{request_id}] API Error {resp.status_code}: {resp.text}")
        raise Exception(f"API Error {resp.status_code}: {resp.text}")

    # A 200 status does not guarantee a JSON body; report what was actually received.
    try:
        resp_json = resp.json()
    except ValueError as exc:
        logger.error(f"[{request_id}] API returned a non-JSON body: {resp.text[:500]}")
        raise Exception(f"API returned a non-JSON body: {resp.text[:500]}") from exc

    usage = resp_json.get("usage", {}) if isinstance(resp_json, dict) else {}
    logger.info(f"[{request_id}] NRP API Usage: {usage}")

    # Turn a bare KeyError/IndexError/TypeError into an error that names the problem.
    try:
        return resp_json["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as exc:
        logger.error(f"[{request_id}] Unexpected API response shape: {resp.text[:500]}")
        raise Exception(
            f"Unexpected API response shape (no choices[0].message.content): {resp.text[:500]}"
        ) from exc
|
| 602 |
+
|
| 603 |
return await loop.run_in_executor(None, do_request)
|
| 604 |
|
| 605 |
try:
|
|
|
|
| 607 |
raw_text = ""
|
| 608 |
if attempt > 0:
|
| 609 |
missing = validate_parsed_data(accumulated_data, is_text_only)
|
| 610 |
+
yield f"Validation failed. Missing fields: {missing}. Initiating Reprompt (Attempt {attempt}/{max_retries})...\n"
|
| 611 |
+
|
| 612 |
prompt_text = (
|
| 613 |
f"SYSTEM: Review the previous attempt which failed validation.\n"
|
| 614 |
f"CONTEXT: Caption: \"{caption}\"\nTranscript: \"{transcript}\"\n"
|
| 615 |
f"PREVIOUS (PARTIAL) DATA: {json.dumps(accumulated_data, indent=2)}\n"
|
| 616 |
f"MISSING FIELDS: {missing}\n"
|
| 617 |
+
f"INSTRUCTION: Generate the missing fields to complete the schema. You MUST provide the missing scores for {missing}.\n"
|
| 618 |
+
f"Output the FULL VALID TOON OBJECT containing all required fields.\n"
|
| 619 |
f"{toon_schema}"
|
| 620 |
)
|
| 621 |
+
|
| 622 |
save_debug_log(request_id, 'prompt', prompt_text, attempt, 'reprompt')
|
| 623 |
+
|
| 624 |
+
yield f" - Sending Reprompt request to NRP API (Model: {model_name}, Timeout: 600s)...\n"
|
| 625 |
raw_text = await _call_nrp([
|
| 626 |
{"role": "system", "content": system_persona},
|
| 627 |
{"role": "user", "content": prompt_text}
|
| 628 |
+
], attempt_label=f"reprompt_{attempt}")
|
| 629 |
+
yield f" - Received Reprompt response from NRP API.\n\n"
|
| 630 |
+
|
| 631 |
save_debug_log(request_id, 'response', raw_text, attempt, 'reprompt')
|
| 632 |
else:
|
| 633 |
if reasoning_method == "fcot":
|
| 634 |
yield "Starting Fractal Chain of Thought (NRP FCoT)...\n"
|
| 635 |
+
|
| 636 |
macro_prompt = FCOT_MACRO_PROMPT.format(system_persona=system_persona, caption=caption, transcript=transcript)
|
| 637 |
macro_prompt = "NOTE: Text Only Analysis.\n" + macro_prompt
|
| 638 |
save_debug_log(request_id, 'prompt', macro_prompt, attempt, 'fcot_macro')
|
| 639 |
|
| 640 |
macro_messages =[{"role": "system", "content": system_persona}, {"role": "user", "content": macro_prompt}]
|
| 641 |
+
yield f" - Stage 1: Sending Macro Hypothesis request to NRP API (Timeout: 600s)...\n"
|
| 642 |
+
macro_hypothesis = await _call_nrp(macro_messages, attempt_label="fcot_macro")
|
| 643 |
+
yield f" - Stage 1: Received Macro Hypothesis response.\n"
|
| 644 |
+
|
| 645 |
save_debug_log(request_id, 'response', macro_hypothesis, attempt, 'fcot_macro')
|
| 646 |
fcot_trace['macro'] = macro_hypothesis
|
| 647 |
|
| 648 |
meso_prompt = FCOT_MESO_PROMPT.format(macro_hypothesis=macro_hypothesis)
|
| 649 |
save_debug_log(request_id, 'prompt', meso_prompt, attempt, 'fcot_meso')
|
|
|
|
| 650 |
meso_messages = macro_messages +[{"role": "assistant", "content": macro_hypothesis}, {"role": "user", "content": meso_prompt}]
|
| 651 |
+
|
| 652 |
+
yield f" - Stage 2: Sending Meso Analysis request to NRP API (Timeout: 600s)...\n"
|
| 653 |
+
micro_observations = await _call_nrp(meso_messages, attempt_label="fcot_meso")
|
| 654 |
+
yield f" - Stage 2: Received Meso Analysis response.\n"
|
| 655 |
+
|
| 656 |
save_debug_log(request_id, 'response', micro_observations, attempt, 'fcot_meso')
|
| 657 |
fcot_trace['meso'] = micro_observations
|
| 658 |
|
| 659 |
synthesis_prompt = FCOT_SYNTHESIS_PROMPT.format(toon_schema=toon_schema, score_instructions=score_instructions, tag_list_text=tag_list_text)
|
| 660 |
save_debug_log(request_id, 'prompt', synthesis_prompt, attempt, 'fcot_synthesis')
|
|
|
|
| 661 |
synthesis_messages = meso_messages +[{"role": "assistant", "content": micro_observations}, {"role": "user", "content": synthesis_prompt}]
|
| 662 |
+
|
| 663 |
+
yield f" - Stage 3: Sending Synthesis/Formatting request to NRP API (Timeout: 600s)...\n"
|
| 664 |
+
raw_text = await _call_nrp(synthesis_messages, attempt_label="fcot_synthesis")
|
| 665 |
+
yield f" - Stage 3: Received Synthesis response.\n\n"
|
| 666 |
+
|
| 667 |
save_debug_log(request_id, 'response', raw_text, attempt, 'fcot_synthesis')
|
| 668 |
prompt_used = f"FCoT (NRP):\nMacro: {macro_hypothesis}\nMeso: {micro_observations}"
|
| 669 |
|
| 670 |
else:
|
| 671 |
+
template = LABELING_PROMPT_TEMPLATE_NO_COT if reasoning_method == "none" else LABELING_PROMPT_TEMPLATE
|
| 672 |
+
prompt_text = template.format(
|
| 673 |
system_persona=system_persona, caption=caption, transcript=transcript,
|
| 674 |
toon_schema=toon_schema, score_instructions=score_instructions, tag_list_text=tag_list_text
|
| 675 |
)
|
| 676 |
prompt_text = f"NOTE: Text Only Analysis (No Video).\n{prompt_text}"
|
| 677 |
prompt_used = prompt_text
|
| 678 |
+
save_debug_log(request_id, 'prompt', prompt_text, attempt, f'standard_{reasoning_method}')
|
| 679 |
+
yield f"Generating Labels (NRP {reasoning_method.upper()})...\n"
|
| 680 |
+
yield f" - Sending Standard request to NRP API (Model: {model_name}, Timeout: 600s)...\n"
|
| 681 |
+
|
| 682 |
raw_text = await _call_nrp([
|
| 683 |
{"role": "system", "content": system_persona},
|
| 684 |
{"role": "user", "content": prompt_text}
|
| 685 |
+
], attempt_label=f"standard_{reasoning_method}")
|
| 686 |
+
|
| 687 |
+
yield f" - Received response from NRP API.\n\n"
|
| 688 |
+
save_debug_log(request_id, 'response', raw_text, attempt, f'standard_{reasoning_method}')
|
| 689 |
|
| 690 |
if raw_text:
|
| 691 |
full_raw_text += f"\n--- Attempt {attempt} ---\n{raw_text}\n"
|
|
|
|
| 702 |
|
| 703 |
missing_fields = validate_parsed_data(accumulated_data, is_text_only)
|
| 704 |
if not missing_fields:
|
| 705 |
+
yield f"Validation Passed. All factuality components processed and confidence scores obtained. (Method: {reasoning_method})\n"
|
| 706 |
yield {"raw_toon": full_raw_text, "parsed_data": accumulated_data, "prompt_used": prompt_used, "fcot_trace": fcot_trace}
|
| 707 |
break
|
| 708 |
|
| 709 |
if attempt == max_retries:
|
| 710 |
+
yield f"Max retries reached. Saving incomplete data.\n"
|
| 711 |
+
yield {"raw_toon": full_raw_text, "parsed_data": accumulated_data, "prompt_
|
|
|
|
|
|
|
|
|
|
|
|