Spaces:
Sleeping
Sleeping
Commit ·
1c08c4a
1
Parent(s): ba8b51d
new v2
Browse files- frontend/src/App.tsx +415 -88
- main.go +12 -2
- src/app.py +879 -781
frontend/src/App.tsx
CHANGED
|
@@ -5,21 +5,21 @@ import {
|
|
| 5 |
StopCircle, RefreshCw, CheckCircle2, PenTool, ClipboardCheck, Info, Clock, FileText,
|
| 6 |
Tag, Home, Cpu, FlaskConical, Target, Trash2, ArrowUpRight, CheckSquare, Square,
|
| 7 |
Layers, Activity, Zap, BrainCircuit, Network, Archive, Plus, Edit3, RotateCcw,
|
| 8 |
-
Bot, Trophy, HelpCircle, Settings, Calculator
|
| 9 |
} from 'lucide-react';
|
| 10 |
|
| 11 |
function App() {
|
| 12 |
const[activeTab, setActiveTab] = useState('home');
|
| 13 |
const [logs, setLogs] = useState<string>('System Ready.\n');
|
| 14 |
-
const
|
| 15 |
const logContainerRef = useRef<HTMLDivElement>(null);
|
| 16 |
|
| 17 |
// Processing Config State
|
| 18 |
-
const[modelProvider, setModelProvider] = useState('nrp');
|
| 19 |
const [apiKey, setApiKey] = useState('');
|
| 20 |
-
const
|
| 21 |
-
const
|
| 22 |
-
const
|
| 23 |
const [location, setLocation] = useState('us-central1');
|
| 24 |
const [includeComments, setIncludeComments] = useState(false);
|
| 25 |
const[reasoningMethod, setReasoningMethod] = useState('cot');
|
|
@@ -35,6 +35,7 @@ function App() {
|
|
| 35 |
// Data States
|
| 36 |
const [queueList, setQueueList] = useState<any[]>([]);
|
| 37 |
const[selectedQueueItems, setSelectedQueueItems] = useState<Set<string>>(new Set());
|
|
|
|
| 38 |
const[lastQueueIndex, setLastQueueIndex] = useState<number | null>(null);
|
| 39 |
|
| 40 |
const[singleLinkInput, setSingleLinkInput] = useState('');
|
|
@@ -43,10 +44,10 @@ function App() {
|
|
| 43 |
const[profilePosts, setProfilePosts] = useState<any[]>([]);
|
| 44 |
const [communityDatasets, setCommunityDatasets] = useState<any[]>([]);
|
| 45 |
const [communityAnalysis, setCommunityAnalysis] = useState<any>(null);
|
| 46 |
-
const
|
| 47 |
|
| 48 |
-
const
|
| 49 |
-
const[selectedItems, setSelectedItems] = useState<Set<string>>(new Set());
|
| 50 |
const [lastDatasetIndex, setLastDatasetIndex] = useState<number | null>(null);
|
| 51 |
|
| 52 |
const [benchmarks, setBenchmarks] = useState<any>(null);
|
|
@@ -79,12 +80,20 @@ function App() {
|
|
| 79 |
const [agentConfig, setAgentConfig] = useState({ use_search: true, use_code: false });
|
| 80 |
|
| 81 |
// Resampling configuration
|
| 82 |
-
const
|
| 83 |
|
| 84 |
// Drag Selection references
|
| 85 |
const isDraggingQueueRef = useRef(false);
|
| 86 |
const isDraggingDatasetRef = useRef(false);
|
| 87 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
useEffect(() => {
|
| 89 |
const handleMouseUp = () => {
|
| 90 |
isDraggingQueueRef.current = false;
|
|
@@ -123,12 +132,16 @@ function App() {
|
|
| 123 |
|
| 124 |
setSelectedItems(new Set());
|
| 125 |
setLastDatasetIndex(null);
|
| 126 |
-
},
|
| 127 |
|
| 128 |
useEffect(() => {
|
| 129 |
if (logContainerRef.current) logContainerRef.current.scrollTop = logContainerRef.current.scrollHeight;
|
| 130 |
}, [logs]);
|
| 131 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 132 |
useEffect(() => {
|
| 133 |
if (activeTab === 'agent' && agentMessages.length === 0) {
|
| 134 |
callAgent(agentMethod, {
|
|
@@ -274,6 +287,14 @@ function App() {
|
|
| 274 |
setLastQueueIndex(index);
|
| 275 |
};
|
| 276 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 277 |
const promoteSelected = async () => {
|
| 278 |
if (selectedItems.size === 0) return alert("No items selected.");
|
| 279 |
if (!confirm(`Promote ${selectedItems.size} items to Ground Truth?`)) return;
|
|
@@ -437,6 +458,20 @@ function App() {
|
|
| 437 |
} catch(e) { alert("Error adding link"); }
|
| 438 |
};
|
| 439 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 440 |
const clearProcessed = async () => {
|
| 441 |
if(!confirm("Remove all 'Processed' items from the queue?")) return;
|
| 442 |
try {
|
|
@@ -527,6 +562,84 @@ function App() {
|
|
| 527 |
setRefreshTrigger(p => p+1);
|
| 528 |
};
|
| 529 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 530 |
const callAgent = async (method: string, payloadParams: any) => {
|
| 531 |
return fetch(agentEndpoint, {
|
| 532 |
method: 'POST',
|
|
@@ -602,7 +715,18 @@ function App() {
|
|
| 602 |
|
| 603 |
return (
|
| 604 |
<div className="flex h-screen w-full bg-[#09090b] text-slate-200 font-sans overflow-hidden">
|
| 605 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 606 |
{/* SIDEBAR */}
|
| 607 |
<div className="w-[280px] flex flex-col border-r border-slate-800/60 bg-[#0c0c0e]">
|
| 608 |
<div className="h-16 flex items-center px-6 border-b border-slate-800/60">
|
|
@@ -640,6 +764,196 @@ function App() {
|
|
| 640 |
{/* HOME TAB */}
|
| 641 |
{activeTab === 'home' && (
|
| 642 |
<div className="h-full overflow-y-auto space-y-8 max-w-5xl pr-2">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 643 |
<div className="grid grid-cols-3 gap-6">
|
| 644 |
<div className="col-span-2 bg-slate-900/50 border border-slate-800 rounded-xl p-6">
|
| 645 |
<h2 className="text-xl font-bold text-white mb-4">Philosophy & Methodology</h2>
|
|
@@ -799,35 +1113,29 @@ function App() {
|
|
| 799 |
<div className="space-y-1">
|
| 800 |
<input type="password" value={apiKey} onChange={e => setApiKey(e.target.value)} className="w-full bg-slate-950 border border-slate-700 rounded p-2 text-xs text-white placeholder-slate-600" placeholder="NRP API Token"/>
|
| 801 |
</div>
|
| 802 |
-
<div className="space-y-1">
|
| 803 |
-
<select value={modelName} onChange={e => setModelName(e.target.value)} className="w-full bg-slate-950 border border-slate-700 rounded p-2 text-xs text-white">
|
| 804 |
-
<option value="qwen3">qwen3 (Multimodal)</option>
|
| 805 |
-
<option value="gpt-oss">gpt-oss</option>
|
| 806 |
-
<option value="kimi">kimi</option>
|
| 807 |
-
<option value="glm-4.7">glm-4.7</option>
|
| 808 |
-
<option value="minimax-m2">minimax-m2</option>
|
| 809 |
-
<option value="glm-v">glm-v (Multimodal)</option>
|
| 810 |
-
<option value="gemma3">gemma3 (Multimodal)</option>
|
| 811 |
-
</select>
|
| 812 |
-
</div>
|
| 813 |
</>
|
| 814 |
)}
|
| 815 |
-
|
| 816 |
-
{modelProvider !== 'nrp' && (
|
| 817 |
-
<div className="space-y-1">
|
| 818 |
-
<input value={modelName} onChange={e => setModelName(e.target.value)} className="w-full bg-slate-950 border border-slate-700 rounded p-2 text-xs text-white placeholder-slate-600" placeholder="Model Name"/>
|
| 819 |
-
</div>
|
| 820 |
-
)}
|
| 821 |
|
| 822 |
-
<
|
| 823 |
-
<
|
| 824 |
-
<
|
| 825 |
-
</
|
| 826 |
-
|
| 827 |
-
|
| 828 |
-
|
| 829 |
-
|
| 830 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 831 |
</div>
|
| 832 |
</div>
|
| 833 |
|
|
@@ -1004,27 +1312,12 @@ function App() {
|
|
| 1004 |
</>
|
| 1005 |
)}
|
| 1006 |
|
| 1007 |
-
{modelProvider === 'nrp' ? (
|
| 1008 |
-
<div className="space-y-1">
|
| 1009 |
-
<label className="text-[10px] text-slate-500">Model Name</label>
|
| 1010 |
-
<select value={modelName} onChange={e => setModelName(e.target.value)} className="w-full bg-slate-950 border border-slate-700 rounded p-2 text-xs text-white">
|
| 1011 |
-
<option value="qwen3">qwen3 (Multimodal)</option>
|
| 1012 |
-
<option value="gpt-oss">gpt-oss</option>
|
| 1013 |
-
<option value="kimi">kimi</option>
|
| 1014 |
-
<option value="glm-4.7">glm-4.7</option>
|
| 1015 |
-
<option value="minimax-m2">minimax-m2</option>
|
| 1016 |
-
<option value="glm-v">glm-v (Multimodal)</option>
|
| 1017 |
-
<option value="gemma3">gemma3 (Multimodal)</option>
|
| 1018 |
-
</select>
|
| 1019 |
-
</div>
|
| 1020 |
-
) : (
|
| 1021 |
-
<div className="space-y-1">
|
| 1022 |
-
<label className="text-[10px] text-slate-500">Model Name</label>
|
| 1023 |
-
<input value={modelName} onChange={e => setModelName(e.target.value)} className="w-full bg-slate-950 border border-slate-700 rounded p-2 text-xs text-white"/>
|
| 1024 |
-
</div>
|
| 1025 |
-
)}
|
| 1026 |
-
|
| 1027 |
<div className="space-y-1">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1028 |
<label className="text-[10px] text-slate-500">Reasoning Method</label>
|
| 1029 |
<select value={reasoningMethod} onChange={e => setReasoningMethod(e.target.value)} className="w-full bg-slate-950 border border-slate-700 rounded p-2 text-xs text-white">
|
| 1030 |
<option value="cot">Standard Chain of Thought</option>
|
|
@@ -1076,34 +1369,68 @@ function App() {
|
|
| 1076 |
</thead>
|
| 1077 |
<tbody>
|
| 1078 |
{queueList.map((q, i, arr) => (
|
| 1079 |
-
<
|
| 1080 |
-
|
| 1081 |
-
|
| 1082 |
-
|
| 1083 |
-
|
| 1084 |
-
|
| 1085 |
-
|
| 1086 |
-
|
| 1087 |
-
|
| 1088 |
-
|
| 1089 |
-
|
| 1090 |
-
|
| 1091 |
-
|
| 1092 |
-
|
| 1093 |
-
|
| 1094 |
-
|
| 1095 |
-
|
| 1096 |
-
|
| 1097 |
-
|
| 1098 |
-
|
| 1099 |
-
|
| 1100 |
-
|
| 1101 |
-
|
| 1102 |
-
|
| 1103 |
-
|
| 1104 |
-
|
| 1105 |
-
|
| 1106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1107 |
))}
|
| 1108 |
</tbody>
|
| 1109 |
</table>
|
|
@@ -1406,7 +1733,7 @@ function App() {
|
|
| 1406 |
<span className="capitalize text-slate-300 font-bold">{k.replace('v', 'Video-').replace('a', 'Audio-').replace('c', 'Caption')}</span>
|
| 1407 |
<span className="text-emerald-400 font-mono font-bold">{(manualScores as any)[k]}/10</span>
|
| 1408 |
</div>
|
| 1409 |
-
<input type="range" min="1" max="10" value={(manualScores as any)[k]} onChange={e => setManualScores({...manualScores,
|
| 1410 |
</div>
|
| 1411 |
))}
|
| 1412 |
</div>
|
|
@@ -1486,7 +1813,7 @@ function App() {
|
|
| 1486 |
</div>
|
| 1487 |
)}
|
| 1488 |
|
| 1489 |
-
{/* COMMUNITY AND ANALYTICS TABS */}
|
| 1490 |
{activeTab === 'community' && (
|
| 1491 |
<div className="flex h-full gap-6">
|
| 1492 |
<div className="w-1/3 bg-slate-900/50 border border-slate-800 rounded-xl overflow-auto">
|
|
|
|
| 5 |
StopCircle, RefreshCw, CheckCircle2, PenTool, ClipboardCheck, Info, Clock, FileText,
|
| 6 |
Tag, Home, Cpu, FlaskConical, Target, Trash2, ArrowUpRight, CheckSquare, Square,
|
| 7 |
Layers, Activity, Zap, BrainCircuit, Network, Archive, Plus, Edit3, RotateCcw,
|
| 8 |
+
Bot, Trophy, HelpCircle, Settings, Calculator, ChevronDown, ChevronUp
|
| 9 |
} from 'lucide-react';
|
| 10 |
|
| 11 |
function App() {
|
| 12 |
const[activeTab, setActiveTab] = useState('home');
|
| 13 |
const [logs, setLogs] = useState<string>('System Ready.\n');
|
| 14 |
+
const[isProcessing, setIsProcessing] = useState(false);
|
| 15 |
const logContainerRef = useRef<HTMLDivElement>(null);
|
| 16 |
|
| 17 |
// Processing Config State
|
| 18 |
+
const [modelProvider, setModelProvider] = useState('nrp');
|
| 19 |
const [apiKey, setApiKey] = useState('');
|
| 20 |
+
const[baseUrl, setBaseUrl] = useState('https://ellm.nrp-nautilus.io/v1'); // NRP Default
|
| 21 |
+
const[modelName, setModelName] = useState('qwen3'); // Default
|
| 22 |
+
const[projectId, setProjectId] = useState('');
|
| 23 |
const [location, setLocation] = useState('us-central1');
|
| 24 |
const [includeComments, setIncludeComments] = useState(false);
|
| 25 |
const[reasoningMethod, setReasoningMethod] = useState('cot');
|
|
|
|
| 35 |
// Data States
|
| 36 |
const [queueList, setQueueList] = useState<any[]>([]);
|
| 37 |
const[selectedQueueItems, setSelectedQueueItems] = useState<Set<string>>(new Set());
|
| 38 |
+
const[expandedQueueItems, setExpandedQueueItems] = useState<Set<string>>(new Set());
|
| 39 |
const[lastQueueIndex, setLastQueueIndex] = useState<number | null>(null);
|
| 40 |
|
| 41 |
const[singleLinkInput, setSingleLinkInput] = useState('');
|
|
|
|
| 44 |
const[profilePosts, setProfilePosts] = useState<any[]>([]);
|
| 45 |
const [communityDatasets, setCommunityDatasets] = useState<any[]>([]);
|
| 46 |
const [communityAnalysis, setCommunityAnalysis] = useState<any>(null);
|
| 47 |
+
const[integrityBoard, setIntegrityBoard] = useState<any[]>([]);
|
| 48 |
|
| 49 |
+
const[datasetList, setDatasetList] = useState<any[]>([]);
|
| 50 |
+
const [selectedItems, setSelectedItems] = useState<Set<string>>(new Set());
|
| 51 |
const [lastDatasetIndex, setLastDatasetIndex] = useState<number | null>(null);
|
| 52 |
|
| 53 |
const [benchmarks, setBenchmarks] = useState<any>(null);
|
|
|
|
| 80 |
const [agentConfig, setAgentConfig] = useState({ use_search: true, use_code: false });
|
| 81 |
|
| 82 |
// Resampling configuration
|
| 83 |
+
const[resampleCount, setResampleCount] = useState<number>(1);
|
| 84 |
|
| 85 |
// Drag Selection references
|
| 86 |
const isDraggingQueueRef = useRef(false);
|
| 87 |
const isDraggingDatasetRef = useRef(false);
|
| 88 |
|
| 89 |
+
// Quick Demo State
|
| 90 |
+
const[demoLink, setDemoLink] = useState('');
|
| 91 |
+
const [demoLogs, setDemoLogs] = useState('');
|
| 92 |
+
const [demoIsProcessing, setDemoIsProcessing] = useState(false);
|
| 93 |
+
const[demoResult, setDemoResult] = useState<any>(null);
|
| 94 |
+
const[showDemoConfig, setShowDemoConfig] = useState(false);
|
| 95 |
+
const demoLogContainerRef = useRef<HTMLDivElement>(null);
|
| 96 |
+
|
| 97 |
useEffect(() => {
|
| 98 |
const handleMouseUp = () => {
|
| 99 |
isDraggingQueueRef.current = false;
|
|
|
|
| 132 |
|
| 133 |
setSelectedItems(new Set());
|
| 134 |
setLastDatasetIndex(null);
|
| 135 |
+
},[activeTab, refreshTrigger]);
|
| 136 |
|
| 137 |
useEffect(() => {
|
| 138 |
if (logContainerRef.current) logContainerRef.current.scrollTop = logContainerRef.current.scrollHeight;
|
| 139 |
}, [logs]);
|
| 140 |
|
| 141 |
+
useEffect(() => {
|
| 142 |
+
if (demoLogContainerRef.current) demoLogContainerRef.current.scrollTop = demoLogContainerRef.current.scrollHeight;
|
| 143 |
+
},[demoLogs]);
|
| 144 |
+
|
| 145 |
useEffect(() => {
|
| 146 |
if (activeTab === 'agent' && agentMessages.length === 0) {
|
| 147 |
callAgent(agentMethod, {
|
|
|
|
| 287 |
setLastQueueIndex(index);
|
| 288 |
};
|
| 289 |
|
| 290 |
+
const toggleQueueExpand = (e: React.MouseEvent, link: string) => {
|
| 291 |
+
e.stopPropagation();
|
| 292 |
+
const newSet = new Set(expandedQueueItems);
|
| 293 |
+
if (newSet.has(link)) newSet.delete(link);
|
| 294 |
+
else newSet.add(link);
|
| 295 |
+
setExpandedQueueItems(newSet);
|
| 296 |
+
};
|
| 297 |
+
|
| 298 |
const promoteSelected = async () => {
|
| 299 |
if (selectedItems.size === 0) return alert("No items selected.");
|
| 300 |
if (!confirm(`Promote ${selectedItems.size} items to Ground Truth?`)) return;
|
|
|
|
| 458 |
} catch(e) { alert("Error adding link"); }
|
| 459 |
};
|
| 460 |
|
| 461 |
+
const addSingleLinkDirect = async (link: string) => {
|
| 462 |
+
if(!link) return;
|
| 463 |
+
try {
|
| 464 |
+
const res = await fetch('/queue/add', {
|
| 465 |
+
method: 'POST', headers: {'Content-Type': 'application/json'},
|
| 466 |
+
body: JSON.stringify({ link: link })
|
| 467 |
+
});
|
| 468 |
+
const d = await res.json();
|
| 469 |
+
if(d.status === 'success') {
|
| 470 |
+
setRefreshTrigger(p => p+1);
|
| 471 |
+
} else { alert(d.message); }
|
| 472 |
+
} catch(e) { alert("Error adding link"); }
|
| 473 |
+
};
|
| 474 |
+
|
| 475 |
const clearProcessed = async () => {
|
| 476 |
if(!confirm("Remove all 'Processed' items from the queue?")) return;
|
| 477 |
try {
|
|
|
|
| 562 |
setRefreshTrigger(p => p+1);
|
| 563 |
};
|
| 564 |
|
| 565 |
+
const runDemo = async () => {
|
| 566 |
+
if (!demoLink) return;
|
| 567 |
+
setDemoIsProcessing(true);
|
| 568 |
+
setDemoLogs('[SYSTEM] Preparing pipeline for single execution...\n');
|
| 569 |
+
setDemoResult(null);
|
| 570 |
+
|
| 571 |
+
try {
|
| 572 |
+
// 1. Ensure the link is added and forcefully requeued so it is 'Pending'
|
| 573 |
+
await fetch('/queue/add', {
|
| 574 |
+
method: 'POST', headers: {'Content-Type': 'application/json'},
|
| 575 |
+
body: JSON.stringify({ link: demoLink })
|
| 576 |
+
});
|
| 577 |
+
await fetch('/queue/requeue', {
|
| 578 |
+
method: 'POST', headers: {'Content-Type': 'application/json'},
|
| 579 |
+
body: JSON.stringify({ links: [demoLink] })
|
| 580 |
+
});
|
| 581 |
+
|
| 582 |
+
// 2. Setup FormData exactly like normal queue
|
| 583 |
+
const fd = new FormData();
|
| 584 |
+
const activeProvider = modelProvider === 'gcloud' ? 'vertex' : modelProvider;
|
| 585 |
+
fd.append('model_selection', activeProvider);
|
| 586 |
+
fd.append('gemini_api_key', apiKey);
|
| 587 |
+
fd.append('gemini_model_name', modelName);
|
| 588 |
+
fd.append('vertex_project_id', projectId);
|
| 589 |
+
fd.append('vertex_location', location);
|
| 590 |
+
fd.append('vertex_model_name', modelName);
|
| 591 |
+
fd.append('vertex_api_key', apiKey);
|
| 592 |
+
fd.append('nrp_api_key', apiKey);
|
| 593 |
+
fd.append('nrp_model_name', modelName);
|
| 594 |
+
fd.append('nrp_base_url', baseUrl);
|
| 595 |
+
fd.append('include_comments', includeComments.toString());
|
| 596 |
+
fd.append('reasoning_method', reasoningMethod);
|
| 597 |
+
fd.append('prompt_template', promptTemplate);
|
| 598 |
+
fd.append('custom_query', customQuery);
|
| 599 |
+
fd.append('max_reprompts', maxRetries.toString());
|
| 600 |
+
|
| 601 |
+
setDemoLogs(prev => prev + '[SYSTEM] Sending analysis payload to model server...\n');
|
| 602 |
+
|
| 603 |
+
// 3. Read Stream
|
| 604 |
+
const runRes = await fetch('/queue/run', { method: 'POST', body: fd });
|
| 605 |
+
const reader = runRes.body!.pipeThrough(new TextDecoderStream()).getReader();
|
| 606 |
+
|
| 607 |
+
while (true) {
|
| 608 |
+
const { value, done } = await reader.read();
|
| 609 |
+
if (done) break;
|
| 610 |
+
if (value.includes('event: close')) break;
|
| 611 |
+
const clean = value.replace(/data: /g, '').trim();
|
| 612 |
+
if (clean) setDemoLogs(prev => prev + clean + '\n');
|
| 613 |
+
}
|
| 614 |
+
|
| 615 |
+
// 4. Look up result from dataset
|
| 616 |
+
setDemoLogs(prev => prev + '\n[SYSTEM] Fetching structured object result...\n');
|
| 617 |
+
const dsRes = await fetch('/dataset/list');
|
| 618 |
+
const dsList = await dsRes.json();
|
| 619 |
+
|
| 620 |
+
const normalize = (l: string) => l.split('?')[0].replace('https://', '').replace('http://', '').replace('www.', '').replace(/\/$/, '');
|
| 621 |
+
const targetLink = normalize(demoLink);
|
| 622 |
+
|
| 623 |
+
// Sort descending to ensure we get the latest processed run
|
| 624 |
+
dsList.sort((a: any, b: any) => new Date(b.timestamp).getTime() - new Date(a.timestamp).getTime());
|
| 625 |
+
|
| 626 |
+
const match = dsList.find((d: any) => normalize(d.link || '') === targetLink && d.source !== 'Manual');
|
| 627 |
+
|
| 628 |
+
if (match) {
|
| 629 |
+
setDemoResult(match);
|
| 630 |
+
setDemoLogs(prev => prev + '[SYSTEM] Result rendered successfully.\n');
|
| 631 |
+
} else {
|
| 632 |
+
setDemoLogs(prev => prev + '[SYSTEM] Error: Could not find parsed result. Processing might have failed.\n');
|
| 633 |
+
}
|
| 634 |
+
|
| 635 |
+
} catch (err: any) {
|
| 636 |
+
setDemoLogs(prev => prev + `\n[ERROR] ${err.message}\n`);
|
| 637 |
+
} finally {
|
| 638 |
+
setDemoIsProcessing(false);
|
| 639 |
+
setRefreshTrigger(p => p+1);
|
| 640 |
+
}
|
| 641 |
+
}
|
| 642 |
+
|
| 643 |
const callAgent = async (method: string, payloadParams: any) => {
|
| 644 |
return fetch(agentEndpoint, {
|
| 645 |
method: 'POST',
|
|
|
|
| 715 |
|
| 716 |
return (
|
| 717 |
<div className="flex h-screen w-full bg-[#09090b] text-slate-200 font-sans overflow-hidden">
|
| 718 |
+
<datalist id="modelSuggestions">
|
| 719 |
+
<option value="gemini-1.5-pro-preview-0409" />
|
| 720 |
+
<option value="gemini-2.0-flash-exp" />
|
| 721 |
+
<option value="qwen3" />
|
| 722 |
+
<option value="gpt-oss" />
|
| 723 |
+
<option value="kimi" />
|
| 724 |
+
<option value="glm-4.7" />
|
| 725 |
+
<option value="minimax-m2" />
|
| 726 |
+
<option value="glm-v" />
|
| 727 |
+
<option value="gemma3" />
|
| 728 |
+
</datalist>
|
| 729 |
+
|
| 730 |
{/* SIDEBAR */}
|
| 731 |
<div className="w-[280px] flex flex-col border-r border-slate-800/60 bg-[#0c0c0e]">
|
| 732 |
<div className="h-16 flex items-center px-6 border-b border-slate-800/60">
|
|
|
|
| 764 |
{/* HOME TAB */}
|
| 765 |
{activeTab === 'home' && (
|
| 766 |
<div className="h-full overflow-y-auto space-y-8 max-w-5xl pr-2">
|
| 767 |
+
|
| 768 |
+
{/* QUICK DEMO SECTION */}
|
| 769 |
+
<div className="bg-slate-900/50 border border-slate-800 rounded-xl p-6 shadow-sm">
|
| 770 |
+
<div className="flex justify-between items-start mb-4">
|
| 771 |
+
<div>
|
| 772 |
+
<h2 className="text-xl font-bold text-white flex items-center gap-2">
|
| 773 |
+
<Zap className="w-5 h-5 text-amber-400" /> Try LiarMP4 - Quick Demo
|
| 774 |
+
</h2>
|
| 775 |
+
<p className="text-sm text-slate-400 mt-1">
|
| 776 |
+
Test the Multimodal Factuality Pipeline on a single video URL.
|
| 777 |
+
</p>
|
| 778 |
+
</div>
|
| 779 |
+
<button
|
| 780 |
+
onClick={() => setShowDemoConfig(!showDemoConfig)}
|
| 781 |
+
className="text-xs font-bold text-slate-500 hover:text-slate-300 flex items-center gap-1 bg-slate-950 px-3 py-1.5 rounded border border-slate-800"
|
| 782 |
+
>
|
| 783 |
+
<Settings className="w-4 h-4" />
|
| 784 |
+
{showDemoConfig ? "Hide Config" : "Show Config"}
|
| 785 |
+
</button>
|
| 786 |
+
</div>
|
| 787 |
+
|
| 788 |
+
{showDemoConfig && (
|
| 789 |
+
<div className="bg-slate-950 p-4 rounded-lg border border-slate-800 mb-4 grid grid-cols-2 gap-6 animate-in fade-in zoom-in-95 duration-200">
|
| 790 |
+
<div className="space-y-3">
|
| 791 |
+
<label className="text-[10px] text-slate-500 uppercase font-bold block border-b border-slate-800 pb-1">LLM Provider Config</label>
|
| 792 |
+
<select value={modelProvider} onChange={e => setModelProvider(e.target.value)} className="w-full bg-slate-900 border border-slate-700 rounded p-2 text-xs text-white">
|
| 793 |
+
<option value="vertex">Vertex AI (Enterprise)</option>
|
| 794 |
+
<option value="gemini">Gemini API (Public)</option>
|
| 795 |
+
<option value="gcloud">Google Cloud (Project + API Key)</option>
|
| 796 |
+
<option value="nrp">NRP (Nautilus Envoy Gateway)</option>
|
| 797 |
+
</select>
|
| 798 |
+
|
| 799 |
+
{modelProvider === 'nrp' && (
|
| 800 |
+
<>
|
| 801 |
+
<input value={baseUrl} onChange={e => setBaseUrl(e.target.value)} className="w-full bg-slate-900 border border-slate-700 rounded p-2 text-xs text-white" placeholder="Base URL"/>
|
| 802 |
+
<input type="password" value={apiKey} onChange={e => setApiKey(e.target.value)} className="w-full bg-slate-900 border border-slate-700 rounded p-2 text-xs text-white" placeholder="API Token"/>
|
| 803 |
+
</>
|
| 804 |
+
)}
|
| 805 |
+
{modelProvider === 'gemini' && (
|
| 806 |
+
<input type="password" value={apiKey} onChange={e => setApiKey(e.target.value)} className="w-full bg-slate-900 border border-slate-700 rounded p-2 text-xs text-white" placeholder="API Key"/>
|
| 807 |
+
)}
|
| 808 |
+
{(modelProvider === 'vertex' || modelProvider === 'gcloud') && (
|
| 809 |
+
<>
|
| 810 |
+
<input value={projectId} onChange={e => setProjectId(e.target.value)} className="w-full bg-slate-900 border border-slate-700 rounded p-2 text-xs text-white" placeholder="Project ID"/>
|
| 811 |
+
<input value={location} onChange={e => setLocation(e.target.value)} className="w-full bg-slate-900 border border-slate-700 rounded p-2 text-xs text-white" placeholder="Location"/>
|
| 812 |
+
{modelProvider === 'gcloud' && <input type="password" value={apiKey} onChange={e => setApiKey(e.target.value)} className="w-full bg-slate-900 border border-slate-700 rounded p-2 text-xs text-white" placeholder="API Key"/>}
|
| 813 |
+
</>
|
| 814 |
+
)}
|
| 815 |
+
<input list="modelSuggestions" value={modelName} onChange={e => setModelName(e.target.value)} className="w-full bg-slate-900 border border-slate-700 rounded p-2 text-xs text-white" placeholder="Model Name (e.g. gemini-1.5-pro)"/>
|
| 816 |
+
</div>
|
| 817 |
+
<div className="space-y-3">
|
| 818 |
+
<label className="text-[10px] text-slate-500 uppercase font-bold block border-b border-slate-800 pb-1">Inference Strategy</label>
|
| 819 |
+
<select value={reasoningMethod} onChange={e => setReasoningMethod(e.target.value)} className="w-full bg-slate-900 border border-slate-700 rounded p-2 text-xs text-white">
|
| 820 |
+
<option value="cot">Standard Chain of Thought</option>
|
| 821 |
+
<option value="fcot">Fractal Chain of Thought</option>
|
| 822 |
+
</select>
|
| 823 |
+
<select value={promptTemplate} onChange={e => setPromptTemplate(e.target.value)} className="w-full bg-slate-900 border border-slate-700 rounded p-2 text-xs text-white">
|
| 824 |
+
{availablePrompts.length > 0 ? availablePrompts.map(p => (
|
| 825 |
+
<option key={p.id} value={p.id}>{p.name}</option>
|
| 826 |
+
)) : <option value="standard">Standard</option>}
|
| 827 |
+
</select>
|
| 828 |
+
</div>
|
| 829 |
+
</div>
|
| 830 |
+
)}
|
| 831 |
+
|
| 832 |
+
<div className="flex gap-2 mb-4">
|
| 833 |
+
<input
|
| 834 |
+
type="text"
|
| 835 |
+
value={demoLink}
|
| 836 |
+
onChange={(e) => setDemoLink(e.target.value)}
|
| 837 |
+
placeholder="Enter X/Twitter Video URL here..."
|
| 838 |
+
className="flex-1 bg-slate-950 border border-slate-700 rounded-lg p-3 text-sm text-white focus:outline-none focus:border-indigo-500"
|
| 839 |
+
disabled={demoIsProcessing}
|
| 840 |
+
/>
|
| 841 |
+
<button
|
| 842 |
+
onClick={runDemo}
|
| 843 |
+
disabled={demoIsProcessing || !demoLink}
|
| 844 |
+
className="bg-indigo-600 hover:bg-indigo-500 disabled:bg-slate-700 text-white px-6 py-3 rounded-lg font-bold flex items-center gap-2 transition"
|
| 845 |
+
>
|
| 846 |
+
{demoIsProcessing ? <RefreshCw className="w-4 h-4 animate-spin" /> : <Play className="w-4 h-4 fill-white" />}
|
| 847 |
+
{demoIsProcessing ? "Processing..." : "Analyze"}
|
| 848 |
+
</button>
|
| 849 |
+
</div>
|
| 850 |
+
|
| 851 |
+
{(demoIsProcessing || demoLogs) && !demoResult && (
|
| 852 |
+
<div className="bg-black border border-slate-800 rounded-lg p-4 font-mono text-[10px] text-emerald-500 h-64 overflow-y-auto whitespace-pre-wrap animate-in fade-in duration-300" ref={demoLogContainerRef}>
|
| 853 |
+
{demoLogs || "Initializing pipeline..."}
|
| 854 |
+
</div>
|
| 855 |
+
)}
|
| 856 |
+
|
| 857 |
+
{demoResult && (
|
| 858 |
+
<div className="mt-6 border-t border-slate-800 pt-6 animate-in fade-in slide-in-from-bottom-4 duration-500">
|
| 859 |
+
<div className="flex justify-between items-start mb-6">
|
| 860 |
+
<div>
|
| 861 |
+
<h3 className="text-xl font-bold text-white flex items-center gap-2">
|
| 862 |
+
<ShieldCheck className="w-6 h-6 text-emerald-400" />
|
| 863 |
+
<span className="text-indigo-400 uppercase tracking-wider text-[16px] bg-indigo-500/10 px-2 py-0.5 rounded border border-indigo-500/20">[{demoResult.config_model || 'AI'}]
|
| 864 |
+
</span>
|
| 865 |
+
Analysis Complete
|
| 866 |
+
</h3>
|
| 867 |
+
<div className="text-xs text-slate-400 mt-2 font-mono">
|
| 868 |
+
ID: {demoResult.id} | Prompt: {demoResult.config_prompt} | Reasoning: {demoResult.config_reasoning}
|
| 869 |
+
</div>
|
| 870 |
+
</div>
|
| 871 |
+
<div className="bg-slate-950 border border-slate-800 px-6 py-2 rounded-lg text-center">
|
| 872 |
+
<div className="text-[10px] uppercase text-slate-500 font-bold mb-1">Final Score</div>
|
| 873 |
+
<div className={`text-4xl font-bold font-mono ${demoResult.final_veracity_score < 50 ? 'text-red-400' : 'text-emerald-400'}`}>
|
| 874 |
+
{demoResult.final_veracity_score}
|
| 875 |
+
</div>
|
| 876 |
+
</div>
|
| 877 |
+
</div>
|
| 878 |
+
|
| 879 |
+
<div className="grid grid-cols-3 gap-6 mb-6">
|
| 880 |
+
<div className="col-span-2 space-y-4">
|
| 881 |
+
<div className="bg-slate-950 p-4 rounded-lg border border-slate-800">
|
| 882 |
+
<div className="text-xs font-bold text-indigo-400 uppercase mb-2">Video Context</div>
|
| 883 |
+
<div className="text-sm text-slate-300 italic leading-relaxed">"{demoResult.caption || 'No specific context found'}"</div>
|
| 884 |
+
</div>
|
| 885 |
+
|
| 886 |
+
<div className="bg-slate-950 p-4 rounded-lg border border-slate-800">
|
| 887 |
+
<div className="text-xs font-bold text-amber-400 uppercase mb-2">AI Reasoning</div>
|
| 888 |
+
<div className="text-sm text-slate-300 whitespace-pre-wrap leading-relaxed">{demoResult.reasoning || 'No reasoning provided'}</div>
|
| 889 |
+
</div>
|
| 890 |
+
|
| 891 |
+
{demoResult.tags && (
|
| 892 |
+
<div className="flex flex-wrap gap-2">
|
| 893 |
+
{demoResult.tags.split(',').map((t: string) => (
|
| 894 |
+
<span key={t} className="px-2 py-1 bg-slate-800 border border-slate-700 rounded text-xs text-slate-400 font-mono">{t.trim()}</span>
|
| 895 |
+
))}
|
| 896 |
+
</div>
|
| 897 |
+
)}
|
| 898 |
+
</div>
|
| 899 |
+
|
| 900 |
+
<div className="space-y-4">
|
| 901 |
+
<div className="bg-slate-950 p-4 rounded-lg border border-slate-800">
|
| 902 |
+
<div className="text-xs font-bold text-sky-400 uppercase mb-3">Veracity Vectors</div>
|
| 903 |
+
<div className="space-y-2">
|
| 904 |
+
{[
|
| 905 |
+
{ label: 'Visual Integrity', score: parseFloat(demoResult.visual_score) || 0 },
|
| 906 |
+
{ label: 'Audio Integrity', score: parseFloat(demoResult.audio_score) || 0 },
|
| 907 |
+
{ label: 'Source Credibility', score: parseFloat(demoResult.source_score) || 0 },
|
| 908 |
+
{ label: 'Logical Consistency', score: parseFloat(demoResult.logic_score) || 0 },
|
| 909 |
+
{ label: 'Emotional Manip.', score: parseFloat(demoResult.emotion_score) || 0 },
|
| 910 |
+
].map((v) => (
|
| 911 |
+
<div key={v.label} className="flex justify-between items-center text-xs">
|
| 912 |
+
<span className="text-slate-400 w-32 truncate">{v.label}</span>
|
| 913 |
+
<div className="flex items-center gap-2 flex-1 ml-2">
|
| 914 |
+
<div className="flex-1 h-1.5 bg-slate-800 rounded-full overflow-hidden">
|
| 915 |
+
<div className={`h-full ${v.score < 5 ? 'bg-red-400' : v.score < 8 ? 'bg-amber-400' : 'bg-emerald-400'}`} style={{ width: `${(v.score / 10) * 100}%` }} />
|
| 916 |
+
</div>
|
| 917 |
+
<span className="font-mono text-slate-300 w-6 text-right">{v.score}</span>
|
| 918 |
+
</div>
|
| 919 |
+
</div>
|
| 920 |
+
))}
|
| 921 |
+
</div>
|
| 922 |
+
</div>
|
| 923 |
+
|
| 924 |
+
<div className="bg-slate-950 p-4 rounded-lg border border-slate-800">
|
| 925 |
+
<div className="text-xs font-bold text-pink-400 uppercase mb-3">Modality Alignment</div>
|
| 926 |
+
<div className="space-y-2">
|
| 927 |
+
{[
|
| 928 |
+
{ label: 'Video ↔ Audio', score: parseFloat(demoResult.align_video_audio) || 0 },
|
| 929 |
+
{ label: 'Video ↔ Caption', score: parseFloat(demoResult.align_video_caption) || 0 },
|
| 930 |
+
{ label: 'Audio ↔ Caption', score: parseFloat(demoResult.align_audio_caption) || 0 },
|
| 931 |
+
].map((v) => (
|
| 932 |
+
<div key={v.label} className="flex justify-between items-center text-xs">
|
| 933 |
+
<span className="text-slate-400 w-32 truncate">{v.label}</span>
|
| 934 |
+
<div className="flex items-center gap-2 flex-1 ml-2">
|
| 935 |
+
<div className="flex-1 h-1.5 bg-slate-800 rounded-full overflow-hidden">
|
| 936 |
+
<div className={`h-full ${v.score < 5 ? 'bg-red-400' : v.score < 8 ? 'bg-amber-400' : 'bg-emerald-400'}`} style={{ width: `${(v.score / 10) * 100}%` }} />
|
| 937 |
+
</div>
|
| 938 |
+
<span className="font-mono text-slate-300 w-6 text-right">{v.score}</span>
|
| 939 |
+
</div>
|
| 940 |
+
</div>
|
| 941 |
+
))}
|
| 942 |
+
</div>
|
| 943 |
+
</div>
|
| 944 |
+
</div>
|
| 945 |
+
</div>
|
| 946 |
+
|
| 947 |
+
<div className="flex justify-end">
|
| 948 |
+
<button onClick={() => {setDemoResult(null); setDemoLogs(''); setDemoLink('');}} className="text-xs text-slate-500 hover:text-white flex items-center gap-1 border border-slate-800 px-3 py-1.5 rounded bg-slate-950">
|
| 949 |
+
<RotateCcw className="w-3 h-3"/> Reset Demo
|
| 950 |
+
</button>
|
| 951 |
+
</div>
|
| 952 |
+
</div>
|
| 953 |
+
)}
|
| 954 |
+
</div>
|
| 955 |
+
|
| 956 |
+
{/* EXISTING HOME TAB CONTENT */}
|
| 957 |
<div className="grid grid-cols-3 gap-6">
|
| 958 |
<div className="col-span-2 bg-slate-900/50 border border-slate-800 rounded-xl p-6">
|
| 959 |
<h2 className="text-xl font-bold text-white mb-4">Philosophy & Methodology</h2>
|
|
|
|
| 1113 |
<div className="space-y-1">
|
| 1114 |
<input type="password" value={apiKey} onChange={e => setApiKey(e.target.value)} className="w-full bg-slate-950 border border-slate-700 rounded p-2 text-xs text-white placeholder-slate-600" placeholder="NRP API Token"/>
|
| 1115 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1116 |
</>
|
| 1117 |
)}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1118 |
|
| 1119 |
+
<div className="space-y-1">
|
| 1120 |
+
<label className="text-[10px] text-slate-500">Model Name</label>
|
| 1121 |
+
<input list="modelSuggestions" value={modelName} onChange={e => setModelName(e.target.value)} className="w-full bg-slate-950 border border-slate-700 rounded p-2 text-xs text-white placeholder-slate-600" placeholder="Model Name"/>
|
| 1122 |
+
</div>
|
| 1123 |
+
|
| 1124 |
+
<div className="space-y-1 mt-2">
|
| 1125 |
+
<label className="text-[10px] text-slate-500">Reasoning Method</label>
|
| 1126 |
+
<select value={reasoningMethod} onChange={e => setReasoningMethod(e.target.value)} className="w-full bg-slate-950 border border-slate-700 rounded p-2 text-xs text-white">
|
| 1127 |
+
<option value="cot">Standard Chain of Thought</option>
|
| 1128 |
+
<option value="fcot">Fractal Chain of Thought</option>
|
| 1129 |
+
</select>
|
| 1130 |
+
</div>
|
| 1131 |
+
<div className="space-y-1">
|
| 1132 |
+
<label className="text-[10px] text-slate-500">Prompt Persona</label>
|
| 1133 |
+
<select value={promptTemplate} onChange={e => setPromptTemplate(e.target.value)} className="w-full bg-slate-950 border border-slate-700 rounded p-2 text-xs text-white">
|
| 1134 |
+
{availablePrompts.length > 0 ? availablePrompts.map(p => (
|
| 1135 |
+
<option key={p.id} value={p.id}>{p.name}</option>
|
| 1136 |
+
)) : <option value="standard">Standard</option>}
|
| 1137 |
+
</select>
|
| 1138 |
+
</div>
|
| 1139 |
</div>
|
| 1140 |
</div>
|
| 1141 |
|
|
|
|
| 1312 |
</>
|
| 1313 |
)}
|
| 1314 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1315 |
<div className="space-y-1">
|
| 1316 |
+
<label className="text-[10px] text-slate-500">Model Name</label>
|
| 1317 |
+
<input list="modelSuggestions" value={modelName} onChange={e => setModelName(e.target.value)} className="w-full bg-slate-950 border border-slate-700 rounded p-2 text-xs text-white" placeholder="Model Name"/>
|
| 1318 |
+
</div>
|
| 1319 |
+
|
| 1320 |
+
<div className="space-y-1 mt-2">
|
| 1321 |
<label className="text-[10px] text-slate-500">Reasoning Method</label>
|
| 1322 |
<select value={reasoningMethod} onChange={e => setReasoningMethod(e.target.value)} className="w-full bg-slate-950 border border-slate-700 rounded p-2 text-xs text-white">
|
| 1323 |
<option value="cot">Standard Chain of Thought</option>
|
|
|
|
| 1369 |
</thead>
|
| 1370 |
<tbody>
|
| 1371 |
{queueList.map((q, i, arr) => (
|
| 1372 |
+
<React.Fragment key={i}>
|
| 1373 |
+
<tr
|
| 1374 |
+
className={`border-t border-slate-800/50 hover:bg-white/5 ${selectedQueueItems.has(q.link) ? 'bg-indigo-900/20' : ''}`}
|
| 1375 |
+
onMouseDown={(e) => {
|
| 1376 |
+
isDraggingQueueRef.current = true;
|
| 1377 |
+
toggleQueueSelection(e, q.link, i, arr);
|
| 1378 |
+
}}
|
| 1379 |
+
onMouseEnter={() => {
|
| 1380 |
+
if (isDraggingQueueRef.current && !selectedQueueItems.has(q.link)) {
|
| 1381 |
+
setSelectedQueueItems(prev => new Set(prev).add(q.link));
|
| 1382 |
+
setLastQueueIndex(i);
|
| 1383 |
+
}
|
| 1384 |
+
}}
|
| 1385 |
+
>
|
| 1386 |
+
<td className="p-3 cursor-pointer">
|
| 1387 |
+
{selectedQueueItems.has(q.link) ? <CheckSquare className="w-4 h-4 text-indigo-400"/> : <Square className="w-4 h-4 text-slate-600"/>}
|
| 1388 |
+
</td>
|
| 1389 |
+
<td className="p-3 font-bold text-[10px]">{q.task_type === 'Verify' ? <span className="text-amber-400">VERIFY</span> : <span className="text-slate-500">INGEST</span>}</td>
|
| 1390 |
+
<td className="p-3">
|
| 1391 |
+
<div className="flex items-center gap-2">
|
| 1392 |
+
<span className="text-sky-500 font-mono break-all">{q.link}</span>
|
| 1393 |
+
{q.comments && q.comments.length > 0 && (
|
| 1394 |
+
<button
|
| 1395 |
+
onClick={(e) => toggleQueueExpand(e, q.link)}
|
| 1396 |
+
className="px-2 py-1 bg-slate-800 rounded text-[10px] text-slate-300 hover:bg-slate-700 flex items-center gap-1 z-10"
|
| 1397 |
+
>
|
| 1398 |
+
{expandedQueueItems.has(q.link) ? <ChevronUp className="w-3 h-3"/> : <ChevronDown className="w-3 h-3"/>}
|
| 1399 |
+
{q.comments.length} Comments
|
| 1400 |
+
</button>
|
| 1401 |
+
)}
|
| 1402 |
+
</div>
|
| 1403 |
+
</td>
|
| 1404 |
+
<td className="p-3">
|
| 1405 |
+
{q.status === 'Processed' ?
|
| 1406 |
+
<span className="text-emerald-500 flex items-center gap-1"><CheckCircle2 className="w-3 h-3"/> Done</span> :
|
| 1407 |
+
q.status === 'Error' ?
|
| 1408 |
+
<span className="text-red-500 flex items-center gap-1"><AlertCircle className="w-3 h-3"/> Error</span> :
|
| 1409 |
+
<span className="text-amber-500">Pending</span>
|
| 1410 |
+
}
|
| 1411 |
+
</td>
|
| 1412 |
+
</tr>
|
| 1413 |
+
{expandedQueueItems.has(q.link) && q.comments && q.comments.length > 0 && (
|
| 1414 |
+
<tr className="bg-slate-900/40">
|
| 1415 |
+
<td colSpan={4} className="p-0">
|
| 1416 |
+
<div className="px-12 py-3 border-l-2 border-indigo-500 ml-4 space-y-2">
|
| 1417 |
+
{q.comments.map((c: any, ci: number) => (
|
| 1418 |
+
<div key={ci} className="flex flex-col gap-1 p-2 bg-slate-900 border border-slate-800 rounded">
|
| 1419 |
+
<div className="flex justify-between items-center">
|
| 1420 |
+
<div className="text-[10px] font-bold text-indigo-400">{c.author}</div>
|
| 1421 |
+
<div className="flex gap-2">
|
| 1422 |
+
{c.link && <a href={c.link} target="_blank" rel="noreferrer" className="text-[10px] text-sky-400 hover:underline">View Post</a>}
|
| 1423 |
+
{c.link && <button onClick={() => addSingleLinkDirect(c.link)} className="text-[10px] bg-slate-800 hover:bg-slate-700 text-slate-300 px-2 py-0.5 rounded flex items-center gap-1"><Plus className="w-3 h-3"/> Queue Comment</button>}
|
| 1424 |
+
</div>
|
| 1425 |
+
</div>
|
| 1426 |
+
<div className="text-xs text-slate-400 line-clamp-2">{c.text}</div>
|
| 1427 |
+
</div>
|
| 1428 |
+
))}
|
| 1429 |
+
</div>
|
| 1430 |
+
</td>
|
| 1431 |
+
</tr>
|
| 1432 |
+
)}
|
| 1433 |
+
</React.Fragment>
|
| 1434 |
))}
|
| 1435 |
</tbody>
|
| 1436 |
</table>
|
|
|
|
| 1733 |
<span className="capitalize text-slate-300 font-bold">{k.replace('v', 'Video-').replace('a', 'Audio-').replace('c', 'Caption')}</span>
|
| 1734 |
<span className="text-emerald-400 font-mono font-bold">{(manualScores as any)[k]}/10</span>
|
| 1735 |
</div>
|
| 1736 |
+
<input type="range" min="1" max="10" value={(manualScores as any)[k]} onChange={e => setManualScores({...manualScores,[k]: parseInt(e.target.value)})} className="w-full accent-emerald-500"/>
|
| 1737 |
</div>
|
| 1738 |
))}
|
| 1739 |
</div>
|
|
|
|
| 1813 |
</div>
|
| 1814 |
)}
|
| 1815 |
|
| 1816 |
+
{/* COMMUNITY AND ANALYTICS TABS (UNCHANGED) */}
|
| 1817 |
{activeTab === 'community' && (
|
| 1818 |
<div className="flex h-full gap-6">
|
| 1819 |
<div className="w-1/3 bg-slate-900/50 border border-slate-800 rounded-xl overflow-auto">
|
main.go
CHANGED
|
@@ -33,7 +33,17 @@ func main() {
|
|
| 33 |
strings.HasPrefix(r.URL.Path, "/download-dataset") ||
|
| 34 |
strings.HasPrefix(r.URL.Path, "/extension") ||
|
| 35 |
strings.HasPrefix(r.URL.Path, "/manage") ||
|
| 36 |
-
strings.HasPrefix(r.URL.Path, "/queue")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
log.Printf("Proxying %s to Python Backend...", r.URL.Path)
|
| 39 |
proxy.ServeHTTP(w, r)
|
|
@@ -57,4 +67,4 @@ func main() {
|
|
| 57 |
if err := http.ListenAndServe(":"+port, nil); err != nil {
|
| 58 |
log.Fatal(err)
|
| 59 |
}
|
| 60 |
-
}
|
|
|
|
| 33 |
strings.HasPrefix(r.URL.Path, "/download-dataset") ||
|
| 34 |
strings.HasPrefix(r.URL.Path, "/extension") ||
|
| 35 |
strings.HasPrefix(r.URL.Path, "/manage") ||
|
| 36 |
+
strings.HasPrefix(r.URL.Path, "/queue") ||
|
| 37 |
+
strings.HasPrefix(r.URL.Path, "/a2a") ||
|
| 38 |
+
strings.HasPrefix(r.URL.Path, "/health") ||
|
| 39 |
+
strings.HasPrefix(r.URL.Path, "/benchmarks") ||
|
| 40 |
+
strings.HasPrefix(r.URL.Path, "/config") ||
|
| 41 |
+
strings.HasPrefix(r.URL.Path, "/tags") ||
|
| 42 |
+
strings.HasPrefix(r.URL.Path, "/manual") ||
|
| 43 |
+
strings.HasPrefix(r.URL.Path, "/profiles") ||
|
| 44 |
+
strings.HasPrefix(r.URL.Path, "/community") ||
|
| 45 |
+
strings.HasPrefix(r.URL.Path, "/dataset") ||
|
| 46 |
+
strings.HasPrefix(r.URL.Path, "/analyze") {
|
| 47 |
|
| 48 |
log.Printf("Proxying %s to Python Backend...", r.URL.Path)
|
| 49 |
proxy.ServeHTTP(w, r)
|
|
|
|
| 67 |
if err := http.ListenAndServe(":"+port, nil); err != nil {
|
| 68 |
log.Fatal(err)
|
| 69 |
}
|
| 70 |
+
}
|
src/app.py
CHANGED
|
@@ -1,5 +1,11 @@
|
|
| 1 |
import os
|
| 2 |
import sys
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
import asyncio
|
| 4 |
import subprocess
|
| 5 |
from pathlib import Path
|
|
@@ -10,9 +16,6 @@ import datetime
|
|
| 10 |
import json
|
| 11 |
import hashlib
|
| 12 |
import re
|
| 13 |
-
import glob
|
| 14 |
-
import shutil
|
| 15 |
-
import time
|
| 16 |
from fastapi import FastAPI, Request, Form, UploadFile, File, Body, HTTPException
|
| 17 |
from fastapi.responses import HTMLResponse, StreamingResponse, PlainTextResponse, Response, FileResponse, JSONResponse
|
| 18 |
from fastapi.templating import Jinja2Templates
|
|
@@ -22,29 +25,17 @@ import yt_dlp
|
|
| 22 |
import inference_logic
|
| 23 |
import factuality_logic
|
| 24 |
import transcription
|
| 25 |
-
|
| 26 |
-
|
|
|
|
| 27 |
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
except ImportError:
|
| 32 |
-
try:
|
| 33 |
-
import croissant as mlc
|
| 34 |
-
CROISSANT_AVAILABLE = True
|
| 35 |
-
except ImportError:
|
| 36 |
-
mlc = None
|
| 37 |
-
CROISSANT_AVAILABLE = False
|
| 38 |
-
|
| 39 |
-
# Configure Logging with High Verbosity
|
| 40 |
-
logging.basicConfig(
|
| 41 |
-
level=logging.INFO,
|
| 42 |
-
format="%(asctime)s - %(levelname)s - %(message)s",
|
| 43 |
-
handlers=[logging.StreamHandler(sys.stdout)]
|
| 44 |
-
)
|
| 45 |
-
logger = logging.getLogger("vChat")
|
| 46 |
|
| 47 |
-
|
|
|
|
|
|
|
| 48 |
|
| 49 |
app = FastAPI()
|
| 50 |
|
|
@@ -56,469 +47,718 @@ app.add_middleware(
|
|
| 56 |
allow_headers=["*"],
|
| 57 |
)
|
| 58 |
|
| 59 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
STATIC_DIR = "/app/static"
|
| 61 |
if not os.path.isdir(STATIC_DIR):
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
os.
|
| 65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
|
| 67 |
-
templates = Jinja2Templates(directory=STATIC_DIR)
|
| 68 |
|
| 69 |
-
#
|
| 70 |
-
os.
|
| 71 |
-
os.
|
| 72 |
-
|
| 73 |
-
os.makedirs("data/prompts", exist_ok=True)
|
| 74 |
-
os.makedirs("data/responses", exist_ok=True)
|
| 75 |
-
os.makedirs("metadata", exist_ok=True)
|
| 76 |
|
| 77 |
-
|
|
|
|
|
|
|
| 78 |
|
| 79 |
-
|
| 80 |
-
"standard": {"description": "Standard Analysis", "instruction": "You are an AI Factuality Assessment Agent operating under the 'Ali Arsanjani Factuality Factors' framework. Cross-reference claims and assess structural validity."},
|
| 81 |
-
"skeptic": {"description": "High Skepticism", "instruction": "You are a highly skeptical investigator. Question all assumptions, identify the weakest links in the narrative, and aggressively penalize logical fallacies."}
|
| 82 |
-
}
|
| 83 |
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
try:
|
| 88 |
-
transcription.load_model()
|
| 89 |
-
except Exception as e:
|
| 90 |
-
logger.warning(f"Could not load Whisper model: {e}")
|
| 91 |
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
logger.fatal(f"Could not load local inference models. Error: {e}", exc_info=True)
|
| 97 |
-
else:
|
| 98 |
-
logger.info("Running in LITE mode (API Only).")
|
| 99 |
|
| 100 |
-
|
| 101 |
-
async def read_root(request: Request):
|
| 102 |
-
custom_model_available = False
|
| 103 |
-
if not LITE_MODE:
|
| 104 |
-
custom_model_available = inference_logic.peft_model is not None
|
| 105 |
-
if not (Path(STATIC_DIR) / "index.html").exists():
|
| 106 |
-
return HTMLResponse(content="Frontend not found.", status_code=404)
|
| 107 |
-
return templates.TemplateResponse("index.html", {
|
| 108 |
-
"request": request,
|
| 109 |
-
"custom_model_available": custom_model_available,
|
| 110 |
-
"lite_mode": LITE_MODE
|
| 111 |
-
})
|
| 112 |
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
return[{"id": k, "name": v['description']} for k, v in PROMPT_VARIANTS.items()]
|
| 116 |
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
|
|
|
| 129 |
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
logger.error(f"[Subprocess] Failed ({process.returncode}): {err_msg}")
|
| 155 |
-
raise RuntimeError(f"Command failed: {err_msg}")
|
| 156 |
-
logger.info(f"[Subprocess] Success.")
|
| 157 |
-
return stdout.decode()
|
| 158 |
-
|
| 159 |
-
def extract_tweet_id(url: str) -> str | None:
|
| 160 |
-
match = re.search(r"(?:twitter|x)\.com/[^/]+/status/(\d+)", url)
|
| 161 |
-
if match: return match.group(1)
|
| 162 |
-
return None
|
| 163 |
-
|
| 164 |
-
def normalize_link(link: str) -> str:
|
| 165 |
-
if not link: return ""
|
| 166 |
-
return link.split('?')[0].strip().rstrip('/').replace('http://', '').replace('https://', '').replace('www.', '')
|
| 167 |
-
|
| 168 |
-
def check_if_processed(link: str) -> bool:
|
| 169 |
-
target_id = extract_tweet_id(link)
|
| 170 |
-
link_clean = normalize_link(link)
|
| 171 |
-
|
| 172 |
for filename in["data/dataset.csv", "data/manual_dataset.csv"]:
|
| 173 |
path = Path(filename)
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
if target_id and row_id == target_id: return True
|
| 189 |
-
else:
|
| 190 |
-
reader = csv.reader(f)
|
| 191 |
-
for row in reader:
|
| 192 |
-
if not row: continue
|
| 193 |
-
if link_clean in [normalize_link(r) for r in row]: return True
|
| 194 |
-
if target_id and target_id in row: return True
|
| 195 |
-
except Exception:
|
| 196 |
-
continue
|
| 197 |
-
return False
|
| 198 |
-
|
| 199 |
-
def update_queue_status_in_file(link: str, new_status: str):
|
| 200 |
q_path = Path("data/batch_queue.csv")
|
| 201 |
if not q_path.exists(): return
|
| 202 |
rows =[]
|
|
|
|
|
|
|
| 203 |
with open(q_path, 'r', encoding='utf-8') as f:
|
| 204 |
-
reader = csv.
|
| 205 |
-
|
| 206 |
-
|
|
|
|
|
|
|
| 207 |
for row in reader:
|
| 208 |
-
if row
|
| 209 |
-
|
| 210 |
-
|
|
|
|
|
|
|
| 211 |
rows.append(row)
|
| 212 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 213 |
writer = csv.writer(f)
|
| 214 |
-
writer.writerow(
|
| 215 |
-
writer.
|
| 216 |
-
|
| 217 |
-
async def prepare_video_assets_async(url: str) -> dict:
|
| 218 |
-
global progress_message
|
| 219 |
-
loop = asyncio.get_event_loop()
|
| 220 |
-
is_local = not (url.startswith("http://") or url.startswith("https://"))
|
| 221 |
-
video_id = "unknown"
|
| 222 |
-
transcript_path = None
|
| 223 |
-
|
| 224 |
-
logger.info(f"Preparing assets for URL: {url}")
|
| 225 |
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
sanitized_check = Path(f"data/videos/{video_id}_fixed.mp4")
|
| 235 |
-
|
| 236 |
-
cookies_path = get_cookies_path()
|
| 237 |
-
ydl_opts = {
|
| 238 |
-
'format': 'best[ext=mp4]/best',
|
| 239 |
-
'outtmpl': 'data/videos/%(id)s.%(ext)s',
|
| 240 |
-
'progress_hooks': [progress_hook],
|
| 241 |
-
'quiet': False,
|
| 242 |
-
'no_warnings': False,
|
| 243 |
-
'noplaylist': True,
|
| 244 |
-
'no_overwrites': True,
|
| 245 |
-
'writesubtitles': True,
|
| 246 |
-
'writeautomaticsub': True,
|
| 247 |
-
'subtitleslangs': ['en'],
|
| 248 |
-
'user_agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
| 249 |
-
}
|
| 250 |
-
|
| 251 |
-
if cookies_path:
|
| 252 |
-
ydl_opts['cookiefile'] = cookies_path
|
| 253 |
-
logger.info(f"Using cookies from {cookies_path}")
|
| 254 |
-
|
| 255 |
-
if sanitized_check.exists():
|
| 256 |
-
logger.info(f"Video {video_id} already cached at {sanitized_check}")
|
| 257 |
-
original_path = sanitized_check
|
| 258 |
-
metadata = {"id": video_id, "link": url, "caption": "Cached Video"}
|
| 259 |
-
else:
|
| 260 |
-
try:
|
| 261 |
-
logger.info(f"Starting yt-dlp download for {video_id}...")
|
| 262 |
-
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
| 263 |
-
info = await loop.run_in_executor(None, lambda: ydl.extract_info(url, download=True))
|
| 264 |
-
original_path = Path(ydl.prepare_filename(info))
|
| 265 |
-
metadata = {
|
| 266 |
-
"id": info.get("id", video_id), "link": info.get("webpage_url", url),
|
| 267 |
-
"caption": info.get("description", info.get("title", "N/A")).encode('ascii', 'ignore').decode('ascii').strip()[:500],
|
| 268 |
-
"postdatetime": info.get("upload_date", "N/A")
|
| 269 |
-
}
|
| 270 |
-
video_id = info.get("id", video_id)
|
| 271 |
-
logger.info("yt-dlp download successful.")
|
| 272 |
-
except yt_dlp.utils.DownloadError as e:
|
| 273 |
-
logger.error(f"yt-dlp download error: {e}")
|
| 274 |
-
if "No video could be found" in str(e):
|
| 275 |
-
raise ValueError(f"No video content found at {url}")
|
| 276 |
-
raise RuntimeError(f"Download failed: {str(e)}")
|
| 277 |
-
except Exception as e:
|
| 278 |
-
logger.error(f"Unexpected yt-dlp error: {e}")
|
| 279 |
-
raise RuntimeError(f"Download failed: {str(e)}")
|
| 280 |
-
|
| 281 |
-
transcript_path = next(Path("data/videos").glob(f"{video_id}*.en.vtt"), None)
|
| 282 |
-
if not transcript_path: transcript_path = next(Path("data/videos").glob(f"{video_id}*.vtt"), None)
|
| 283 |
-
|
| 284 |
-
sanitized_path = Path(f"data/videos/{video_id}_fixed.mp4")
|
| 285 |
-
|
| 286 |
-
# --- FFmpeg Sanitization Logic with Robust Fallback ---
|
| 287 |
-
if not sanitized_path.exists() and original_path.exists():
|
| 288 |
-
logger.info(f"Sanitizing video {video_id} (Original: {original_path})...")
|
| 289 |
-
ffmpeg_bin = shutil.which('ffmpeg')
|
| 290 |
-
if not ffmpeg_bin: raise RuntimeError("FFmpeg binary not found in system path!")
|
| 291 |
-
|
| 292 |
-
try:
|
| 293 |
-
await run_subprocess_async([ffmpeg_bin, "-i", str(original_path), "-c:v", "libx264", "-c:a", "aac", "-pix_fmt", "yuv420p", "-y", str(sanitized_path)])
|
| 294 |
-
logger.info("Sanitization (re-encode) successful.")
|
| 295 |
-
except Exception as e:
|
| 296 |
-
logger.warning(f"Re-encode failed ({e}). Attempting Stream Copy...")
|
| 297 |
-
try:
|
| 298 |
-
await run_subprocess_async([ffmpeg_bin, "-i", str(original_path), "-c", "copy", "-y", str(sanitized_path)])
|
| 299 |
-
logger.info("Sanitization (copy) successful.")
|
| 300 |
-
except Exception as e2:
|
| 301 |
-
logger.error(f"Sanitization failed completely: {e2}")
|
| 302 |
-
if original_path.suffix == '.mp4':
|
| 303 |
-
logger.warning("Using original file as sanitized file.")
|
| 304 |
-
shutil.copy(original_path, sanitized_path)
|
| 305 |
-
else:
|
| 306 |
-
raise RuntimeError("Could not produce a valid MP4 file.")
|
| 307 |
-
|
| 308 |
-
# --- Audio Extraction ---
|
| 309 |
-
audio_path = sanitized_path.with_suffix('.wav')
|
| 310 |
-
if not audio_path.exists() and sanitized_path.exists():
|
| 311 |
-
logger.info(f"Extracting audio to {audio_path}...")
|
| 312 |
-
try:
|
| 313 |
-
await run_subprocess_async(["ffmpeg", "-i", str(sanitized_path), "-vn", "-acodec", "pcm_s16le", "-ar", "16000", "-ac", "1", "-y", str(audio_path)])
|
| 314 |
-
logger.info("Audio extraction successful.")
|
| 315 |
-
except Exception as e:
|
| 316 |
-
logger.error(f"Audio extraction failed: {e}")
|
| 317 |
-
|
| 318 |
-
# --- Transcription ---
|
| 319 |
-
if not transcript_path and audio_path.exists() and transcription.transcription_model is not None:
|
| 320 |
-
logger.info("Generating transcript via Whisper...")
|
| 321 |
-
transcript_path = await loop.run_in_executor(None, transcription.generate_transcript, str(audio_path))
|
| 322 |
-
elif not transcript_path:
|
| 323 |
-
logger.info("Skipping local transcription (Whisper not loaded or audio missing).")
|
| 324 |
|
| 325 |
-
|
|
|
|
|
|
|
| 326 |
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 333 |
|
| 334 |
-
|
|
|
|
| 335 |
try:
|
| 336 |
-
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 357 |
|
| 358 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 359 |
try:
|
| 360 |
-
|
|
|
|
|
|
|
| 361 |
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
yield f"Error preparing assets: {str(e)}"
|
| 370 |
-
logger.error(f"Asset prep failed for {video_url}: {e}")
|
| 371 |
-
return
|
| 372 |
-
|
| 373 |
-
video_path = paths["video"]
|
| 374 |
-
transcript_text = parse_vtt(paths["transcript"]) if paths["transcript"] else "No transcript (Audio/Video Analysis only)."
|
| 375 |
-
caption = paths["metadata"].get("caption", "")
|
| 376 |
|
| 377 |
-
|
| 378 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 379 |
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 383 |
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
elif model_selection == 'nrp':
|
| 391 |
-
if hasattr(inference_logic, 'run_nrp_labeling_pipeline'):
|
| 392 |
-
pipeline = inference_logic.run_nrp_labeling_pipeline
|
| 393 |
-
else:
|
| 394 |
-
yield "NRP Pipeline not supported in this version."
|
| 395 |
-
return
|
| 396 |
-
config = nrp_config
|
| 397 |
-
else:
|
| 398 |
-
yield f"Unknown model selection: {model_selection}"
|
| 399 |
-
return
|
| 400 |
|
| 401 |
-
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
|
| 418 |
-
logger.error(f"Inference pipeline completed but returned no labels for {video_url}")
|
| 419 |
-
yield "No labels generated. Check logs."
|
| 420 |
-
return
|
| 421 |
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 427 |
|
| 428 |
-
|
| 429 |
-
|
| 430 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 431 |
|
|
|
|
|
|
|
|
|
|
| 432 |
row = {
|
| 433 |
-
"id":
|
| 434 |
-
"
|
| 435 |
-
"
|
| 436 |
-
"
|
| 437 |
-
"
|
| 438 |
-
"
|
| 439 |
-
"
|
| 440 |
-
"
|
| 441 |
-
"
|
| 442 |
-
"
|
| 443 |
-
"
|
| 444 |
-
"
|
| 445 |
-
"
|
| 446 |
-
"
|
| 447 |
-
"
|
| 448 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 449 |
}
|
| 450 |
-
yield {"csv_row": row, "full_json": final_labels, "raw_toon": raw_toon}
|
| 451 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 452 |
except Exception as e:
|
| 453 |
-
logger.error(f"
|
| 454 |
-
|
| 455 |
|
| 456 |
-
@app.get("/
|
| 457 |
-
async def
|
| 458 |
-
|
| 459 |
-
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
|
| 472 |
-
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 476 |
|
| 477 |
@app.post("/queue/add")
|
| 478 |
-
async def add_queue_item(
|
| 479 |
-
link = payload.get("link")
|
| 480 |
-
if not link:
|
| 481 |
-
return {"status": "error", "message": "Link required"}
|
| 482 |
q_path = Path("data/batch_queue.csv")
|
| 483 |
existing = set()
|
| 484 |
if q_path.exists():
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
|
| 490 |
-
if normalize_link(link) in existing:
|
| 491 |
-
return {"status": "ignored", "message": "Link already in queue"}
|
| 492 |
|
| 493 |
with open(q_path, 'a', newline='', encoding='utf-8') as f:
|
| 494 |
-
writer = csv.
|
| 495 |
-
if not q_path.exists() or q_path.stat().st_size == 0:
|
| 496 |
-
|
| 497 |
-
writer.writerow([link.strip(), datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "Pending", "Ingest"])
|
| 498 |
return {"status": "success", "link": link}
|
| 499 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 500 |
@app.post("/queue/clear_processed")
|
| 501 |
async def clear_processed_queue():
|
| 502 |
q_path = Path("data/batch_queue.csv")
|
| 503 |
if not q_path.exists(): return {"status": "success", "removed_count": 0}
|
| 504 |
-
|
| 505 |
kept_rows =[]
|
| 506 |
removed_count = 0
|
| 507 |
-
|
| 508 |
-
|
| 509 |
-
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
|
| 514 |
-
|
| 515 |
-
|
| 516 |
-
|
| 517 |
-
|
| 518 |
-
|
| 519 |
with open(q_path, 'w', newline='', encoding='utf-8') as f:
|
| 520 |
-
writer = csv.
|
| 521 |
-
writer.
|
| 522 |
writer.writerows(kept_rows)
|
| 523 |
return {"status": "success", "removed_count": removed_count}
|
| 524 |
|
|
@@ -526,24 +766,17 @@ async def clear_processed_queue():
|
|
| 526 |
async def delete_queue_items(request: Request):
|
| 527 |
try:
|
| 528 |
data = await request.json()
|
| 529 |
-
target_links = set(normalize_link(l) for l in data.get("links",[]))
|
| 530 |
q_path = Path("data/batch_queue.csv")
|
| 531 |
if not q_path.exists(): return {"status": "success", "count": 0}
|
| 532 |
kept_rows =[]
|
| 533 |
deleted_count = 0
|
| 534 |
-
|
| 535 |
-
|
| 536 |
-
|
| 537 |
-
for row in reader:
|
| 538 |
-
if not row: continue
|
| 539 |
-
link = row[0]
|
| 540 |
-
if normalize_link(link) in target_links:
|
| 541 |
-
deleted_count += 1
|
| 542 |
-
else:
|
| 543 |
-
kept_rows.append(row)
|
| 544 |
with open(q_path, 'w', newline='', encoding='utf-8') as f:
|
| 545 |
-
writer = csv.
|
| 546 |
-
writer.
|
| 547 |
writer.writerows(kept_rows)
|
| 548 |
return {"status": "success", "count": deleted_count}
|
| 549 |
except Exception as e: return JSONResponse({"status": "error", "message": str(e)}, status_code=500)
|
|
@@ -552,89 +785,109 @@ async def delete_queue_items(request: Request):
|
|
| 552 |
async def requeue_items(request: Request):
|
| 553 |
try:
|
| 554 |
data = await request.json()
|
| 555 |
-
target_links = set(normalize_link(l) for l in data.get("links",[]))
|
| 556 |
q_path = Path("data/batch_queue.csv")
|
| 557 |
if not q_path.exists(): return {"status": "success", "count": 0}
|
| 558 |
rows =[]
|
| 559 |
requeued_count = 0
|
| 560 |
-
|
| 561 |
-
|
| 562 |
-
|
| 563 |
-
|
| 564 |
-
|
| 565 |
-
link = row[0]
|
| 566 |
-
if normalize_link(link) in target_links:
|
| 567 |
-
if len(row) > 2: row[2] = "Pending"
|
| 568 |
-
requeued_count += 1
|
| 569 |
-
rows.append(row)
|
| 570 |
with open(q_path, 'w', newline='', encoding='utf-8') as f:
|
| 571 |
-
writer = csv.
|
| 572 |
-
writer.
|
| 573 |
writer.writerows(rows)
|
| 574 |
return {"status": "success", "count": requeued_count}
|
| 575 |
except Exception as e: return JSONResponse({"status": "error", "message": str(e)}, status_code=500)
|
| 576 |
|
| 577 |
-
@app.post("/
|
| 578 |
-
async def
|
| 579 |
-
global STOP_QUEUE_SIGNAL
|
| 580 |
-
logger.info("Received Stop Signal from User.")
|
| 581 |
-
STOP_QUEUE_SIGNAL = True
|
| 582 |
-
return {"status": "stopping"}
|
| 583 |
-
|
| 584 |
-
@app.post("/queue/upload_csv")
|
| 585 |
-
async def upload_csv_to_queue(file: UploadFile = File(...)):
|
| 586 |
try:
|
| 587 |
-
|
| 588 |
-
|
| 589 |
-
|
| 590 |
-
|
| 591 |
-
decoded = content.decode('latin-1').splitlines()
|
| 592 |
-
|
| 593 |
-
reader = csv.reader(decoded)
|
| 594 |
-
links_to_add =[]
|
| 595 |
-
header = next(reader, None)
|
| 596 |
-
if not header: return {"status": "empty file"}
|
| 597 |
-
|
| 598 |
-
link_idx = 0
|
| 599 |
-
header_lower = [h.lower() for h in header]
|
| 600 |
-
|
| 601 |
-
if "link" in header_lower: link_idx = header_lower.index("link")
|
| 602 |
-
elif "url" in header_lower: link_idx = header_lower.index("url")
|
| 603 |
-
elif len(header) > 0 and header[0].strip().startswith("http"):
|
| 604 |
-
links_to_add.append(header[0])
|
| 605 |
-
link_idx = 0
|
| 606 |
-
|
| 607 |
-
for row in reader:
|
| 608 |
-
if len(row) > link_idx and row[link_idx].strip():
|
| 609 |
-
links_to_add.append(row[link_idx].strip())
|
| 610 |
|
| 611 |
-
|
| 612 |
-
|
| 613 |
-
if queue_path.exists():
|
| 614 |
-
with open(queue_path, 'r', encoding='utf-8') as f:
|
| 615 |
-
existing_links = set(f.read().splitlines())
|
| 616 |
|
| 617 |
-
|
| 618 |
-
|
| 619 |
-
|
| 620 |
-
if
|
| 621 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 622 |
|
| 623 |
-
|
| 624 |
-
duplicate = False
|
| 625 |
-
for line in existing_links:
|
| 626 |
-
if normalize_link(link) in line:
|
| 627 |
-
duplicate = True
|
| 628 |
-
break
|
| 629 |
-
if duplicate: continue
|
| 630 |
-
|
| 631 |
-
writer.writerow([link, datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "Pending", "Ingest"])
|
| 632 |
-
added_count += 1
|
| 633 |
-
|
| 634 |
-
return {"status": "success", "added": added_count}
|
| 635 |
except Exception as e:
|
| 636 |
-
|
| 637 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 638 |
|
| 639 |
@app.post("/queue/run")
|
| 640 |
async def run_queue_processing(
|
|
@@ -642,293 +895,138 @@ async def run_queue_processing(
|
|
| 642 |
gemini_api_key: str = Form(""), gemini_model_name: str = Form(""),
|
| 643 |
vertex_project_id: str = Form(""), vertex_location: str = Form(""), vertex_model_name: str = Form(""), vertex_api_key: str = Form(""),
|
| 644 |
nrp_api_key: str = Form(""), nrp_model_name: str = Form(""), nrp_base_url: str = Form("https://ellm.nrp-nautilus.io/v1"),
|
| 645 |
-
include_comments: bool = Form(False),
|
| 646 |
-
|
| 647 |
-
prompt_template: str = Form("standard"),
|
| 648 |
-
custom_query: str = Form(""),
|
| 649 |
-
max_reprompts: int = Form(1)
|
| 650 |
):
|
| 651 |
global STOP_QUEUE_SIGNAL
|
| 652 |
STOP_QUEUE_SIGNAL = False
|
|
|
|
| 653 |
gemini_config = {"api_key": gemini_api_key, "model_name": gemini_model_name, "max_retries": max_reprompts}
|
| 654 |
-
vertex_config = {"project_id": vertex_project_id, "location": vertex_location, "model_name": vertex_model_name, "api_key": vertex_api_key, "max_retries": max_reprompts}
|
| 655 |
nrp_config = {"api_key": nrp_api_key, "model_name": nrp_model_name, "base_url": nrp_base_url, "max_retries": max_reprompts}
|
| 656 |
-
|
| 657 |
sel_p = PROMPT_VARIANTS.get(prompt_template, PROMPT_VARIANTS['standard'])
|
| 658 |
system_persona_txt = sel_p['instruction']
|
| 659 |
-
if custom_query.strip(): system_persona_txt += f"\n\nSPECIAL INSTRUCTION: {custom_query}"
|
| 660 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 661 |
async def queue_stream():
|
| 662 |
-
|
| 663 |
-
if
|
| 664 |
-
|
| 665 |
-
|
| 666 |
|
| 667 |
-
|
| 668 |
-
|
| 669 |
-
|
| 670 |
-
try: next(reader)
|
| 671 |
-
except: pass
|
| 672 |
-
for row in reader:
|
| 673 |
-
if row and len(row) > 0:
|
| 674 |
-
status = row[2] if len(row) > 2 else "Pending"
|
| 675 |
-
if status == "Pending":
|
| 676 |
-
items.append(row[0])
|
| 677 |
|
| 678 |
-
|
| 679 |
-
|
| 680 |
-
|
| 681 |
-
logger.info(f"Starting batch queue processing for {total} items.")
|
| 682 |
-
|
| 683 |
-
for i, link in enumerate(items):
|
| 684 |
-
if STOP_QUEUE_SIGNAL:
|
| 685 |
-
yield "data: [SYSTEM] Stopped by user.\n\n"
|
| 686 |
-
logger.info("Stopping queue loop.")
|
| 687 |
break
|
| 688 |
|
| 689 |
-
if check_if_processed(link):
|
| 690 |
-
|
| 691 |
-
|
| 692 |
-
|
| 693 |
-
|
| 694 |
-
|
| 695 |
-
|
| 696 |
-
|
| 697 |
-
|
| 698 |
-
|
| 699 |
-
|
| 700 |
-
|
| 701 |
-
yield f"data: {msg}\n\n"
|
| 702 |
-
if isinstance(res, dict):
|
| 703 |
-
if "error" in res:
|
| 704 |
-
yield f"data:[ERROR DETAIL] {res['error']}\n\n"
|
| 705 |
-
if "csv_row" in res:
|
| 706 |
-
final_data = res
|
| 707 |
-
|
| 708 |
-
if final_data:
|
| 709 |
-
row = final_data["csv_row"]
|
| 710 |
-
vid_id = row["id"]
|
| 711 |
-
ts = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
|
| 712 |
-
|
| 713 |
-
# Save artifacts
|
| 714 |
-
json_path = f"data/labels/{vid_id}_{ts}_labels.json"
|
| 715 |
-
with open(json_path, 'w') as f: json.dump(final_data["full_json"], f, indent=2)
|
| 716 |
-
with open(f"data/labels/{vid_id}_{ts}.toon", 'w') as f: f.write(final_data["raw_toon"])
|
| 717 |
-
|
| 718 |
-
prompt_content = final_data.get("full_json", {}).get("meta_info", {}).get("prompt_used", "")
|
| 719 |
-
if prompt_content:
|
| 720 |
-
with open(f"data/prompts/{vid_id}_{ts}_prompt.txt", 'w', encoding='utf-8') as f:
|
| 721 |
-
f.write(prompt_content)
|
| 722 |
-
|
| 723 |
-
raw_response = final_data.get("raw_toon", "")
|
| 724 |
-
if raw_response:
|
| 725 |
-
with open(f"data/responses/{vid_id}.txt", 'w', encoding='utf-8') as f:
|
| 726 |
-
f.write(raw_response)
|
| 727 |
-
|
| 728 |
-
row["metadatapath"] = await generate_and_save_croissant_metadata(row)
|
| 729 |
-
row["json_path"] = json_path
|
| 730 |
-
|
| 731 |
-
dpath = Path("data/dataset.csv")
|
| 732 |
-
exists = dpath.exists()
|
| 733 |
-
with open(dpath, 'a', newline='', encoding='utf-8') as f:
|
| 734 |
-
writer = csv.DictWriter(f, fieldnames=list(row.keys()), extrasaction='ignore')
|
| 735 |
-
if not exists: writer.writeheader()
|
| 736 |
-
writer.writerow(row)
|
| 737 |
-
|
| 738 |
-
update_queue_status_in_file(link, "Processed")
|
| 739 |
-
processed_count += 1
|
| 740 |
-
yield f"data: [SUCCESS] Labeled.\n\n"
|
| 741 |
-
else:
|
| 742 |
-
update_queue_status_in_file(link, "Error")
|
| 743 |
-
yield f"data: [FAIL] Failed to label. Check logs.\n\n"
|
| 744 |
-
|
| 745 |
-
yield f"data: Batch Complete. +{processed_count} videos labeled.\n\n"
|
| 746 |
-
yield "event: close\ndata: Done\n\n"
|
| 747 |
-
|
| 748 |
-
return StreamingResponse(queue_stream(), media_type="text/event-stream")
|
| 749 |
|
| 750 |
-
|
| 751 |
-
|
| 752 |
-
|
| 753 |
-
data = await request.json()
|
| 754 |
-
link = data.get("link")
|
| 755 |
-
if not link: raise HTTPException(status_code=400, detail="No link")
|
| 756 |
-
q_path = Path("data/batch_queue.csv")
|
| 757 |
-
file_exists = q_path.exists()
|
| 758 |
-
|
| 759 |
-
if file_exists:
|
| 760 |
-
with open(q_path, 'r', encoding='utf-8') as f:
|
| 761 |
-
if link in f.read():
|
| 762 |
-
return {"status": "queued", "msg": "Duplicate"}
|
| 763 |
-
|
| 764 |
-
with open(q_path, 'a', newline='', encoding='utf-8') as f:
|
| 765 |
-
writer = csv.writer(f)
|
| 766 |
-
if not file_exists: writer.writerow(["link", "ingest_timestamp", "status", "task_type"])
|
| 767 |
-
writer.writerow([link, datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), "Pending", "Ingest"])
|
| 768 |
|
| 769 |
-
|
| 770 |
-
|
| 771 |
-
|
|
|
|
| 772 |
|
| 773 |
-
|
| 774 |
-
|
| 775 |
-
|
| 776 |
-
|
| 777 |
-
|
| 778 |
-
|
| 779 |
-
if not link or not comments: raise HTTPException(status_code=400, detail="Missing data")
|
| 780 |
-
|
| 781 |
-
csv_path = Path("data/comments.csv")
|
| 782 |
-
exists = csv_path.exists()
|
| 783 |
-
fieldnames = ["link", "author", "comment_text", "timestamp"]
|
| 784 |
-
|
| 785 |
-
with open(csv_path, 'a', newline='', encoding='utf-8') as f:
|
| 786 |
-
writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction='ignore')
|
| 787 |
-
if not exists: writer.writeheader()
|
| 788 |
|
| 789 |
-
|
| 790 |
-
|
| 791 |
-
|
| 792 |
-
|
| 793 |
-
|
| 794 |
-
|
| 795 |
-
|
| 796 |
-
|
| 797 |
-
|
| 798 |
-
|
| 799 |
-
|
| 800 |
-
|
| 801 |
-
|
| 802 |
-
|
| 803 |
-
except Exception as e:
|
| 804 |
-
raise HTTPException(status_code=500, detail=str(e))
|
| 805 |
|
| 806 |
-
|
| 807 |
-
|
| 808 |
-
|
| 809 |
-
|
| 810 |
-
|
| 811 |
-
|
| 812 |
-
|
| 813 |
-
|
| 814 |
-
|
| 815 |
-
|
| 816 |
-
|
| 817 |
-
|
| 818 |
-
|
| 819 |
-
|
| 820 |
-
|
| 821 |
-
|
| 822 |
-
|
| 823 |
-
|
| 824 |
-
|
| 825 |
-
"source_credibility_score": labels.get("source_credibility_score", 0),
|
| 826 |
-
"logical_consistency_score": labels.get("logical_consistency_score", 0),
|
| 827 |
-
"emotional_manipulation_score": labels.get("emotional_manipulation_score", 0),
|
| 828 |
-
"video_audio_score": labels.get("video_audio_score", 0),
|
| 829 |
-
"video_caption_score": labels.get("video_caption_score", 0),
|
| 830 |
-
"audio_caption_score": labels.get("audio_caption_score", 0),
|
| 831 |
-
"final_veracity_score": labels.get("final_veracity_score", 0),
|
| 832 |
-
"final_reasoning": labels.get("reasoning", ""),
|
| 833 |
-
"stats_likes": stats.get("likes", 0),
|
| 834 |
-
"stats_shares": stats.get("shares", 0),
|
| 835 |
-
"stats_comments": stats.get("comments", 0),
|
| 836 |
-
"stats_platform": stats.get("platform", "unknown")
|
| 837 |
-
}
|
| 838 |
-
|
| 839 |
-
dpath = Path("data/manual_dataset.csv")
|
| 840 |
-
exists = dpath.exists()
|
| 841 |
-
fieldnames = list(row_data.keys())
|
| 842 |
-
|
| 843 |
-
with open(dpath, 'a', newline='', encoding='utf-8') as f:
|
| 844 |
-
writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction='ignore')
|
| 845 |
-
if not exists: writer.writeheader()
|
| 846 |
-
writer.writerow(row_data)
|
| 847 |
-
|
| 848 |
-
return {"status": "saved"}
|
| 849 |
-
except Exception as e:
|
| 850 |
-
raise HTTPException(status_code=500, detail=str(e))
|
| 851 |
-
|
| 852 |
-
@app.get("/manage/list")
|
| 853 |
-
async def list_data():
|
| 854 |
-
data =[]
|
| 855 |
-
def read_csv(path, source_type):
|
| 856 |
-
if not path.exists(): return
|
| 857 |
-
with open(path, 'r', encoding='utf-8', errors='ignore') as f:
|
| 858 |
-
reader = csv.DictReader(f)
|
| 859 |
-
for row in reader:
|
| 860 |
-
if not row.get('id') or row['id'].strip() == "":
|
| 861 |
-
link = row.get('link', '')
|
| 862 |
-
tid = extract_tweet_id(link)
|
| 863 |
-
row['id'] = tid if tid else hashlib.md5(link.encode()).hexdigest()[:16]
|
| 864 |
|
| 865 |
-
|
| 866 |
-
|
| 867 |
-
|
| 868 |
-
with open(row['json_path'], 'r') as jf: json_content = json.load(jf)
|
| 869 |
-
except: pass
|
| 870 |
|
| 871 |
-
|
| 872 |
-
|
| 873 |
-
|
| 874 |
-
|
| 875 |
-
|
| 876 |
-
|
| 877 |
-
|
| 878 |
-
|
| 879 |
-
|
| 880 |
-
|
| 881 |
-
|
| 882 |
-
|
| 883 |
-
|
| 884 |
-
|
| 885 |
-
|
| 886 |
-
|
| 887 |
-
|
| 888 |
-
|
| 889 |
-
|
| 890 |
-
|
| 891 |
-
|
| 892 |
-
|
| 893 |
-
fieldnames = reader.fieldnames
|
| 894 |
-
for row in reader:
|
| 895 |
-
is_match = False
|
| 896 |
-
if id and row.get('id') == id: is_match = True
|
| 897 |
-
elif link and row.get('link') == link: is_match = True
|
| 898 |
-
if is_match:
|
| 899 |
-
found_in_file = True
|
| 900 |
-
deleted_count += 1
|
| 901 |
-
if not target_id: target_id = row.get('id')
|
| 902 |
-
else: rows.append(row)
|
| 903 |
-
if found_in_file:
|
| 904 |
-
with open(path, 'w', newline='', encoding='utf-8') as f:
|
| 905 |
-
writer = csv.DictWriter(f, fieldnames=fieldnames)
|
| 906 |
-
writer.writeheader()
|
| 907 |
-
writer.writerows(rows)
|
| 908 |
-
|
| 909 |
-
remove_from_csv(Path("data/dataset.csv"))
|
| 910 |
-
remove_from_csv(Path("data/manual_dataset.csv"))
|
| 911 |
-
if target_id:
|
| 912 |
-
for p in Path("data/labels").glob(f"{target_id}_*"): p.unlink(missing_ok=True)
|
| 913 |
-
for p in Path("metadata").glob(f"{target_id}_*"): p.unlink(missing_ok=True)
|
| 914 |
-
return {"status": "deleted", "count": deleted_count}
|
| 915 |
-
|
| 916 |
-
@app.post("/label_video")
|
| 917 |
-
async def label_video_endpoint(
|
| 918 |
-
video_url: str = Form(...), model_selection: str = Form(...),
|
| 919 |
-
gemini_api_key: str = Form(""), gemini_model_name: str = Form(""),
|
| 920 |
-
vertex_project_id: str = Form(""), vertex_location: str = Form(""), vertex_model_name: str = Form(""), vertex_api_key: str = Form(""),
|
| 921 |
-
nrp_api_key: str = Form(""), nrp_model_name: str = Form(""), nrp_base_url: str = Form("https://ellm.nrp-nautilus.io/v1"),
|
| 922 |
-
include_comments: bool = Form(False),
|
| 923 |
-
reasoning_method: str = Form("cot")
|
| 924 |
-
):
|
| 925 |
-
gemini_config = {"api_key": gemini_api_key, "model_name": gemini_model_name}
|
| 926 |
-
vertex_config = {"project_id": vertex_project_id, "location": vertex_location, "model_name": vertex_model_name, "api_key": vertex_api_key}
|
| 927 |
-
nrp_config = {"api_key": nrp_api_key, "model_name": nrp_model_name, "base_url": nrp_base_url}
|
| 928 |
-
|
| 929 |
-
async def stream():
|
| 930 |
-
async for msg in get_labels_for_link(video_url, gemini_config, vertex_config, nrp_config, model_selection, include_comments, reasoning_method):
|
| 931 |
-
if isinstance(msg, str): yield f"data: {msg}\n\n"
|
| 932 |
-
if isinstance(msg, dict) and "csv_row" in msg: yield "data: Done. Labels generated.\n\n"
|
| 933 |
-
yield "event: close\ndata: Done.\n\n"
|
| 934 |
-
return StreamingResponse(stream(), media_type="text/event-stream")
|
|
|
|
| 1 |
import os
|
| 2 |
import sys
|
| 3 |
+
|
| 4 |
+
# --- FIX: Ensure 'src' is in sys.path so sibling imports work ---
|
| 5 |
+
current_dir = os.path.dirname(os.path.abspath(__file__))
|
| 6 |
+
if current_dir not in sys.path:
|
| 7 |
+
sys.path.append(current_dir)
|
| 8 |
+
|
| 9 |
import asyncio
|
| 10 |
import subprocess
|
| 11 |
from pathlib import Path
|
|
|
|
| 16 |
import json
|
| 17 |
import hashlib
|
| 18 |
import re
|
|
|
|
|
|
|
|
|
|
| 19 |
from fastapi import FastAPI, Request, Form, UploadFile, File, Body, HTTPException
|
| 20 |
from fastapi.responses import HTMLResponse, StreamingResponse, PlainTextResponse, Response, FileResponse, JSONResponse
|
| 21 |
from fastapi.templating import Jinja2Templates
|
|
|
|
| 25 |
import inference_logic
|
| 26 |
import factuality_logic
|
| 27 |
import transcription
|
| 28 |
+
import user_analysis_logic
|
| 29 |
+
import agent_logic
|
| 30 |
+
import common_utils
|
| 31 |
|
| 32 |
+
from toon_parser import parse_veracity_toon
|
| 33 |
+
from labeling_logic import PROMPT_VARIANTS, LABELING_PROMPT_TEMPLATE, FCOT_MACRO_PROMPT
|
| 34 |
+
import benchmarking
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
+
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
|
| 37 |
+
logger = logging.getLogger(__name__)
|
| 38 |
+
LITE_MODE = os.getenv("LITE_MODE", "true").lower() == "true"
|
| 39 |
|
| 40 |
app = FastAPI()
|
| 41 |
|
|
|
|
| 47 |
allow_headers=["*"],
|
| 48 |
)
|
| 49 |
|
| 50 |
+
# --- CRITICAL: Mount A2A Agent Application FIRST ---
|
| 51 |
+
agent_mount_status = "pending"
|
| 52 |
+
try:
|
| 53 |
+
logger.info("Attempting to build A2A Agent App...")
|
| 54 |
+
a2a_agent_app = agent_logic.create_a2a_app()
|
| 55 |
+
if a2a_agent_app:
|
| 56 |
+
app.mount("/a2a", a2a_agent_app)
|
| 57 |
+
agent_mount_status = "success"
|
| 58 |
+
logger.info("✅ A2A Agent App successfully mounted at /a2a")
|
| 59 |
+
else:
|
| 60 |
+
logger.warning("⚠️ Agent factory returned None. Mounting internal fallback.")
|
| 61 |
+
from fastapi import FastAPI as InnerFastAPI
|
| 62 |
+
fallback = InnerFastAPI()
|
| 63 |
+
@fallback.post("/")
|
| 64 |
+
@fallback.post("/jsonrpc")
|
| 65 |
+
async def fallback_endpoint(request: Request):
|
| 66 |
+
return {"jsonrpc": "2.0", "result": {"text": "Fallback Agent (Factory returned None)", "data": {"status": "fallback"}}}
|
| 67 |
+
app.mount("/a2a", fallback)
|
| 68 |
+
agent_mount_status = "fallback_none"
|
| 69 |
+
except Exception as e:
|
| 70 |
+
logger.critical(f"❌ Failed to mount A2A Agent: {e}", exc_info=True)
|
| 71 |
+
from fastapi import FastAPI as InnerFastAPI
|
| 72 |
+
fallback = InnerFastAPI()
|
| 73 |
+
@fallback.post("/")
|
| 74 |
+
@fallback.post("/jsonrpc")
|
| 75 |
+
async def fallback_endpoint(request: Request):
|
| 76 |
+
return {"jsonrpc": "2.0", "result": {"text": f"Emergency Agent (Mount Error: {str(e)})", "data": {"status": "error"}}}
|
| 77 |
+
app.mount("/a2a", fallback)
|
| 78 |
+
agent_mount_status = f"error_{str(e)}"
|
| 79 |
+
|
| 80 |
+
# --- Static Files & Frontend ---
|
| 81 |
STATIC_DIR = "/app/static"
|
| 82 |
if not os.path.isdir(STATIC_DIR):
|
| 83 |
+
if os.path.isdir("/usr/share/vchat/static"):
|
| 84 |
+
STATIC_DIR = "/usr/share/vchat/static"
|
| 85 |
+
elif os.path.isdir("frontend/dist"):
|
| 86 |
+
STATIC_DIR = "frontend/dist"
|
| 87 |
+
else:
|
| 88 |
+
STATIC_DIR = "static"
|
| 89 |
+
os.makedirs(STATIC_DIR, exist_ok=True)
|
| 90 |
+
|
| 91 |
app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
|
|
|
|
| 92 |
|
| 93 |
+
# --- FIX: Explicitly mount assets for Vite support ---
|
| 94 |
+
assets_path = os.path.join(STATIC_DIR, "assets")
|
| 95 |
+
if os.path.exists(assets_path):
|
| 96 |
+
app.mount("/assets", StaticFiles(directory=assets_path), name="assets")
|
|
|
|
|
|
|
|
|
|
| 97 |
|
| 98 |
+
if not os.path.isdir("data/videos"):
|
| 99 |
+
os.makedirs("data/videos", exist_ok=True)
|
| 100 |
+
app.mount("/videos", StaticFiles(directory="data/videos"), name="videos")
|
| 101 |
|
| 102 |
+
templates = Jinja2Templates(directory=STATIC_DIR)
|
|
|
|
|
|
|
|
|
|
| 103 |
|
| 104 |
+
# Ensure all data directories exist
|
| 105 |
+
for d in["data", "data/videos", "data/labels", "data/prompts", "data/responses", "metadata", "data/profiles", "data/comments", "data/mnl_labeled", "models/sandbox_autogluon"]:
|
| 106 |
+
os.makedirs(d, exist_ok=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
|
| 108 |
+
try:
|
| 109 |
+
csv.field_size_limit(sys.maxsize)
|
| 110 |
+
except OverflowError:
|
| 111 |
+
csv.field_size_limit(2147483647)
|
|
|
|
|
|
|
|
|
|
| 112 |
|
| 113 |
+
STOP_QUEUE_SIGNAL = False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
|
| 115 |
+
# --- CONSTANTS ---
|
| 116 |
+
QUEUE_COLUMNS =["link", "ingest_timestamp", "status", "task_type"]
|
|
|
|
| 117 |
|
| 118 |
+
GROUND_TRUTH_FIELDS =[
|
| 119 |
+
"id", "link", "timestamp", "caption",
|
| 120 |
+
"visual_integrity_score", "audio_integrity_score", "source_credibility_score",
|
| 121 |
+
"logical_consistency_score", "emotional_manipulation_score",
|
| 122 |
+
"video_audio_score", "video_caption_score", "audio_caption_score",
|
| 123 |
+
"final_veracity_score", "final_reasoning",
|
| 124 |
+
"stats_likes", "stats_shares", "stats_comments", "stats_platform",
|
| 125 |
+
"tags", "classification", "source"
|
| 126 |
+
]
|
| 127 |
|
| 128 |
+
DATASET_COLUMNS =[
|
| 129 |
+
"id", "link", "timestamp", "caption",
|
| 130 |
+
"final_veracity_score", "visual_score", "audio_score", "source_score", "logic_score", "emotion_score",
|
| 131 |
+
"align_video_audio", "align_video_caption", "align_audio_caption",
|
| 132 |
+
"classification", "reasoning", "tags", "raw_toon",
|
| 133 |
+
"config_type", "config_model", "config_prompt", "config_reasoning", "config_params"
|
| 134 |
+
]
|
| 135 |
|
| 136 |
+
def ensure_csv_schema(file_path: Path, fieldnames: list):
|
| 137 |
+
if not file_path.exists(): return
|
| 138 |
+
try:
|
| 139 |
+
rows =[]
|
| 140 |
+
with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
|
| 141 |
+
start_pos = f.tell()
|
| 142 |
+
line = f.readline()
|
| 143 |
+
if not line: return
|
| 144 |
+
existing_header =[h.strip() for h in line.split(',')]
|
| 145 |
+
missing =[col for col in fieldnames if col not in existing_header]
|
| 146 |
+
if not missing: return
|
| 147 |
+
f.seek(start_pos)
|
| 148 |
+
dict_reader = csv.DictReader(f)
|
| 149 |
+
rows = list(dict_reader)
|
| 150 |
+
|
| 151 |
+
with open(file_path, 'w', newline='', encoding='utf-8') as f:
|
| 152 |
+
writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction='ignore')
|
| 153 |
+
writer.writeheader()
|
| 154 |
+
for row in rows: writer.writerow(row)
|
| 155 |
+
except Exception as e: logger.error(f"Schema migration error: {e}")
|
| 156 |
+
|
| 157 |
+
def get_processed_indices():
|
| 158 |
+
processed_ids = set()
|
| 159 |
+
processed_links = set()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
for filename in["data/dataset.csv", "data/manual_dataset.csv"]:
|
| 161 |
path = Path(filename)
|
| 162 |
+
for row in common_utils.robust_read_csv(path):
|
| 163 |
+
if row.get('id'): processed_ids.add(row.get('id'))
|
| 164 |
+
if row.get('link'): processed_links.add(common_utils.normalize_link(row.get('link')))
|
| 165 |
+
return processed_ids, processed_links
|
| 166 |
+
|
| 167 |
+
def check_if_processed(link: str, processed_ids=None, processed_links=None) -> bool:
|
| 168 |
+
target_id = common_utils.extract_tweet_id(link)
|
| 169 |
+
link_clean = common_utils.normalize_link(link)
|
| 170 |
+
if processed_ids is None or processed_links is None:
|
| 171 |
+
p_ids, p_links = get_processed_indices()
|
| 172 |
+
else: p_ids, p_links = processed_ids, processed_links
|
| 173 |
+
return (target_id and target_id in p_ids) or (link_clean and link_clean in p_links)
|
| 174 |
+
|
| 175 |
+
def update_queue_status(link: str, status: str, task_type: str = None):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
q_path = Path("data/batch_queue.csv")
|
| 177 |
if not q_path.exists(): return
|
| 178 |
rows =[]
|
| 179 |
+
updated = False
|
| 180 |
+
norm_target = common_utils.normalize_link(link)
|
| 181 |
with open(q_path, 'r', encoding='utf-8') as f:
|
| 182 |
+
reader = csv.DictReader(f)
|
| 183 |
+
fieldnames = list(reader.fieldnames) if reader.fieldnames else list(QUEUE_COLUMNS)
|
| 184 |
+
for f_name in QUEUE_COLUMNS:
|
| 185 |
+
if f_name not in fieldnames: fieldnames.append(f_name)
|
| 186 |
+
|
| 187 |
for row in reader:
|
| 188 |
+
if "task_type" not in row or not row["task_type"]: row["task_type"] = "Ingest"
|
| 189 |
+
if common_utils.normalize_link(row.get("link", "")) == norm_target:
|
| 190 |
+
if task_type is None or row["task_type"] == task_type:
|
| 191 |
+
row["status"] = status
|
| 192 |
+
updated = True
|
| 193 |
rows.append(row)
|
| 194 |
+
|
| 195 |
+
if updated:
|
| 196 |
+
with open(q_path, 'w', newline='', encoding='utf-8') as f:
|
| 197 |
+
writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction='ignore')
|
| 198 |
+
writer.writeheader()
|
| 199 |
+
writer.writerows(rows)
|
| 200 |
+
|
| 201 |
+
def log_queue_error(link: str, error_msg: str, task_type: str = None):
|
| 202 |
+
p = Path("data/queue_errors.csv")
|
| 203 |
+
with open(p, 'a', newline='', encoding='utf-8') as f:
|
| 204 |
writer = csv.writer(f)
|
| 205 |
+
if not p.exists() or p.stat().st_size == 0: writer.writerow(["link", "timestamp", "error"])
|
| 206 |
+
writer.writerow([link, datetime.datetime.now().isoformat(), error_msg])
|
| 207 |
+
update_queue_status(link, "Error", task_type)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 208 |
|
| 209 |
+
@app.on_event("startup")
|
| 210 |
+
async def startup_event():
|
| 211 |
+
ensure_csv_schema(Path("data/dataset.csv"), DATASET_COLUMNS)
|
| 212 |
+
ensure_csv_schema(Path("data/manual_dataset.csv"), GROUND_TRUTH_FIELDS)
|
| 213 |
+
ensure_csv_schema(Path("data/batch_queue.csv"), QUEUE_COLUMNS)
|
| 214 |
+
if not LITE_MODE:
|
| 215 |
+
try: inference_logic.load_models()
|
| 216 |
+
except Exception: pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
|
| 218 |
+
@app.get("/health")
|
| 219 |
+
async def health_check():
|
| 220 |
+
return {"status": "ok", "agent_mount": agent_mount_status}
|
| 221 |
|
| 222 |
+
@app.get("/benchmarks/stats")
|
| 223 |
+
async def get_benchmark_stats():
|
| 224 |
+
return benchmarking.calculate_benchmarks()
|
| 225 |
+
|
| 226 |
+
@app.get("/benchmarks/leaderboard")
|
| 227 |
+
async def get_benchmark_leaderboard():
|
| 228 |
+
return benchmarking.generate_leaderboard()
|
| 229 |
+
|
| 230 |
+
@app.post("/benchmarks/train_predictive")
|
| 231 |
+
async def run_predictive_training(config: dict = Body(...)):
|
| 232 |
+
return benchmarking.train_predictive_sandbox(config)
|
| 233 |
+
|
| 234 |
+
@app.get("/config/prompts")
|
| 235 |
+
async def list_prompts():
|
| 236 |
+
return [{"id": k, "name": v['description']} for k, v in PROMPT_VARIANTS.items()]
|
| 237 |
+
|
| 238 |
+
@app.get("/config/tags")
|
| 239 |
+
async def list_configured_tags():
|
| 240 |
+
path = Path("data/tags.json")
|
| 241 |
+
if path.exists():
|
| 242 |
+
with open(path, 'r') as f: return json.load(f)
|
| 243 |
+
return {}
|
| 244 |
+
|
| 245 |
+
@app.post("/config/tags")
|
| 246 |
+
async def save_configured_tags(tags: dict = Body(...)):
|
| 247 |
+
path = Path("data/tags.json")
|
| 248 |
+
with open(path, 'w', encoding='utf-8') as f: json.dump(tags, f, indent=2)
|
| 249 |
+
return {"status": "success"}
|
| 250 |
+
|
| 251 |
+
@app.get("/tags/list")
|
| 252 |
+
async def list_all_tags():
|
| 253 |
+
tags_count = {}
|
| 254 |
+
path = Path("data/dataset.csv")
|
| 255 |
+
if path.exists():
|
| 256 |
+
for row in common_utils.robust_read_csv(path):
|
| 257 |
+
t_str = row.get("tags", "")
|
| 258 |
+
if t_str:
|
| 259 |
+
for t in t_str.split(','):
|
| 260 |
+
t = t.strip()
|
| 261 |
+
if t: tags_count[t] = tags_count.get(t, 0) + 1
|
| 262 |
+
sorted_tags = sorted(tags_count.items(), key=lambda x: x[1], reverse=True)
|
| 263 |
+
return [{"name": k, "count": v} for k, v in sorted_tags]
|
| 264 |
|
| 265 |
+
@app.post("/extension/ingest")
|
| 266 |
+
async def extension_ingest_link(request: Request):
|
| 267 |
try:
|
| 268 |
+
data = await request.json()
|
| 269 |
+
link = data.get("link")
|
| 270 |
+
comments = data.get("comments",[])
|
| 271 |
+
if not link:
|
| 272 |
+
raise HTTPException(status_code=400, detail="Link required")
|
| 273 |
+
|
| 274 |
+
q_path = Path("data/batch_queue.csv")
|
| 275 |
+
existing = set()
|
| 276 |
+
if q_path.exists():
|
| 277 |
+
for r in common_utils.robust_read_csv(q_path): existing.add(common_utils.normalize_link(r.get('link')))
|
| 278 |
+
|
| 279 |
+
normalized = common_utils.normalize_link(link)
|
| 280 |
+
if normalized not in existing:
|
| 281 |
+
with open(q_path, 'a', newline='', encoding='utf-8') as f:
|
| 282 |
+
writer = csv.DictWriter(f, fieldnames=QUEUE_COLUMNS, extrasaction='ignore')
|
| 283 |
+
if not q_path.exists() or q_path.stat().st_size == 0: writer.writeheader()
|
| 284 |
+
writer.writerow({"link": link.strip(), "ingest_timestamp": datetime.datetime.now().isoformat(), "status": "Pending", "task_type": "Ingest"})
|
| 285 |
+
|
| 286 |
+
if comments:
|
| 287 |
+
tid = common_utils.extract_tweet_id(link) or hashlib.md5(link.encode()).hexdigest()[:10]
|
| 288 |
+
context_path = Path(f"data/comments/{tid}_ingest.json")
|
| 289 |
+
with open(context_path, 'w', encoding='utf-8') as f:
|
| 290 |
+
json.dump({
|
| 291 |
+
"link": link,
|
| 292 |
+
"timestamp": datetime.datetime.now().isoformat(),
|
| 293 |
+
"comments": comments
|
| 294 |
+
}, f, indent=2)
|
| 295 |
+
logger.info(f"Saved {len(comments)} comments for ingestion context: {tid}")
|
| 296 |
|
| 297 |
+
return {"status": "success", "link": link, "comments_saved": len(comments)}
|
| 298 |
+
except Exception as e:
|
| 299 |
+
logger.error(f"Ingest Error: {e}")
|
| 300 |
+
return JSONResponse({"status": "error", "message": str(e)}, status_code=500)
|
| 301 |
+
|
| 302 |
+
@app.post("/manual/promote")
|
| 303 |
+
async def promote_to_ground_truth(request: Request):
|
| 304 |
try:
|
| 305 |
+
data = await request.json()
|
| 306 |
+
target_ids = data.get("ids",[])
|
| 307 |
+
if not target_ids and data.get("id"): target_ids = [data.get("id")]
|
| 308 |
|
| 309 |
+
if not target_ids: return JSONResponse({"status": "error", "message": "No IDs provided"}, status_code=400)
|
| 310 |
+
|
| 311 |
+
ai_path = Path("data/dataset.csv")
|
| 312 |
+
ai_rows = {}
|
| 313 |
+
if ai_path.exists():
|
| 314 |
+
for row in common_utils.robust_read_csv(ai_path):
|
| 315 |
+
if row.get('id'): ai_rows[str(row['id'])] = row
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 316 |
|
| 317 |
+
manual_path = Path("data/manual_dataset.csv")
|
| 318 |
+
manual_exists = manual_path.exists()
|
| 319 |
+
existing_ids = set()
|
| 320 |
+
if manual_exists:
|
| 321 |
+
for row in common_utils.robust_read_csv(manual_path):
|
| 322 |
+
if row.get('id'): existing_ids.add(str(row['id']))
|
| 323 |
+
|
| 324 |
+
new_rows =[]
|
| 325 |
+
promoted_count = 0
|
| 326 |
+
for tid in target_ids:
|
| 327 |
+
tid_str = str(tid)
|
| 328 |
+
if tid_str in existing_ids: continue
|
| 329 |
+
found_row = ai_rows.get(tid_str)
|
| 330 |
+
if found_row:
|
| 331 |
+
mapped_row = {
|
| 332 |
+
"id": found_row.get("id"), "link": found_row.get("link"),
|
| 333 |
+
"timestamp": datetime.datetime.now().isoformat(), "caption": found_row.get("caption"),
|
| 334 |
+
"visual_integrity_score": found_row.get("visual_score", 0),
|
| 335 |
+
"audio_integrity_score": found_row.get("audio_score", 0),
|
| 336 |
+
"source_credibility_score": 5, "logical_consistency_score": found_row.get("logic_score", 0),
|
| 337 |
+
"emotional_manipulation_score": 5, "video_audio_score": 5,
|
| 338 |
+
"video_caption_score": found_row.get("align_video_caption", 0), "audio_caption_score": 5,
|
| 339 |
+
"final_veracity_score": found_row.get("final_veracity_score", 0),
|
| 340 |
+
"final_reasoning": found_row.get("reasoning", ""),
|
| 341 |
+
"stats_likes": 0, "stats_shares": 0, "stats_comments": 0, "stats_platform": "twitter",
|
| 342 |
+
"tags": found_row.get("tags", ""), "classification": found_row.get("classification", "None"),
|
| 343 |
+
"source": "manual_promoted"
|
| 344 |
+
}
|
| 345 |
+
new_rows.append(mapped_row)
|
| 346 |
+
promoted_count += 1
|
| 347 |
+
existing_ids.add(tid_str)
|
| 348 |
+
|
| 349 |
+
if not new_rows: return {"status": "success", "promoted_count": 0}
|
| 350 |
+
|
| 351 |
+
mode = 'a' if manual_exists else 'w'
|
| 352 |
+
with open(manual_path, mode, newline='', encoding='utf-8') as f:
|
| 353 |
+
writer = csv.DictWriter(f, fieldnames=GROUND_TRUTH_FIELDS, extrasaction='ignore')
|
| 354 |
+
if not manual_exists or manual_path.stat().st_size == 0: writer.writeheader()
|
| 355 |
+
for r in new_rows: writer.writerow(r)
|
| 356 |
+
|
| 357 |
+
return {"status": "success", "promoted_count": promoted_count}
|
| 358 |
+
except Exception as e: return JSONResponse({"status": "error", "message": str(e)}, status_code=500)
|
| 359 |
+
|
| 360 |
+
@app.post("/manual/delete")
|
| 361 |
+
async def delete_ground_truth(request: Request):
|
| 362 |
+
try:
|
| 363 |
+
data = await request.json()
|
| 364 |
+
target_ids = data.get("ids",[])
|
| 365 |
+
if not target_ids and data.get("id"): target_ids = [data.get("id")]
|
| 366 |
+
if not target_ids: raise HTTPException(status_code=400)
|
| 367 |
|
| 368 |
+
target_ids =[str(t) for t in target_ids]
|
| 369 |
+
manual_path = Path("data/manual_dataset.csv")
|
| 370 |
+
if not manual_path.exists(): return {"status": "error", "message": "File not found"}
|
| 371 |
+
|
| 372 |
+
rows =[]
|
| 373 |
+
deleted_count = 0
|
| 374 |
+
with open(manual_path, 'r', encoding='utf-8') as f:
|
| 375 |
+
reader = csv.DictReader(f)
|
| 376 |
+
for row in reader:
|
| 377 |
+
if str(row.get('id')) in target_ids: deleted_count += 1
|
| 378 |
+
else: rows.append(row)
|
| 379 |
+
with open(manual_path, 'w', newline='', encoding='utf-8') as f:
|
| 380 |
+
writer = csv.DictWriter(f, fieldnames=GROUND_TRUTH_FIELDS)
|
| 381 |
+
writer.writeheader()
|
| 382 |
+
writer.writerows(rows)
|
| 383 |
+
|
| 384 |
+
return {"status": "success", "deleted_count": deleted_count}
|
| 385 |
+
except Exception as e: return JSONResponse({"status": "error", "message": str(e)}, status_code=500)
|
| 386 |
+
|
| 387 |
+
@app.post("/manual/verify_queue")
async def verify_queue_items(request: Request):
    """Re-queue ground-truth items through the verification pipeline.

    Each matching ground-truth link is appended to the batch queue
    `resample_count` times (clamped to 1..100) with task_type="Verify".
    """
    try:
        data = await request.json()
        target_ids = data.get("ids", [])
        resample_count = max(1, min(data.get("resample_count", 1), 100))
        if not target_ids:
            return JSONResponse({"status": "error", "message": "No IDs provided"}, status_code=400)

        manual_path = Path("data/manual_dataset.csv")
        links_to_queue = []
        if manual_path.exists():
            for row in common_utils.robust_read_csv(manual_path):
                if str(row.get('id')) in target_ids:
                    link = row.get('link')
                    # Guard: a row with a missing/empty link would crash on
                    # link.strip() below and abort the whole request.
                    if link and str(link).strip():
                        links_to_queue.append(str(link))

        if not links_to_queue:
            return {"status": "error", "message": "No matching links found in Ground Truth."}

        q_path = Path("data/batch_queue.csv")
        added_count = 0
        with open(q_path, 'a', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=QUEUE_COLUMNS, extrasaction='ignore')
            # Append mode just created the file if it was absent; zero size
            # means the header still needs to be written.
            if not q_path.exists() or q_path.stat().st_size == 0:
                writer.writeheader()
            for link in links_to_queue:
                for _ in range(resample_count):
                    writer.writerow({
                        "link": link.strip(),
                        "ingest_timestamp": datetime.datetime.now().isoformat(),
                        "status": "Pending",
                        "task_type": "Verify"
                    })
                    added_count += 1

        return {"status": "success", "queued_count": added_count, "message": f"Added {added_count} items to queue for verification pipeline."}
    except Exception as e:
        return JSONResponse({"status": "error", "message": str(e)}, status_code=500)
|
| 423 |
+
|
| 424 |
+
@app.get("/profiles/list")
async def list_profiles():
    """Enumerate scraped profile directories with their post counts."""
    profiles_dir = Path("data/profiles")
    result = []
    if not profiles_dir.exists():
        return result
    try:
        for entry in profiles_dir.iterdir():
            if not entry.is_dir():
                continue
            history_file = entry / "history.csv"
            post_total = 0
            if history_file.exists():
                # Line count minus the CSV header row.
                with open(history_file, 'r', encoding='utf-8', errors='ignore') as fh:
                    post_total = sum(1 for _ in fh) - 1
            result.append({"username": entry.name, "posts_count": max(0, post_total)})
    except Exception:
        # Best-effort listing: a bad directory entry must not break the API.
        pass
    return sorted(result, key=lambda item: item['username'])
|
| 439 |
+
|
| 440 |
+
@app.get("/profiles/{username}/posts")
async def get_profile_posts(username: str):
    """Return a profile's scraped post history, flagging already-labeled rows."""
    csv_path = Path(f"data/profiles/{username}/history.csv")
    results = []
    if not csv_path.exists():
        return results
    processed_ids, processed_links = get_processed_indices()
    try:
        for record in common_utils.robust_read_csv(csv_path):
            post_link = record.get('link', '')
            tweet_id = common_utils.extract_tweet_id(post_link)
            # Labeled when either the tweet id or the normalized URL is known.
            labeled = bool(tweet_id and tweet_id in processed_ids) or \
                common_utils.normalize_link(post_link) in processed_links
            record['is_labeled'] = labeled
            results.append(record)
    except Exception:
        # Best-effort: serve whatever rows parsed cleanly.
        pass
    return results
|
| 457 |
+
|
| 458 |
+
@app.post("/extension/ingest_user_history")
async def ingest_user_history(request: Request):
    """Append newly scraped posts to data/profiles/<user>/history.csv.

    Deduplicates against links already in the history file (and within the
    incoming batch) and returns how many new rows were written.
    """
    try:
        data = await request.json()
        username = data.get("username")
        posts = data.get("posts", [])
        if not username or not posts:
            raise HTTPException(status_code=400)
        profile_dir = Path(f"data/profiles/{username}")
        profile_dir.mkdir(parents=True, exist_ok=True)
        csv_path = profile_dir / "history.csv"
        file_exists = csv_path.exists()
        existing = set()
        if file_exists:
            for row in common_utils.robust_read_csv(csv_path):
                existing.add(row.get('link'))

        with open(csv_path, 'a', newline='', encoding='utf-8') as f:
            fieldnames = ["link", "timestamp", "text", "is_reply", "metric_replies", "metric_reposts", "metric_likes", "metric_views", "ingested_at"]
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            if not file_exists:
                writer.writeheader()
            ts = datetime.datetime.now().isoformat()
            count = 0
            for p in posts:
                # .get() instead of p['link']: one malformed post must not
                # abort the entire ingest with a KeyError -> 500.
                link = p.get('link')
                if not link or link in existing:
                    continue
                m = p.get('metrics', {})
                writer.writerow({
                    "link": link, "timestamp": p.get('timestamp'),
                    # Guard None text (e.g. media-only posts) before .replace().
                    "text": (p.get('text') or '').replace('\n', ' '),
                    "is_reply": p.get('is_reply', False),
                    "metric_replies": m.get('replies', 0), "metric_reposts": m.get('reposts', 0),
                    "metric_likes": m.get('likes', 0), "metric_views": m.get('views', 0),
                    "ingested_at": ts
                })
                existing.add(link)  # dedupe within this batch too
                count += 1
        return {"status": "success", "new_posts": count}
    except HTTPException:
        # Keep the intended 400 instead of re-wrapping it into a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
| 492 |
+
|
| 493 |
+
@app.post("/extension/save_comments")
async def extension_save_comments(request: Request):
    """Persist scraped comments for one post to data/comments/<tweet_id>.csv."""
    try:
        data = await request.json()
        link = data.get("link")
        comments = data.get("comments", [])
        if not link:
            raise HTTPException(status_code=400)
        tweet_id = common_utils.extract_tweet_id(link) or hashlib.md5(link.encode()).hexdigest()[:10]
        csv_path = Path(f"data/comments/{tweet_id}.csv")
        # Ensure the target directory exists; otherwise open() raises
        # FileNotFoundError on a fresh deployment.
        csv_path.parent.mkdir(parents=True, exist_ok=True)
        with open(csv_path, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=["author", "text", "link", "timestamp"])
            writer.writeheader()
            ts = datetime.datetime.now().isoformat()
            for c in comments:
                writer.writerow({
                    "author": c.get("author", "Unknown"),
                    "text": c.get("text", "").replace("\n", " "),
                    "link": c.get("link", ""),
                    "timestamp": ts
                })
        return {"status": "success", "count": len(comments)}
    except HTTPException:
        # Preserve the intended 400 instead of re-wrapping it as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
|
| 515 |
+
|
| 516 |
+
@app.post("/extension/save_manual")
@app.post("/manual/save")
async def save_manual_label(request: Request):
    """Save a human-provided label for a post into the ground-truth stores.

    Writes three artifacts for the same tweet id:
      1. data/labels/<id>.json      - the "deep" structured label JSON,
      2. data/mnl_labeled/<id>.json - the flat row as JSON,
      3. data/manual_dataset.csv    - upserted flat row (replaces any
         existing row with the same id).
    Finally marks the link as Processed in the batch queue.
    Accepts either {"labels": {...}} or the label fields at the top level.
    """
    try:
        data = await request.json()
        link = data.get("link")
        if not link:
            return JSONResponse({"status": "error", "message": "Link required"}, status_code=400)

        # Stable id: tweet id if extractable, else a short hash of the URL.
        tweet_id = common_utils.extract_tweet_id(link) or hashlib.md5(link.encode()).hexdigest()[:10]
        # Labels may be nested under "labels" or sent flat; fall back to data.
        labels = data.get("labels", data)

        # Flat row matching the GROUND_TRUTH_FIELDS CSV schema.
        row = {
            "id": tweet_id, "link": link, "timestamp": datetime.datetime.now().isoformat(),
            "caption": data.get("caption", ""),
            "visual_integrity_score": labels.get("visual_integrity_score", 0),
            "audio_integrity_score": labels.get("audio_integrity_score", 0),
            "source_credibility_score": labels.get("source_credibility_score", 0),
            "logical_consistency_score": labels.get("logical_consistency_score", 0),
            "emotional_manipulation_score": labels.get("emotional_manipulation_score", 5),
            "video_audio_score": labels.get("video_audio_score", 0),
            "video_caption_score": labels.get("video_caption_score", 0),
            "audio_caption_score": labels.get("audio_caption_score", 0),
            "final_veracity_score": labels.get("final_veracity_score", 0),
            "final_reasoning": labels.get("reasoning", labels.get("final_reasoning", "")),
            "stats_likes": 0, "stats_shares": 0, "stats_comments": 0, "stats_platform": "twitter",
            "tags": data.get("tags", labels.get("tags", "")),
            "classification": labels.get("classification", "None"),
            "source": "Manual"
        }

        # Tags arrive as a comma-separated string; split into a clean list.
        tag_str = str(row["tags"])
        tag_list = [t.strip() for t in tag_str.split(',') if t.strip()]

        # Structured JSON mirroring the AI pipeline's output shape, so manual
        # labels are interchangeable with model-produced ones downstream.
        deep_json = {
            "veracity_vectors": {
                "visual_integrity_score": str(row["visual_integrity_score"]),
                "audio_integrity_score": str(row["audio_integrity_score"]),
                "source_credibility_score": str(row["source_credibility_score"]),
                "logical_consistency_score": str(row["logical_consistency_score"]),
                "emotional_manipulation_score": str(row["emotional_manipulation_score"])
            },
            "modalities": {
                "video_audio_score": str(row["video_audio_score"]),
                "video_caption_score": str(row["video_caption_score"]),
                "audio_caption_score": str(row["audio_caption_score"])
            },
            "video_context_summary": row["caption"],
            "tags": tag_list,
            "factuality_factors": {
                "claim_accuracy": "Manual",
                "evidence_gap": "Manual Verification",
                "grounding_check": "Manual Verification"
            },
            "disinformation_analysis": {
                "classification": row["classification"],
                "intent": "Manual Labeling",
                "threat_vector": "Manual Labeling"
            },
            "final_assessment": {
                "veracity_score_total": str(row["final_veracity_score"]),
                "reasoning": row["final_reasoning"]
            },
            "raw_parsed_structure": {
                "summary": {"text": row["caption"]},
                "tags": {"keywords": row["tags"]},
                "final": {"score": str(row["final_veracity_score"]), "reasoning": row["final_reasoning"]}
            },
            "meta_info": {
                "id": tweet_id,
                "timestamp": row["timestamp"],
                "link": link,
                "model_selection": "Manual"
            }
        }

        # Artifact 1: deep label JSON.
        json_path_direct = Path(f"data/labels/{tweet_id}.json")
        with open(json_path_direct, 'w', encoding='utf-8') as jf:
            json.dump(deep_json, jf, indent=2, ensure_ascii=False)

        # Artifact 2: flat row as JSON.
        with open(Path(f"data/mnl_labeled/{tweet_id}.json"), 'w', encoding='utf-8') as jf:
            json.dump(row, jf, indent=2, ensure_ascii=False)

        # Artifact 3: upsert into the ground-truth CSV.
        manual_path = Path("data/manual_dataset.csv")
        exists = manual_path.exists()
        ensure_csv_schema(manual_path, GROUND_TRUTH_FIELDS)

        rows = []
        found = False
        if exists:
            for r in common_utils.robust_read_csv(manual_path):
                if str(r.get('id')) == str(tweet_id):
                    # Replace the existing row with the new label, restricted
                    # to the canonical field set.
                    clean_row = {k: row.get(k, "") for k in GROUND_TRUTH_FIELDS}
                    rows.append(clean_row)
                    found = True
                else:
                    rows.append(r)

        if not found:
            clean_row = {k: row.get(k, "") for k in GROUND_TRUTH_FIELDS}
            rows.append(clean_row)

        with open(manual_path, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=GROUND_TRUTH_FIELDS, extrasaction='ignore')
            writer.writeheader()
            writer.writerows(rows)

        # Labeling a link counts as processing it in the batch queue.
        update_queue_status(link, "Processed")
        return {"status": "success", "id": tweet_id}
    except Exception as e:
        logger.error(f"Save Manual Error: {e}")
        return JSONResponse({"status": "error", "message": str(e)}, status_code=500)
|
| 628 |
|
| 629 |
+
@app.get("/community/list_datasets")
async def list_community_datasets():
    """List saved comment datasets (one CSV per post) with their row counts."""
    path = Path("data/comments")
    files = []
    if path.exists():
        for f in path.glob("*.csv"):
            # Use a context manager so the file handle is closed
            # deterministically (the original bare open() leaked it).
            with open(f, encoding='utf-8') as fh:
                row_count = sum(1 for _ in fh) - 1  # minus the header row
            files.append({"id": f.stem, "count": row_count})
    return files
|
| 637 |
+
|
| 638 |
+
@app.post("/community/analyze")
async def analyze_community(dataset_id: str = Body(..., embed=True)):
    """Heuristic community-trust score for a post from its comment keywords.

    Counts skeptical vs. trusting keyword hits and maps them onto a 0-100
    score centered at 50 (trusting +2 each, skeptical -5 each).
    """
    path = Path(f"data/comments/{dataset_id}.csv")
    if not path.exists():
        raise HTTPException(status_code=404)
    comments = list(common_utils.robust_read_csv(path))
    if not comments:
        return {"score": 0, "verdict": "No Data"}
    s_keys = ["fake", "lie", "staged", "bs", "propaganda", "ai", "deepfake"]
    t_keys = ["true", "real", "confirmed", "fact", "source", "proof"]
    # .get('text') or '': rows with a missing/None text column must not
    # raise (KeyError/AttributeError) and 500 the endpoint.
    texts = [(c.get('text') or '').lower() for c in comments]
    s_count = sum(1 for t in texts if any(k in t for k in s_keys))
    t_count = sum(1 for t in texts if any(k in t for k in t_keys))
    score = max(0, min(100, 50 + (t_count * 2) - (s_count * 5)))
    verdict = "Community Skepticism" if score < 30 else "Community Verification" if score > 70 else "Neutral/Mixed"
    return {"dataset_id": dataset_id, "trust_score": score, "verdict": verdict, "details": {"skeptical_comments": s_count, "trusting_comments": t_count}}
|
| 651 |
+
|
| 652 |
+
@app.get("/dataset/list")
async def get_dataset_list():
    """Merge manual ground truth and AI-labeled rows; manual takes precedence."""
    combined = []
    manual_ids = set()

    manual_path = Path("data/manual_dataset.csv")
    if manual_path.exists():
        for entry in common_utils.robust_read_csv(manual_path):
            entry['source'] = 'Manual'
            row_id = entry.get('id')
            if row_id:
                manual_ids.add(str(row_id))
            combined.append(entry)

    ai_path = Path("data/dataset.csv")
    if ai_path.exists():
        for entry in common_utils.robust_read_csv(ai_path):
            # Skip AI rows shadowed by a manual label for the same id.
            if str(entry.get('id', '')) not in manual_ids:
                entry['source'] = 'AI'
                combined.append(entry)

    return sorted(combined, key=lambda r: r.get('timestamp', ''), reverse=True)
|
| 671 |
+
|
| 672 |
+
@app.get("/analytics/account_integrity")
async def get_account_integrity():
    """Rank known accounts by the average veracity score of their labeled posts."""
    # Map tweet id -> username, built from every profile's history file.
    id_map = {}
    prof_dir = Path("data/profiles")
    if prof_dir.exists():
        for profile in prof_dir.iterdir():
            for entry in common_utils.robust_read_csv(profile / "history.csv"):
                tweet_id = common_utils.extract_tweet_id(entry.get('link', ''))
                if tweet_id:
                    id_map[tweet_id] = profile.name

    # Collect per-account score lists from both AI and manual datasets.
    scores_map = {}
    for fname in ["data/dataset.csv", "data/manual_dataset.csv"]:
        for entry in common_utils.robust_read_csv(Path(fname)):
            raw_score = entry.get('final_veracity_score', '0')
            # Strip any non-numeric characters before parsing the score.
            try:
                value = float(re.sub(r'[^\d.]', '', str(raw_score)))
            except Exception:
                value = 0
            author = id_map.get(entry.get('id'), "Unknown")
            if author == "Unknown":
                continue
            scores_map.setdefault(author, []).append(value)

    summary = [
        {"username": user, "avg_veracity": round(sum(vals) / len(vals), 1), "posts_labeled": len(vals)}
        for user, vals in scores_map.items()
    ]
    return sorted(summary, key=lambda r: r['avg_veracity'], reverse=True)
|
| 696 |
|
| 697 |
@app.post("/queue/add")
async def add_queue_item(link: str = Body(..., embed=True)):
    """Append a single link to the batch queue, skipping duplicates."""
    q_path = Path("data/batch_queue.csv")
    seen = set()
    if q_path.exists():
        for entry in common_utils.robust_read_csv(q_path):
            seen.add(common_utils.normalize_link(entry.get('link')))

    normalized = common_utils.normalize_link(link)
    if not normalized:
        raise HTTPException(status_code=400, detail="Invalid link")
    if normalized in seen:
        return {"status": "ignored", "message": "Link already in queue"}

    with open(q_path, 'a', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=QUEUE_COLUMNS, extrasaction='ignore')
        # Append mode just created the file if it was missing, so a zero
        # size is the "header still needed" signal.
        if not q_path.exists() or q_path.stat().st_size == 0:
            writer.writeheader()
        writer.writerow({
            "link": link.strip(),
            "ingest_timestamp": datetime.datetime.now().isoformat(),
            "status": "Pending",
            "task_type": "Ingest",
        })
    return {"status": "success", "link": link}
|
| 713 |
|
| 714 |
+
@app.post("/queue/upload_csv")
async def upload_csv(file: UploadFile = File(...)):
    """Bulk-add links from an uploaded CSV/text file to the batch queue.

    Any line containing 'http' contributes its first comma-separated field.
    Links already queued — or repeated within the same upload — are skipped.
    """
    contents = await file.read()
    # errors='replace': a stray non-UTF-8 byte must not 500 the whole upload.
    lines = contents.decode('utf-8', errors='replace').splitlines()
    q_path = Path("data/batch_queue.csv")
    existing = set()
    if q_path.exists():
        for r in common_utils.robust_read_csv(q_path):
            existing.add(common_utils.normalize_link(r.get('link')))

    added = 0
    with open(q_path, 'a', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=QUEUE_COLUMNS, extrasaction='ignore')
        if not q_path.exists() or q_path.stat().st_size == 0:
            writer.writeheader()
        for line in lines:
            if 'http' not in line:
                continue
            raw = line.split(',')[0].strip()
            normalized = common_utils.normalize_link(raw)
            if normalized in existing:
                continue
            writer.writerow({"link": raw, "ingest_timestamp": datetime.datetime.now().isoformat(), "status": "Pending", "task_type": "Ingest"})
            # Track within this upload too, so duplicate lines in the same
            # file are queued only once (the original re-added each one).
            existing.add(normalized)
            added += 1
    return {"status": "success", "added_count": added}
|
| 734 |
+
|
| 735 |
+
@app.post("/queue/stop")
async def stop_processing():
    """Request a graceful stop of the running batch-queue loop.

    Sets the module-level flag that the queue worker polls between items;
    processing halts after the current item finishes.
    """
    global STOP_QUEUE_SIGNAL
    STOP_QUEUE_SIGNAL = True
    return {"status": "success", "message": "Stopping queue processing..."}
|
| 740 |
+
|
| 741 |
@app.post("/queue/clear_processed")
async def clear_processed_queue():
    """Drop finished rows from the batch queue and rewrite the file."""
    q_path = Path("data/batch_queue.csv")
    if not q_path.exists():
        return {"status": "success", "removed_count": 0}
    processed_ids, processed_links = get_processed_indices()
    survivors = []
    removed = 0
    for entry in common_utils.robust_read_csv(q_path):
        link = entry.get("link")
        status = entry.get("status", "Pending")
        task_type = entry.get("task_type", "Ingest")

        # A row is finished if explicitly marked Processed, or if a
        # non-Verify task's link already exists in the labeled datasets.
        finished = status == "Processed" or (
            task_type != "Verify" and check_if_processed(link, processed_ids, processed_links)
        )
        if finished:
            removed += 1
        else:
            survivors.append(entry)

    with open(q_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=QUEUE_COLUMNS, extrasaction='ignore')
        writer.writeheader()
        writer.writerows(survivors)
    return {"status": "success", "removed_count": removed}
|
| 764 |
|
|
|
|
| 766 |
async def delete_queue_items(request: Request):
    """Remove the given links from the batch queue and rewrite the file."""
    try:
        payload = await request.json()
        targets = {common_utils.normalize_link(l) for l in payload.get("links", [])}
        q_path = Path("data/batch_queue.csv")
        if not q_path.exists():
            return {"status": "success", "count": 0}
        survivors = []
        removed = 0
        for entry in common_utils.robust_read_csv(q_path):
            if common_utils.normalize_link(entry.get('link')) in targets:
                removed += 1
            else:
                survivors.append(entry)
        with open(q_path, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=QUEUE_COLUMNS, extrasaction='ignore')
            writer.writeheader()
            writer.writerows(survivors)
        return {"status": "success", "count": removed}
    except Exception as e:
        return JSONResponse({"status": "error", "message": str(e)}, status_code=500)
|
|
|
|
| 785 |
async def requeue_items(request: Request):
    """Reset the given queue links back to 'Pending' status."""
    try:
        payload = await request.json()
        targets = {common_utils.normalize_link(l) for l in payload.get("links", [])}
        q_path = Path("data/batch_queue.csv")
        if not q_path.exists():
            return {"status": "success", "count": 0}
        rewritten = []
        touched = 0
        for entry in common_utils.robust_read_csv(q_path):
            if common_utils.normalize_link(entry.get('link')) in targets:
                entry['status'] = 'Pending'
                touched += 1
            rewritten.append(entry)
        with open(q_path, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=QUEUE_COLUMNS, extrasaction='ignore')
            writer.writeheader()
            writer.writerows(rewritten)
        return {"status": "success", "count": touched}
    except Exception as e:
        return JSONResponse({"status": "error", "message": str(e)}, status_code=500)
|
| 804 |
|
| 805 |
+
@app.post("/dataset/delete")
async def delete_dataset_items(request: Request):
    """Delete rows from the AI dataset (data/dataset.csv) by id."""
    try:
        data = await request.json()
        target_ids = data.get("ids", [])
        if not target_ids:
            # Re-raised below so the client gets a 400, not a wrapped 500.
            raise HTTPException(status_code=400)
        target_ids = set(str(t) for t in target_ids)

        path = Path("data/dataset.csv")
        if not path.exists():
            return {"status": "success", "count": 0}

        kept = []
        deleted_count = 0
        for row in common_utils.robust_read_csv(path):
            if str(row.get('id')) in target_ids:
                deleted_count += 1
            else:
                kept.append(row)

        with open(path, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=DATASET_COLUMNS, extrasaction='ignore')
            writer.writeheader()
            writer.writerows(kept)

        return {"status": "success", "deleted_count": deleted_count}
    except HTTPException:
        raise
    except Exception as e:
        return JSONResponse({"status": "error", "message": str(e)}, status_code=500)
|
| 832 |
+
|
| 833 |
+
@app.post("/analyze/user_context")
async def analyze_user_context(request: Request):
    """Generate an AI behavior/context report for one username."""
    try:
        payload = await request.json()
        report = await user_analysis_logic.generate_user_profile_report(payload.get("username"))
        return {"status": "success", "report": report}
    except Exception as e:
        return JSONResponse({"error": str(e)}, status_code=500)
|
| 840 |
+
|
| 841 |
+
@app.get("/download-dataset")
async def download_dataset():
    """Stream the AI-labeled dataset CSV to the client, 404 if absent."""
    dataset_file = Path("data/dataset.csv")
    if not dataset_file.exists():
        return Response("Dataset not found.", status_code=404)
    return FileResponse(path=dataset_file, filename="dataset.csv", media_type='text/csv')
|
| 847 |
+
|
| 848 |
+
@app.get("/model-architecture", response_class=PlainTextResponse)
async def get_model_architecture():
    """Dump the loaded model's architecture, or explain why none is available."""
    if LITE_MODE:
        return "Running in LITE mode."
    model = inference_logic.base_model
    return str(model) if model else "Model not loaded."
|
| 853 |
+
|
| 854 |
+
@app.get("/", response_class=HTMLResponse)
async def read_root(request: Request):
    """Serve the single-page frontend shell (templates/index.html)."""
    return templates.TemplateResponse("index.html", {"request": request})
|
| 857 |
+
|
| 858 |
+
@app.get("/queue/list")
async def get_queue_list():
    """Return every queue row with a derived status and any cached comments."""
    q_path = Path("data/batch_queue.csv")
    items = []
    p_ids, p_links = get_processed_indices()
    for row in common_utils.robust_read_csv(q_path):
        if not row:
            continue
        l = row.get("link")
        # Skip malformed rows: a missing link would crash l.encode() below
        # and take the whole listing down with it.
        if not l:
            continue
        status = row.get("status", "Pending")
        task_type = row.get("task_type") or "Ingest"

        # Non-Verify tasks whose link is already labeled count as Processed
        # even if the queue file still says Pending.
        if status == "Pending" and task_type != "Verify" and check_if_processed(l, p_ids, p_links):
            status = "Processed"

        # Fetch associated comments to display in the dropdown.
        comments = []
        tid = common_utils.extract_tweet_id(l) or hashlib.md5(l.encode()).hexdigest()[:10]
        c_path = Path(f"data/comments/{tid}_ingest.json")
        if c_path.exists():
            try:
                with open(c_path, 'r') as f:
                    c_data = json.load(f)
                comments = c_data.get('comments', [])
            except Exception:
                # Best-effort: a corrupt comments cache just means no dropdown.
                pass

        items.append({
            "link": l,
            "timestamp": row.get("ingest_timestamp", ""),
            "status": status,
            "task_type": task_type,
            "comments": comments
        })
    return items
|
| 891 |
|
| 892 |
@app.post("/queue/run")
|
| 893 |
async def run_queue_processing(
|
|
|
|
| 895 |
gemini_api_key: str = Form(""), gemini_model_name: str = Form(""),
|
| 896 |
vertex_project_id: str = Form(""), vertex_location: str = Form(""), vertex_model_name: str = Form(""), vertex_api_key: str = Form(""),
|
| 897 |
nrp_api_key: str = Form(""), nrp_model_name: str = Form(""), nrp_base_url: str = Form("https://ellm.nrp-nautilus.io/v1"),
|
| 898 |
+
include_comments: bool = Form(False), reasoning_method: str = Form("cot"), prompt_template: str = Form("standard"),
|
| 899 |
+
custom_query: str = Form(""), max_reprompts: int = Form(1)
|
|
|
|
|
|
|
|
|
|
| 900 |
):
|
| 901 |
global STOP_QUEUE_SIGNAL
|
| 902 |
STOP_QUEUE_SIGNAL = False
|
| 903 |
+
|
| 904 |
gemini_config = {"api_key": gemini_api_key, "model_name": gemini_model_name, "max_retries": max_reprompts}
|
| 905 |
+
vertex_config = {"project_id": vertex_project_id, "location": vertex_location, "model_name": vertex_model_name, "api_key": vertex_api_key, "max_retries": max_reprompts, "use_search": True}
|
| 906 |
nrp_config = {"api_key": nrp_api_key, "model_name": nrp_model_name, "base_url": nrp_base_url, "max_retries": max_reprompts}
|
| 907 |
+
|
| 908 |
sel_p = PROMPT_VARIANTS.get(prompt_template, PROMPT_VARIANTS['standard'])
|
| 909 |
system_persona_txt = sel_p['instruction']
|
| 910 |
+
if custom_query.strip(): system_persona_txt += f"\n\nSPECIAL INSTRUCTION FOR THIS BATCH: {custom_query}"
|
| 911 |
|
| 912 |
+
if model_selection == 'vertex':
|
| 913 |
+
active_config = vertex_config
|
| 914 |
+
active_model_name = vertex_model_name
|
| 915 |
+
elif model_selection == 'nrp':
|
| 916 |
+
active_config = nrp_config
|
| 917 |
+
active_model_name = nrp_model_name
|
| 918 |
+
else:
|
| 919 |
+
active_config = gemini_config
|
| 920 |
+
active_model_name = gemini_model_name
|
| 921 |
+
|
| 922 |
+
config_params_dict = {
|
| 923 |
+
"reprompts": max_reprompts,
|
| 924 |
+
"include_comments": include_comments,
|
| 925 |
+
"agent_active": False
|
| 926 |
+
}
|
| 927 |
+
config_params_str = json.dumps(config_params_dict)
|
| 928 |
+
|
| 929 |
async def queue_stream():
|
| 930 |
+
q_path = Path("data/batch_queue.csv")
|
| 931 |
+
items =[r for r in common_utils.robust_read_csv(q_path) if r.get("link") and r.get("status", "Pending") == "Pending"]
|
| 932 |
+
p_ids, p_links = get_processed_indices()
|
| 933 |
+
yield f"data:[SYSTEM] Persona: {sel_p['description']}\n\n"
|
| 934 |
|
| 935 |
+
for item in items:
|
| 936 |
+
link = item.get("link")
|
| 937 |
+
task_type = item.get("task_type") or "Ingest"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 938 |
|
| 939 |
+
if STOP_QUEUE_SIGNAL:
|
| 940 |
+
yield f"data:[SYSTEM] Stopping by user request.\n\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 941 |
break
|
| 942 |
|
| 943 |
+
if task_type != "Verify" and check_if_processed(link, p_ids, p_links):
|
| 944 |
+
update_queue_status(link, "Processed", task_type)
|
| 945 |
+
continue
|
| 946 |
+
|
| 947 |
+
gt_data = None
|
| 948 |
+
if task_type == "Verify":
|
| 949 |
+
manual_path = Path("data/manual_dataset.csv")
|
| 950 |
+
if manual_path.exists():
|
| 951 |
+
for row in common_utils.robust_read_csv(manual_path):
|
| 952 |
+
if common_utils.normalize_link(row.get('link', '')) == common_utils.normalize_link(link):
|
| 953 |
+
gt_data = row
|
| 954 |
+
break
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 955 |
|
| 956 |
+
yield f"data: [START] {link} (Type: {task_type})\n\n"
|
| 957 |
+
tid = common_utils.extract_tweet_id(link) or hashlib.md5(link.encode()).hexdigest()[:10]
|
| 958 |
+
assets = await common_utils.prepare_video_assets(link, tid)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 959 |
|
| 960 |
+
if not assets or (not assets.get('video') and not assets.get('caption')):
|
| 961 |
+
log_queue_error(link, "Download/Fetch Error", task_type)
|
| 962 |
+
yield f"data: - Download Error.\n\n"
|
| 963 |
+
continue
|
| 964 |
|
| 965 |
+
trans = common_utils.parse_vtt(assets['transcript']) if assets.get('transcript') else "No transcript (Audio/Video missing)."
|
| 966 |
+
video_file = assets.get('video')
|
| 967 |
+
if not video_file:
|
| 968 |
+
yield f"data: - No video found. Text-only analysis.\n\n"
|
| 969 |
+
video_file = None
|
| 970 |
+
else: yield f"data: - Video found. Inferencing...\n\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 971 |
|
| 972 |
+
comments_path = Path(f"data/comments/{tid}_ingest.json")
|
| 973 |
+
current_system_persona = system_persona_txt
|
| 974 |
+
if comments_path.exists():
|
| 975 |
+
try:
|
| 976 |
+
with open(comments_path, 'r') as f:
|
| 977 |
+
c_data = json.load(f)
|
| 978 |
+
comments = c_data.get('comments',[])
|
| 979 |
+
if comments:
|
| 980 |
+
yield f"data: - Found {len(comments)} comments. Generating Community Context...\n\n"
|
| 981 |
+
community_summary = await inference_logic.generate_community_summary(comments, model_selection, active_config)
|
| 982 |
+
current_system_persona += f"\n\n### COMMUNITY NOTES / CONTEXT (from Comments):\n{community_summary}\n\nUse this community context to cross-reference claims but remain objective."
|
| 983 |
+
yield f"data: - Context Generated.\n\n"
|
| 984 |
+
except Exception as e:
|
| 985 |
+
logger.error(f"Error processing comments for context: {e}")
|
|
|
|
|
|
|
| 986 |
|
| 987 |
+
res_data = None
|
| 988 |
+
if model_selection == 'gemini':
|
| 989 |
+
async for chunk in inference_logic.run_gemini_labeling_pipeline(video_file, assets['caption'], trans, gemini_config, include_comments, reasoning_method, current_system_persona, request_id=tid):
|
| 990 |
+
if isinstance(chunk, str): yield f"data: - {chunk}\n\n"
|
| 991 |
+
else: res_data = chunk
|
| 992 |
+
elif model_selection == 'vertex':
|
| 993 |
+
async for chunk in inference_logic.run_vertex_labeling_pipeline(video_file, assets['caption'], trans, vertex_config, include_comments, reasoning_method, current_system_persona, request_id=tid):
|
| 994 |
+
if isinstance(chunk, str): yield f"data: - {chunk}\n\n"
|
| 995 |
+
else: res_data = chunk
|
| 996 |
+
elif model_selection == 'nrp':
|
| 997 |
+
async for chunk in inference_logic.run_nrp_labeling_pipeline(video_file, assets['caption'], trans, nrp_config, include_comments, reasoning_method, current_system_persona, request_id=tid):
|
| 998 |
+
if isinstance(chunk, str): yield f"data: - {chunk}\n\n"
|
| 999 |
+
else: res_data = chunk
|
| 1000 |
+
|
| 1001 |
+
if res_data and "parsed_data" in res_data:
|
| 1002 |
+
parsed = res_data["parsed_data"]
|
| 1003 |
+
d_path = Path("data/dataset.csv")
|
| 1004 |
+
ensure_csv_schema(d_path, DATASET_COLUMNS)
|
| 1005 |
+
exists = d_path.exists()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1006 |
|
| 1007 |
+
ai_score_val = parsed['final_assessment'].get('veracity_score_total', 0)
|
| 1008 |
+
try: ai_score = float(ai_score_val)
|
| 1009 |
+
except: ai_score = 0
|
|
|
|
|
|
|
| 1010 |
|
| 1011 |
+
if task_type == "Verify" and gt_data is not None:
|
| 1012 |
+
gt_final = float(gt_data.get('final_veracity_score', 0))
|
| 1013 |
+
delta = abs(ai_score - gt_final)
|
| 1014 |
+
vec_ai = parsed.get('veracity_vectors', {})
|
| 1015 |
+
mod_ai = parsed.get('modalities', {})
|
| 1016 |
+
|
| 1017 |
+
def s_float(v):
|
| 1018 |
+
try: return float(v)
|
| 1019 |
+
except: return 0.0
|
| 1020 |
+
|
| 1021 |
+
yield f"data: -[VERIFICATION PIPELINE] Configuration Analysis:\n"
|
| 1022 |
+
yield f"data: Model: {active_model_name} | Provider: {model_selection}\n"
|
| 1023 |
+
yield f"data: Reasoning: {reasoning_method} | Prompt: {prompt_template} | Reprompts: {max_reprompts}\n"
|
| 1024 |
+
yield f"data: -[VERIFICATION SCORES COMPARISON (AI vs Ground Truth)]\n"
|
| 1025 |
+
yield f"data: Visual Integrity : AI {s_float(vec_ai.get('visual_integrity_score'))} | GT {s_float(gt_data.get('visual_integrity_score'))}\n"
|
| 1026 |
+
yield f"data: Audio Integrity : AI {s_float(vec_ai.get('audio_integrity_score'))} | GT {s_float(gt_data.get('audio_integrity_score'))}\n"
|
| 1027 |
+
yield f"data: Source Credibility : AI {s_float(vec_ai.get('source_credibility_score'))} | GT {s_float(gt_data.get('source_credibility_score'))}\n"
|
| 1028 |
+
yield f"data: Logical Consistency: AI {s_float(vec_ai.get('logical_consistency_score'))} | GT {s_float(gt_data.get('logical_consistency_score'))}\n"
|
| 1029 |
+
yield f"data: Emotional Manipul. : AI {s_float(vec_ai.get('emotional_manipulation_score'))} | GT {s_float(gt_data.get('emotional_manipulation_score'))}\n"
|
| 1030 |
+
yield f"data: Video-Audio Align : AI {s_float(mod_ai.get('video_audio_score'))} | GT {s_float(gt_data.get('video_audio_score'))}\n"
|
| 1031 |
+
yield f"data: Video-Caption Align: AI {s_float(mod_ai.get('video_caption_score'))} | GT {s_float(gt_data.get('video_caption_score'))}\n"
|
| 1032 |
+
yield f"data: Audio-Caption Align: AI {s_float(mod_ai.get('audio_caption_score'))} | GT {s_float(gt_
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|