Commit ·
5dae8fe
1
Parent(s): 4d2be90
final p3
Browse files- frontend/src/App.tsx +16 -15
- src/agent_logic.py +2 -2
- src/inference_logic.py +11 -5
- src/user_analysis_logic.py +147 -147
frontend/src/App.tsx
CHANGED
|
@@ -15,7 +15,7 @@ function App() {
|
|
| 15 |
const logContainerRef = useRef<HTMLDivElement>(null);
|
| 16 |
|
| 17 |
// Processing Config State
|
| 18 |
-
const
|
| 19 |
const[apiKey, setApiKey] = useState('');
|
| 20 |
const[baseUrl, setBaseUrl] = useState('https://ellm.nrp-nautilus.io/v1'); // NRP Default
|
| 21 |
const[modelName, setModelName] = useState('qwen3'); // Default
|
|
@@ -23,32 +23,32 @@ function App() {
|
|
| 23 |
const [location, setLocation] = useState('us-central1');
|
| 24 |
const[includeComments, setIncludeComments] = useState(false);
|
| 25 |
const[reasoningMethod, setReasoningMethod] = useState('cot');
|
| 26 |
-
const
|
| 27 |
const[customQuery, setCustomQuery] = useState('');
|
| 28 |
const [maxRetries, setMaxRetries] = useState(1);
|
| 29 |
-
const
|
| 30 |
|
| 31 |
-
const
|
| 32 |
const[useCode, setUseCode] = useState(false);
|
| 33 |
|
| 34 |
// Data States
|
| 35 |
const[queueList, setQueueList] = useState<any[]>([]);
|
| 36 |
const [selectedQueueItems, setSelectedQueueItems] = useState<Set<string>>(new Set());
|
| 37 |
-
const
|
| 38 |
const[lastQueueIndex, setLastQueueIndex] = useState<number | null>(null);
|
| 39 |
|
| 40 |
-
const
|
| 41 |
const [profileList, setProfileList] = useState<any[]>([]);
|
| 42 |
const[selectedProfile, setSelectedProfile] = useState<any>(null);
|
| 43 |
const [profilePosts, setProfilePosts] = useState<any[]>([]);
|
| 44 |
-
const
|
| 45 |
|
| 46 |
const[datasetList, setDatasetList] = useState<any[]>([]);
|
| 47 |
const[selectedItems, setSelectedItems] = useState<Set<string>>(new Set());
|
| 48 |
const[lastDatasetIndex, setLastDatasetIndex] = useState<number | null>(null);
|
| 49 |
|
| 50 |
const [benchmarks, setBenchmarks] = useState<any>(null);
|
| 51 |
-
const
|
| 52 |
const[refreshTrigger, setRefreshTrigger] = useState(0);
|
| 53 |
|
| 54 |
// Tags
|
|
@@ -57,13 +57,13 @@ function App() {
|
|
| 57 |
// Manual Labeling State
|
| 58 |
const[manualLink, setManualLink] = useState('');
|
| 59 |
const [manualCaption, setManualCaption] = useState('');
|
| 60 |
-
const
|
| 61 |
const[manualReasoning, setManualReasoning] = useState('');
|
| 62 |
const[manualScores, setManualScores] = useState({
|
| 63 |
visual: 5, audio: 5, source: 5, logic: 5, emotion: 5,
|
| 64 |
va: 5, vc: 5, ac: 5, final: 50
|
| 65 |
});
|
| 66 |
-
const
|
| 67 |
const[aiReference, setAiReference] = useState<any>(null);
|
| 68 |
const[labelBrowserMode, setLabelBrowserMode] = useState<'queue' | 'dataset'>('queue');
|
| 69 |
const[labelFilter, setLabelFilter] = useState('');
|
|
@@ -77,7 +77,7 @@ function App() {
|
|
| 77 |
const [agentConfig, setAgentConfig] = useState({ use_search: true, use_code: false });
|
| 78 |
|
| 79 |
// Resampling configuration
|
| 80 |
-
const
|
| 81 |
|
| 82 |
// Drag Selection references
|
| 83 |
const isDraggingQueueRef = useRef(false);
|
|
@@ -85,10 +85,10 @@ function App() {
|
|
| 85 |
|
| 86 |
// Quick Demo State
|
| 87 |
const[demoLink, setDemoLink] = useState('');
|
| 88 |
-
const
|
| 89 |
const[demoIsProcessing, setDemoIsProcessing] = useState(false);
|
| 90 |
const[demoResult, setDemoResult] = useState<any>(null);
|
| 91 |
-
const
|
| 92 |
const demoLogContainerRef = useRef<HTMLDivElement>(null);
|
| 93 |
|
| 94 |
useEffect(() => {
|
|
@@ -696,8 +696,9 @@ function App() {
|
|
| 696 |
return (
|
| 697 |
<div className="flex h-screen w-full bg-[#09090b] text-slate-200 font-sans overflow-hidden">
|
| 698 |
<datalist id="modelSuggestions">
|
| 699 |
-
<option value="gemini-1.5-pro
|
| 700 |
-
<option value="gemini-
|
|
|
|
| 701 |
<option value="qwen3" />
|
| 702 |
<option value="gpt-oss" />
|
| 703 |
<option value="kimi" />
|
|
|
|
| 15 |
const logContainerRef = useRef<HTMLDivElement>(null);
|
| 16 |
|
| 17 |
// Processing Config State
|
| 18 |
+
const[modelProvider, setModelProvider] = useState('nrp');
|
| 19 |
const[apiKey, setApiKey] = useState('');
|
| 20 |
const[baseUrl, setBaseUrl] = useState('https://ellm.nrp-nautilus.io/v1'); // NRP Default
|
| 21 |
const[modelName, setModelName] = useState('qwen3'); // Default
|
|
|
|
| 23 |
const [location, setLocation] = useState('us-central1');
|
| 24 |
const[includeComments, setIncludeComments] = useState(false);
|
| 25 |
const[reasoningMethod, setReasoningMethod] = useState('cot');
|
| 26 |
+
const[promptTemplate, setPromptTemplate] = useState('standard');
|
| 27 |
const[customQuery, setCustomQuery] = useState('');
|
| 28 |
const [maxRetries, setMaxRetries] = useState(1);
|
| 29 |
+
const[availablePrompts, setAvailablePrompts] = useState<any[]>([]);
|
| 30 |
|
| 31 |
+
const[useSearch, setUseSearch] = useState(false);
|
| 32 |
const[useCode, setUseCode] = useState(false);
|
| 33 |
|
| 34 |
// Data States
|
| 35 |
const[queueList, setQueueList] = useState<any[]>([]);
|
| 36 |
const [selectedQueueItems, setSelectedQueueItems] = useState<Set<string>>(new Set());
|
| 37 |
+
const[expandedQueueItems, setExpandedQueueItems] = useState<Set<string>>(new Set());
|
| 38 |
const[lastQueueIndex, setLastQueueIndex] = useState<number | null>(null);
|
| 39 |
|
| 40 |
+
const[singleLinkInput, setSingleLinkInput] = useState('');
|
| 41 |
const [profileList, setProfileList] = useState<any[]>([]);
|
| 42 |
const[selectedProfile, setSelectedProfile] = useState<any>(null);
|
| 43 |
const [profilePosts, setProfilePosts] = useState<any[]>([]);
|
| 44 |
+
const[integrityBoard, setIntegrityBoard] = useState<any[]>([]);
|
| 45 |
|
| 46 |
const[datasetList, setDatasetList] = useState<any[]>([]);
|
| 47 |
const[selectedItems, setSelectedItems] = useState<Set<string>>(new Set());
|
| 48 |
const[lastDatasetIndex, setLastDatasetIndex] = useState<number | null>(null);
|
| 49 |
|
| 50 |
const [benchmarks, setBenchmarks] = useState<any>(null);
|
| 51 |
+
const[leaderboard, setLeaderboard] = useState<any[]>([]);
|
| 52 |
const[refreshTrigger, setRefreshTrigger] = useState(0);
|
| 53 |
|
| 54 |
// Tags
|
|
|
|
| 57 |
// Manual Labeling State
|
| 58 |
const[manualLink, setManualLink] = useState('');
|
| 59 |
const [manualCaption, setManualCaption] = useState('');
|
| 60 |
+
const[manualTags, setManualTags] = useState('');
|
| 61 |
const[manualReasoning, setManualReasoning] = useState('');
|
| 62 |
const[manualScores, setManualScores] = useState({
|
| 63 |
visual: 5, audio: 5, source: 5, logic: 5, emotion: 5,
|
| 64 |
va: 5, vc: 5, ac: 5, final: 50
|
| 65 |
});
|
| 66 |
+
const[showRubric, setShowRubric] = useState(false);
|
| 67 |
const[aiReference, setAiReference] = useState<any>(null);
|
| 68 |
const[labelBrowserMode, setLabelBrowserMode] = useState<'queue' | 'dataset'>('queue');
|
| 69 |
const[labelFilter, setLabelFilter] = useState('');
|
|
|
|
| 77 |
const [agentConfig, setAgentConfig] = useState({ use_search: true, use_code: false });
|
| 78 |
|
| 79 |
// Resampling configuration
|
| 80 |
+
const[resampleCount, setResampleCount] = useState<number>(1);
|
| 81 |
|
| 82 |
// Drag Selection references
|
| 83 |
const isDraggingQueueRef = useRef(false);
|
|
|
|
| 85 |
|
| 86 |
// Quick Demo State
|
| 87 |
const[demoLink, setDemoLink] = useState('');
|
| 88 |
+
const[demoLogs, setDemoLogs] = useState('');
|
| 89 |
const[demoIsProcessing, setDemoIsProcessing] = useState(false);
|
| 90 |
const[demoResult, setDemoResult] = useState<any>(null);
|
| 91 |
+
const[showDemoConfig, setShowDemoConfig] = useState(false);
|
| 92 |
const demoLogContainerRef = useRef<HTMLDivElement>(null);
|
| 93 |
|
| 94 |
useEffect(() => {
|
|
|
|
| 696 |
return (
|
| 697 |
<div className="flex h-screen w-full bg-[#09090b] text-slate-200 font-sans overflow-hidden">
|
| 698 |
<datalist id="modelSuggestions">
|
| 699 |
+
<option value="gemini-1.5-pro" />
|
| 700 |
+
<option value="gemini-1.5-flash" />
|
| 701 |
+
<option value="gemini-2.0-flash" />
|
| 702 |
<option value="qwen3" />
|
| 703 |
<option value="gpt-oss" />
|
| 704 |
<option value="kimi" />
|
src/agent_logic.py
CHANGED
|
@@ -42,7 +42,7 @@ async def _analyze_video_async(video_url: str, context: str, agent_config: dict)
|
|
| 42 |
api_key = agent_config.get("api_key", os.getenv("GEMINI_API_KEY", ""))
|
| 43 |
project_id = agent_config.get("project_id", os.getenv("VERTEX_PROJECT_ID", ""))
|
| 44 |
location = agent_config.get("location", os.getenv("VERTEX_LOCATION", "us-central1"))
|
| 45 |
-
model_name = agent_config.get("model_name", os.getenv("VERTEX_MODEL_NAME", "gemini-1.5-pro
|
| 46 |
reasoning_method = agent_config.get("reasoning_method", "cot")
|
| 47 |
prompt_template = agent_config.get("prompt_template", "standard")
|
| 48 |
|
|
@@ -325,7 +325,7 @@ def create_a2a_app():
|
|
| 325 |
)
|
| 326 |
|
| 327 |
if provider == 'vertex' and not project_id:
|
| 328 |
-
reply = f"Welcome to the LiarMP4 Agent Nexus!\n\nIt looks like you haven't configured **Vertex AI** yet. Please enter your Google Cloud Project ID in the 'Inference Config' panel on the left, or tell me directly: *'set project id to
|
| 329 |
elif provider == 'gemini' and not api_key:
|
| 330 |
reply = f"👋 Welcome to the LiarMP4 Agent Nexus!\n\nIt looks like you haven't configured **Gemini** yet. Please enter your API Key in the 'Inference Config' panel on the left, or tell me directly: *'set api key to[YOUR_KEY]'*.\n\n{base_capabilities}"
|
| 331 |
else:
|
|
|
|
| 42 |
api_key = agent_config.get("api_key", os.getenv("GEMINI_API_KEY", ""))
|
| 43 |
project_id = agent_config.get("project_id", os.getenv("VERTEX_PROJECT_ID", ""))
|
| 44 |
location = agent_config.get("location", os.getenv("VERTEX_LOCATION", "us-central1"))
|
| 45 |
+
model_name = agent_config.get("model_name", os.getenv("VERTEX_MODEL_NAME", "gemini-1.5-pro"))
|
| 46 |
reasoning_method = agent_config.get("reasoning_method", "cot")
|
| 47 |
prompt_template = agent_config.get("prompt_template", "standard")
|
| 48 |
|
|
|
|
| 325 |
)
|
| 326 |
|
| 327 |
if provider == 'vertex' and not project_id:
|
| 328 |
+
reply = f"Welcome to the LiarMP4 Agent Nexus!\n\nIt looks like you haven't configured **Vertex AI** yet. Please enter your Google Cloud Project ID in the 'Inference Config' panel on the left, or tell me directly: *'set project id to[YOUR_PROJECT]'*.\n\n{base_capabilities}"
|
| 329 |
elif provider == 'gemini' and not api_key:
|
| 330 |
reply = f"👋 Welcome to the LiarMP4 Agent Nexus!\n\nIt looks like you haven't configured **Gemini** yet. Please enter your API Key in the 'Inference Config' panel on the left, or tell me directly: *'set api key to[YOUR_KEY]'*.\n\n{base_capabilities}"
|
| 331 |
else:
|
src/inference_logic.py
CHANGED
|
@@ -140,7 +140,9 @@ async def generate_simple_text(prompt: str, model_type: str, config: dict):
|
|
| 140 |
if model_type == 'gemini':
|
| 141 |
if genai_legacy is None: return "Error: Legacy SDK missing."
|
| 142 |
genai_legacy.configure(api_key=config.get("api_key"))
|
| 143 |
-
|
|
|
|
|
|
|
| 144 |
response = await loop.run_in_executor(
|
| 145 |
None,
|
| 146 |
lambda: model.generate_content(prompt, generation_config={"temperature": 0.0})
|
|
@@ -157,7 +159,7 @@ async def generate_simple_text(prompt: str, model_type: str, config: dict):
|
|
| 157 |
response = await loop.run_in_executor(
|
| 158 |
None,
|
| 159 |
lambda: cl.models.generate_content(
|
| 160 |
-
model=config
|
| 161 |
contents=prompt,
|
| 162 |
config=GenerateContentConfig(temperature=0.0)
|
| 163 |
)
|
|
@@ -298,7 +300,10 @@ async def run_gemini_labeling_pipeline(video_path: str, caption: str, transcript
|
|
| 298 |
active_tools.append({"code_execution": {}})
|
| 299 |
system_persona += "\n- You MUST use the Code Execution tool for any necessary calculations, data processing, or statistical verifications."
|
| 300 |
|
| 301 |
-
|
|
|
|
|
|
|
|
|
|
| 302 |
toon_schema = SCHEMA_REASONING if include_comments else SCHEMA_SIMPLE
|
| 303 |
score_instructions = SCORE_INSTRUCTIONS_REASONING if include_comments else SCORE_INSTRUCTIONS_SIMPLE
|
| 304 |
tag_list_text = get_formatted_tag_list()
|
|
@@ -337,7 +342,7 @@ async def run_gemini_labeling_pipeline(video_path: str, caption: str, transcript
|
|
| 337 |
|
| 338 |
macro_prompt = FCOT_MACRO_PROMPT.format(system_persona=system_persona, caption=caption, transcript=transcript)
|
| 339 |
save_debug_log(request_id, 'prompt', macro_prompt, attempt, 'fcot_macro')
|
| 340 |
-
inputs1 =
|
| 341 |
if uploaded_file and uploaded_file.state.name != "FAILED": inputs1.insert(0, uploaded_file)
|
| 342 |
res1 = await loop.run_in_executor(None, lambda: chat.send_message(inputs1))
|
| 343 |
macro_hypothesis = res1.text
|
|
@@ -406,7 +411,8 @@ async def run_vertex_labeling_pipeline(video_path: str, caption: str, transcript
|
|
| 406 |
|
| 407 |
project_id = vertex_config.get("project_id")
|
| 408 |
location = vertex_config.get("location", "us-central1")
|
| 409 |
-
model_name = vertex_config.get("model_name", "gemini-1.5-pro
|
|
|
|
| 410 |
max_retries = int(vertex_config.get("max_retries", 1))
|
| 411 |
api_key = vertex_config.get("api_key")
|
| 412 |
|
|
|
|
| 140 |
if model_type == 'gemini':
|
| 141 |
if genai_legacy is None: return "Error: Legacy SDK missing."
|
| 142 |
genai_legacy.configure(api_key=config.get("api_key"))
|
| 143 |
+
model_name = config.get("model_name", "gemini-1.5-pro")
|
| 144 |
+
if not model_name: model_name = "gemini-1.5-pro"
|
| 145 |
+
model = genai_legacy.GenerativeModel(model_name)
|
| 146 |
response = await loop.run_in_executor(
|
| 147 |
None,
|
| 148 |
lambda: model.generate_content(prompt, generation_config={"temperature": 0.0})
|
|
|
|
| 159 |
response = await loop.run_in_executor(
|
| 160 |
None,
|
| 161 |
lambda: cl.models.generate_content(
|
| 162 |
+
model=config.get('model_name', 'gemini-1.5-pro'),
|
| 163 |
contents=prompt,
|
| 164 |
config=GenerateContentConfig(temperature=0.0)
|
| 165 |
)
|
|
|
|
| 300 |
active_tools.append({"code_execution": {}})
|
| 301 |
system_persona += "\n- You MUST use the Code Execution tool for any necessary calculations, data processing, or statistical verifications."
|
| 302 |
|
| 303 |
+
model_name = gemini_config.get("model_name", "gemini-1.5-pro")
|
| 304 |
+
if not model_name: model_name = "gemini-1.5-pro"
|
| 305 |
+
model = genai_legacy.GenerativeModel(model_name, tools=active_tools if active_tools else None)
|
| 306 |
+
|
| 307 |
toon_schema = SCHEMA_REASONING if include_comments else SCHEMA_SIMPLE
|
| 308 |
score_instructions = SCORE_INSTRUCTIONS_REASONING if include_comments else SCORE_INSTRUCTIONS_SIMPLE
|
| 309 |
tag_list_text = get_formatted_tag_list()
|
|
|
|
| 342 |
|
| 343 |
macro_prompt = FCOT_MACRO_PROMPT.format(system_persona=system_persona, caption=caption, transcript=transcript)
|
| 344 |
save_debug_log(request_id, 'prompt', macro_prompt, attempt, 'fcot_macro')
|
| 345 |
+
inputs1 =[macro_prompt]
|
| 346 |
if uploaded_file and uploaded_file.state.name != "FAILED": inputs1.insert(0, uploaded_file)
|
| 347 |
res1 = await loop.run_in_executor(None, lambda: chat.send_message(inputs1))
|
| 348 |
macro_hypothesis = res1.text
|
|
|
|
| 411 |
|
| 412 |
project_id = vertex_config.get("project_id")
|
| 413 |
location = vertex_config.get("location", "us-central1")
|
| 414 |
+
model_name = vertex_config.get("model_name", "gemini-1.5-pro")
|
| 415 |
+
if not model_name: model_name = "gemini-1.5-pro"
|
| 416 |
max_retries = int(vertex_config.get("max_retries", 1))
|
| 417 |
api_key = vertex_config.get("api_key")
|
| 418 |
|
src/user_analysis_logic.py
CHANGED
|
@@ -1,147 +1,147 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import csv
|
| 3 |
-
import json
|
| 4 |
-
import logging
|
| 5 |
-
import asyncio
|
| 6 |
-
from pathlib import Path
|
| 7 |
-
import inference_logic
|
| 8 |
-
|
| 9 |
-
# Configure Logging
|
| 10 |
-
logger = logging.getLogger(__name__)
|
| 11 |
-
|
| 12 |
-
# --- Prompts for User Analysis ---
|
| 13 |
-
|
| 14 |
-
PROMPT_USER_PROFILING = """
|
| 15 |
-
You are an Expert Intelligence Analyst specializing in Information Integrity and Social Influence Operations.
|
| 16 |
-
|
| 17 |
-
**TASK:**
|
| 18 |
-
Analyze the following timeline of social media posts from a single user: "@{username}".
|
| 19 |
-
Your goal is to construct a "Credibility & Bias Profile" based on their historical behavior.
|
| 20 |
-
|
| 21 |
-
**INPUT DATA (Recent Posts):**
|
| 22 |
-
{timeline_text}
|
| 23 |
-
|
| 24 |
-
**ANALYSIS REQUIREMENTS:**
|
| 25 |
-
1. **Thematic Clusters:** What subjects does this user repeatedly post about? (e.g., "Crypto", "US Politics", "Climate Skepticism").
|
| 26 |
-
2. **Echo Chamber Indicators:** Does the user frequently repost specific domains or engage with specific narratives without adding nuance?
|
| 27 |
-
3. **Emotional Valence:** Analyze the aggregate emotional tone (Alarmist, Neutral, Aggressive, Satirical).
|
| 28 |
-
4. **Bias Detection:** Identify explicit political or ideological biases based on the text.
|
| 29 |
-
5. **Credibility Weighting:** Based on the content, assign a "Historical Credibility Score" (0.0 to 1.0).
|
| 30 |
-
* 0.0 = High frequency of inflammatory/unverified claims.
|
| 31 |
-
* 1.0 = Consistently neutral or verified sourcing.
|
| 32 |
-
|
| 33 |
-
**OUTPUT FORMAT (Strict JSON):**
|
| 34 |
-
{{
|
| 35 |
-
"username": "@{username}",
|
| 36 |
-
"thematic_clusters": ["Topic A", "Topic B"],
|
| 37 |
-
"echo_chamber_detected": boolean,
|
| 38 |
-
"bias_assessment": "Description of bias...",
|
| 39 |
-
"emotional_valence": "Dominant tone...",
|
| 40 |
-
"credibility_score": float,
|
| 41 |
-
"summary_profile": "A concise paragraph summarizing the user's role in the information ecosystem."
|
| 42 |
-
}}
|
| 43 |
-
"""
|
| 44 |
-
|
| 45 |
-
async def load_user_history(username: str, limit: int = 50) -> str:
|
| 46 |
-
"""
|
| 47 |
-
Reads the user's history.csv and formats it into a text block for the LLM.
|
| 48 |
-
"""
|
| 49 |
-
csv_path = Path(f"data/profiles/{username}/history.csv")
|
| 50 |
-
if not csv_path.exists():
|
| 51 |
-
return ""
|
| 52 |
-
|
| 53 |
-
timeline_entries =[]
|
| 54 |
-
try:
|
| 55 |
-
with open(csv_path, 'r', encoding='utf-8', errors='replace') as f:
|
| 56 |
-
reader = csv.DictReader(f)
|
| 57 |
-
# Read all, sort by date descending if needed, but scraper usually does desc
|
| 58 |
-
rows = list(reader)
|
| 59 |
-
# Take latest 'limit' posts
|
| 60 |
-
recent_rows = rows[-limit:]
|
| 61 |
-
|
| 62 |
-
for row in recent_rows:
|
| 63 |
-
entry = (
|
| 64 |
-
f"[{row['timestamp']}] "
|
| 65 |
-
f"{'REPOST' if row.get('is_reply')=='True' else 'POST'}: "
|
| 66 |
-
f"\"{row['text']}\" "
|
| 67 |
-
f"(Likes: {row['metric_likes']}, Views: {row['metric_views']})"
|
| 68 |
-
)
|
| 69 |
-
timeline_entries.append(entry)
|
| 70 |
-
except Exception as e:
|
| 71 |
-
logger.error(f"Error reading history for {username}: {e}")
|
| 72 |
-
return ""
|
| 73 |
-
|
| 74 |
-
return "\n".join(timeline_entries)
|
| 75 |
-
|
| 76 |
-
async def generate_user_profile_report(username: str):
|
| 77 |
-
"""
|
| 78 |
-
Orchestrates the analysis pipeline:
|
| 79 |
-
1. Load History.
|
| 80 |
-
2. Construct Prompt.
|
| 81 |
-
3. Call LLM (using Vertex/Gemini config from environment or default).
|
| 82 |
-
4. Save JSON Report.
|
| 83 |
-
"""
|
| 84 |
-
logger.info(f"Starting analysis for user: {username}")
|
| 85 |
-
|
| 86 |
-
timeline_text = await load_user_history(username)
|
| 87 |
-
if not timeline_text:
|
| 88 |
-
return {"error": "No history found or empty timeline."}
|
| 89 |
-
|
| 90 |
-
# Format Prompt
|
| 91 |
-
prompt = PROMPT_USER_PROFILING.format(username=username, timeline_text=timeline_text)
|
| 92 |
-
|
| 93 |
-
# Use Vertex AI by default if configured, else try Gemini Legacy
|
| 94 |
-
# For now, we reuse the pipeline functions in inference_logic if available,
|
| 95 |
-
# or create a direct call here for simplicity.
|
| 96 |
-
|
| 97 |
-
# We'll assume Vertex is the primary backend for this advanced analysis
|
| 98 |
-
# This requires valid credentials in the environment or passed config.
|
| 99 |
-
# Fallback to a placeholder if no model is loaded.
|
| 100 |
-
|
| 101 |
-
report_json = {}
|
| 102 |
-
|
| 103 |
-
try:
|
| 104 |
-
# Attempt to use the existing Vertex Client in inference_logic if initialized
|
| 105 |
-
# Otherwise, we instantiate a quick one if env vars exist
|
| 106 |
-
project_id = os.getenv("VERTEX_PROJECT_ID")
|
| 107 |
-
location = os.getenv("VERTEX_LOCATION", "us-central1")
|
| 108 |
-
api_key = os.getenv("VERTEX_API_KEY")
|
| 109 |
-
|
| 110 |
-
if inference_logic.genai and project_id:
|
| 111 |
-
from google.genai import Client
|
| 112 |
-
from google.genai.types import GenerateContentConfig
|
| 113 |
-
|
| 114 |
-
if api_key:
|
| 115 |
-
client = Client(vertexai=True, project=project_id, location=location, api_key=api_key)
|
| 116 |
-
else:
|
| 117 |
-
client = Client(vertexai=True, project=project_id, location=location)
|
| 118 |
-
|
| 119 |
-
response = client.models.generate_content(
|
| 120 |
-
model="gemini-1.5-pro
|
| 121 |
-
contents=prompt,
|
| 122 |
-
config=GenerateContentConfig(response_mime_type="application/json")
|
| 123 |
-
)
|
| 124 |
-
report_text = response.text
|
| 125 |
-
report_json = json.loads(report_text)
|
| 126 |
-
|
| 127 |
-
else:
|
| 128 |
-
# Fallback Mock for Demo/LITE mode
|
| 129 |
-
logger.warning("Vertex AI credentials not found. Generating Mock Analysis.")
|
| 130 |
-
report_json = {
|
| 131 |
-
"username": f"@{username}",
|
| 132 |
-
"thematic_clusters":["Simulated Topic 1", "Simulated Topic 2"],
|
| 133 |
-
"bias_assessment": "System running in LITE mode. Configure Vertex AI for real analysis.",
|
| 134 |
-
"credibility_score": 0.5,
|
| 135 |
-
"summary_profile": "Mock profile generated because AI backend is not active."
|
| 136 |
-
}
|
| 137 |
-
|
| 138 |
-
except Exception as e:
|
| 139 |
-
logger.error(f"LLM Analysis failed: {e}")
|
| 140 |
-
report_json = {"error": str(e)}
|
| 141 |
-
|
| 142 |
-
# Save Report
|
| 143 |
-
output_path = Path(f"data/profiles/{username}/analysis_report.json")
|
| 144 |
-
with open(output_path, 'w', encoding='utf-8') as f:
|
| 145 |
-
json.dump(report_json, f, indent=2)
|
| 146 |
-
|
| 147 |
-
return report_json
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import csv
|
| 3 |
+
import json
|
| 4 |
+
import logging
|
| 5 |
+
import asyncio
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
import inference_logic
|
| 8 |
+
|
| 9 |
+
# Configure Logging
|
| 10 |
+
logger = logging.getLogger(__name__)
|
| 11 |
+
|
| 12 |
+
# --- Prompts for User Analysis ---
|
| 13 |
+
|
| 14 |
+
PROMPT_USER_PROFILING = """
|
| 15 |
+
You are an Expert Intelligence Analyst specializing in Information Integrity and Social Influence Operations.
|
| 16 |
+
|
| 17 |
+
**TASK:**
|
| 18 |
+
Analyze the following timeline of social media posts from a single user: "@{username}".
|
| 19 |
+
Your goal is to construct a "Credibility & Bias Profile" based on their historical behavior.
|
| 20 |
+
|
| 21 |
+
**INPUT DATA (Recent Posts):**
|
| 22 |
+
{timeline_text}
|
| 23 |
+
|
| 24 |
+
**ANALYSIS REQUIREMENTS:**
|
| 25 |
+
1. **Thematic Clusters:** What subjects does this user repeatedly post about? (e.g., "Crypto", "US Politics", "Climate Skepticism").
|
| 26 |
+
2. **Echo Chamber Indicators:** Does the user frequently repost specific domains or engage with specific narratives without adding nuance?
|
| 27 |
+
3. **Emotional Valence:** Analyze the aggregate emotional tone (Alarmist, Neutral, Aggressive, Satirical).
|
| 28 |
+
4. **Bias Detection:** Identify explicit political or ideological biases based on the text.
|
| 29 |
+
5. **Credibility Weighting:** Based on the content, assign a "Historical Credibility Score" (0.0 to 1.0).
|
| 30 |
+
* 0.0 = High frequency of inflammatory/unverified claims.
|
| 31 |
+
* 1.0 = Consistently neutral or verified sourcing.
|
| 32 |
+
|
| 33 |
+
**OUTPUT FORMAT (Strict JSON):**
|
| 34 |
+
{{
|
| 35 |
+
"username": "@{username}",
|
| 36 |
+
"thematic_clusters": ["Topic A", "Topic B"],
|
| 37 |
+
"echo_chamber_detected": boolean,
|
| 38 |
+
"bias_assessment": "Description of bias...",
|
| 39 |
+
"emotional_valence": "Dominant tone...",
|
| 40 |
+
"credibility_score": float,
|
| 41 |
+
"summary_profile": "A concise paragraph summarizing the user's role in the information ecosystem."
|
| 42 |
+
}}
|
| 43 |
+
"""
|
| 44 |
+
|
| 45 |
+
async def load_user_history(username: str, limit: int = 50) -> str:
    """Read the user's history.csv and format it into a text block for the LLM.

    Args:
        username: Profile directory name under ``data/profiles/``.
        limit: Maximum number of most-recent posts to include.

    Returns:
        Newline-joined timeline entries (one per post), or "" when the
        history file is missing or unreadable.
    """
    csv_path = Path(f"data/profiles/{username}/history.csv")
    if not csv_path.exists():
        return ""

    timeline_entries = []
    try:
        with open(csv_path, 'r', encoding='utf-8', errors='replace') as f:
            reader = csv.DictReader(f)
            rows = list(reader)
            # Take the latest 'limit' posts via a tail slice.
            # NOTE(review): this assumes rows are written oldest-first; the
            # scraper reportedly writes descending — confirm, otherwise this
            # selects the *oldest* posts instead of the most recent.
            recent_rows = rows[-limit:]

            for row in recent_rows:
                # Required columns: timestamp, text, metric_likes,
                # metric_views; optional: is_reply ("True"/"False" strings).
                entry = (
                    f"[{row['timestamp']}] "
                    f"{'REPOST' if row.get('is_reply')=='True' else 'POST'}: "
                    f"\"{row['text']}\" "
                    f"(Likes: {row['metric_likes']}, Views: {row['metric_views']})"
                )
                timeline_entries.append(entry)
    except Exception as e:
        # A malformed CSV discards the whole timeline rather than returning
        # a partial, potentially misleading one.
        logging.getLogger(__name__).error(f"Error reading history for {username}: {e}")
        return ""

    return "\n".join(timeline_entries)
|
| 75 |
+
|
| 76 |
+
async def generate_user_profile_report(username: str):
    """Orchestrate the user-profiling pipeline for a single username.

    1. Load the user's post history from data/profiles/<username>/history.csv.
    2. Construct the profiling prompt.
    3. Call the LLM (Vertex AI when configured via env vars; mock fallback otherwise).
    4. Save the JSON report to data/profiles/<username>/analysis_report.json.

    Returns:
        The report dict, or ``{"error": ...}`` when history is missing or
        the LLM call fails.
    """
    logger.info(f"Starting analysis for user: {username}")

    timeline_text = await load_user_history(username)
    if not timeline_text:
        return {"error": "No history found or empty timeline."}

    # Format Prompt
    prompt = PROMPT_USER_PROFILING.format(username=username, timeline_text=timeline_text)

    # Vertex AI is the primary backend for this analysis and requires valid
    # credentials in the environment; otherwise a mock report is produced
    # (Demo/LITE mode).
    report_json = {}

    try:
        project_id = os.getenv("VERTEX_PROJECT_ID")
        location = os.getenv("VERTEX_LOCATION", "us-central1")
        api_key = os.getenv("VERTEX_API_KEY")

        if inference_logic.genai and project_id:
            from google.genai import Client
            from google.genai.types import GenerateContentConfig

            # API key is optional; without it the Client falls back to
            # ambient application-default credentials.
            if api_key:
                client = Client(vertexai=True, project=project_id, location=location, api_key=api_key)
            else:
                client = Client(vertexai=True, project=project_id, location=location)

            # NOTE(review): this is a blocking call inside an async function;
            # consider loop.run_in_executor if it ever runs on a hot event loop.
            response = client.models.generate_content(
                model="gemini-1.5-pro",
                contents=prompt,
                config=GenerateContentConfig(response_mime_type="application/json")
            )
            report_json = json.loads(response.text)

        else:
            # Fallback Mock for Demo/LITE mode
            logger.warning("Vertex AI credentials not found. Generating Mock Analysis.")
            report_json = {
                "username": f"@{username}",
                "thematic_clusters": ["Simulated Topic 1", "Simulated Topic 2"],
                "bias_assessment": "System running in LITE mode. Configure Vertex AI for real analysis.",
                "credibility_score": 0.5,
                "summary_profile": "Mock profile generated because AI backend is not active."
            }

    except Exception as e:
        logger.error(f"LLM Analysis failed: {e}")
        report_json = {"error": str(e)}

    # Save Report. Ensure the profile directory exists first (robustness:
    # the original assumed the scraper had already created it).
    output_path = Path(f"data/profiles/{username}/analysis_report.json")
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(report_json, f, indent=2)

    return report_json
|