GlazedDon0t committed on
Commit
5dae8fe
·
1 Parent(s): 4d2be90
frontend/src/App.tsx CHANGED
@@ -15,7 +15,7 @@ function App() {
15
  const logContainerRef = useRef<HTMLDivElement>(null);
16
 
17
  // Processing Config State
18
- const [modelProvider, setModelProvider] = useState('nrp');
19
  const[apiKey, setApiKey] = useState('');
20
  const[baseUrl, setBaseUrl] = useState('https://ellm.nrp-nautilus.io/v1'); // NRP Default
21
  const[modelName, setModelName] = useState('qwen3'); // Default
@@ -23,32 +23,32 @@ function App() {
23
  const [location, setLocation] = useState('us-central1');
24
  const[includeComments, setIncludeComments] = useState(false);
25
  const[reasoningMethod, setReasoningMethod] = useState('cot');
26
- const [promptTemplate, setPromptTemplate] = useState('standard');
27
  const[customQuery, setCustomQuery] = useState('');
28
  const [maxRetries, setMaxRetries] = useState(1);
29
- const [availablePrompts, setAvailablePrompts] = useState<any[]>([]);
30
 
31
- const [useSearch, setUseSearch] = useState(false);
32
  const[useCode, setUseCode] = useState(false);
33
 
34
  // Data States
35
  const[queueList, setQueueList] = useState<any[]>([]);
36
  const [selectedQueueItems, setSelectedQueueItems] = useState<Set<string>>(new Set());
37
- const [expandedQueueItems, setExpandedQueueItems] = useState<Set<string>>(new Set());
38
  const[lastQueueIndex, setLastQueueIndex] = useState<number | null>(null);
39
 
40
- const [singleLinkInput, setSingleLinkInput] = useState('');
41
  const [profileList, setProfileList] = useState<any[]>([]);
42
  const[selectedProfile, setSelectedProfile] = useState<any>(null);
43
  const [profilePosts, setProfilePosts] = useState<any[]>([]);
44
- const [integrityBoard, setIntegrityBoard] = useState<any[]>([]);
45
 
46
  const[datasetList, setDatasetList] = useState<any[]>([]);
47
  const[selectedItems, setSelectedItems] = useState<Set<string>>(new Set());
48
  const[lastDatasetIndex, setLastDatasetIndex] = useState<number | null>(null);
49
 
50
  const [benchmarks, setBenchmarks] = useState<any>(null);
51
- const [leaderboard, setLeaderboard] = useState<any[]>([]);
52
  const[refreshTrigger, setRefreshTrigger] = useState(0);
53
 
54
  // Tags
@@ -57,13 +57,13 @@ function App() {
57
  // Manual Labeling State
58
  const[manualLink, setManualLink] = useState('');
59
  const [manualCaption, setManualCaption] = useState('');
60
- const [manualTags, setManualTags] = useState('');
61
  const[manualReasoning, setManualReasoning] = useState('');
62
  const[manualScores, setManualScores] = useState({
63
  visual: 5, audio: 5, source: 5, logic: 5, emotion: 5,
64
  va: 5, vc: 5, ac: 5, final: 50
65
  });
66
- const [showRubric, setShowRubric] = useState(false);
67
  const[aiReference, setAiReference] = useState<any>(null);
68
  const[labelBrowserMode, setLabelBrowserMode] = useState<'queue' | 'dataset'>('queue');
69
  const[labelFilter, setLabelFilter] = useState('');
@@ -77,7 +77,7 @@ function App() {
77
  const [agentConfig, setAgentConfig] = useState({ use_search: true, use_code: false });
78
 
79
  // Resampling configuration
80
- const [resampleCount, setResampleCount] = useState<number>(1);
81
 
82
  // Drag Selection references
83
  const isDraggingQueueRef = useRef(false);
@@ -85,10 +85,10 @@ function App() {
85
 
86
  // Quick Demo State
87
  const[demoLink, setDemoLink] = useState('');
88
- const [demoLogs, setDemoLogs] = useState('');
89
  const[demoIsProcessing, setDemoIsProcessing] = useState(false);
90
  const[demoResult, setDemoResult] = useState<any>(null);
91
- const [showDemoConfig, setShowDemoConfig] = useState(false);
92
  const demoLogContainerRef = useRef<HTMLDivElement>(null);
93
 
94
  useEffect(() => {
@@ -696,8 +696,9 @@ function App() {
696
  return (
697
  <div className="flex h-screen w-full bg-[#09090b] text-slate-200 font-sans overflow-hidden">
698
  <datalist id="modelSuggestions">
699
- <option value="gemini-1.5-pro-preview-0409" />
700
- <option value="gemini-2.0-flash-exp" />
 
701
  <option value="qwen3" />
702
  <option value="gpt-oss" />
703
  <option value="kimi" />
 
15
  const logContainerRef = useRef<HTMLDivElement>(null);
16
 
17
  // Processing Config State
18
+ const[modelProvider, setModelProvider] = useState('nrp');
19
  const[apiKey, setApiKey] = useState('');
20
  const[baseUrl, setBaseUrl] = useState('https://ellm.nrp-nautilus.io/v1'); // NRP Default
21
  const[modelName, setModelName] = useState('qwen3'); // Default
 
23
  const [location, setLocation] = useState('us-central1');
24
  const[includeComments, setIncludeComments] = useState(false);
25
  const[reasoningMethod, setReasoningMethod] = useState('cot');
26
+ const[promptTemplate, setPromptTemplate] = useState('standard');
27
  const[customQuery, setCustomQuery] = useState('');
28
  const [maxRetries, setMaxRetries] = useState(1);
29
+ const[availablePrompts, setAvailablePrompts] = useState<any[]>([]);
30
 
31
+ const[useSearch, setUseSearch] = useState(false);
32
  const[useCode, setUseCode] = useState(false);
33
 
34
  // Data States
35
  const[queueList, setQueueList] = useState<any[]>([]);
36
  const [selectedQueueItems, setSelectedQueueItems] = useState<Set<string>>(new Set());
37
+ const[expandedQueueItems, setExpandedQueueItems] = useState<Set<string>>(new Set());
38
  const[lastQueueIndex, setLastQueueIndex] = useState<number | null>(null);
39
 
40
+ const[singleLinkInput, setSingleLinkInput] = useState('');
41
  const [profileList, setProfileList] = useState<any[]>([]);
42
  const[selectedProfile, setSelectedProfile] = useState<any>(null);
43
  const [profilePosts, setProfilePosts] = useState<any[]>([]);
44
+ const[integrityBoard, setIntegrityBoard] = useState<any[]>([]);
45
 
46
  const[datasetList, setDatasetList] = useState<any[]>([]);
47
  const[selectedItems, setSelectedItems] = useState<Set<string>>(new Set());
48
  const[lastDatasetIndex, setLastDatasetIndex] = useState<number | null>(null);
49
 
50
  const [benchmarks, setBenchmarks] = useState<any>(null);
51
+ const[leaderboard, setLeaderboard] = useState<any[]>([]);
52
  const[refreshTrigger, setRefreshTrigger] = useState(0);
53
 
54
  // Tags
 
57
  // Manual Labeling State
58
  const[manualLink, setManualLink] = useState('');
59
  const [manualCaption, setManualCaption] = useState('');
60
+ const[manualTags, setManualTags] = useState('');
61
  const[manualReasoning, setManualReasoning] = useState('');
62
  const[manualScores, setManualScores] = useState({
63
  visual: 5, audio: 5, source: 5, logic: 5, emotion: 5,
64
  va: 5, vc: 5, ac: 5, final: 50
65
  });
66
+ const[showRubric, setShowRubric] = useState(false);
67
  const[aiReference, setAiReference] = useState<any>(null);
68
  const[labelBrowserMode, setLabelBrowserMode] = useState<'queue' | 'dataset'>('queue');
69
  const[labelFilter, setLabelFilter] = useState('');
 
77
  const [agentConfig, setAgentConfig] = useState({ use_search: true, use_code: false });
78
 
79
  // Resampling configuration
80
+ const[resampleCount, setResampleCount] = useState<number>(1);
81
 
82
  // Drag Selection references
83
  const isDraggingQueueRef = useRef(false);
 
85
 
86
  // Quick Demo State
87
  const[demoLink, setDemoLink] = useState('');
88
+ const[demoLogs, setDemoLogs] = useState('');
89
  const[demoIsProcessing, setDemoIsProcessing] = useState(false);
90
  const[demoResult, setDemoResult] = useState<any>(null);
91
+ const[showDemoConfig, setShowDemoConfig] = useState(false);
92
  const demoLogContainerRef = useRef<HTMLDivElement>(null);
93
 
94
  useEffect(() => {
 
696
  return (
697
  <div className="flex h-screen w-full bg-[#09090b] text-slate-200 font-sans overflow-hidden">
698
  <datalist id="modelSuggestions">
699
+ <option value="gemini-1.5-pro" />
700
+ <option value="gemini-1.5-flash" />
701
+ <option value="gemini-2.0-flash" />
702
  <option value="qwen3" />
703
  <option value="gpt-oss" />
704
  <option value="kimi" />
src/agent_logic.py CHANGED
@@ -42,7 +42,7 @@ async def _analyze_video_async(video_url: str, context: str, agent_config: dict)
42
  api_key = agent_config.get("api_key", os.getenv("GEMINI_API_KEY", ""))
43
  project_id = agent_config.get("project_id", os.getenv("VERTEX_PROJECT_ID", ""))
44
  location = agent_config.get("location", os.getenv("VERTEX_LOCATION", "us-central1"))
45
- model_name = agent_config.get("model_name", os.getenv("VERTEX_MODEL_NAME", "gemini-1.5-pro-preview-0409"))
46
  reasoning_method = agent_config.get("reasoning_method", "cot")
47
  prompt_template = agent_config.get("prompt_template", "standard")
48
 
@@ -325,7 +325,7 @@ def create_a2a_app():
325
  )
326
 
327
  if provider == 'vertex' and not project_id:
328
- reply = f"Welcome to the LiarMP4 Agent Nexus!\n\nIt looks like you haven't configured **Vertex AI** yet. Please enter your Google Cloud Project ID in the 'Inference Config' panel on the left, or tell me directly: *'set project id to [YOUR_PROJECT]'*.\n\n{base_capabilities}"
329
  elif provider == 'gemini' and not api_key:
330
  reply = f"👋 Welcome to the LiarMP4 Agent Nexus!\n\nIt looks like you haven't configured **Gemini** yet. Please enter your API Key in the 'Inference Config' panel on the left, or tell me directly: *'set api key to[YOUR_KEY]'*.\n\n{base_capabilities}"
331
  else:
 
42
  api_key = agent_config.get("api_key", os.getenv("GEMINI_API_KEY", ""))
43
  project_id = agent_config.get("project_id", os.getenv("VERTEX_PROJECT_ID", ""))
44
  location = agent_config.get("location", os.getenv("VERTEX_LOCATION", "us-central1"))
45
+ model_name = agent_config.get("model_name", os.getenv("VERTEX_MODEL_NAME", "gemini-1.5-pro"))
46
  reasoning_method = agent_config.get("reasoning_method", "cot")
47
  prompt_template = agent_config.get("prompt_template", "standard")
48
 
 
325
  )
326
 
327
  if provider == 'vertex' and not project_id:
328
+ reply = f"Welcome to the LiarMP4 Agent Nexus!\n\nIt looks like you haven't configured **Vertex AI** yet. Please enter your Google Cloud Project ID in the 'Inference Config' panel on the left, or tell me directly: *'set project id to[YOUR_PROJECT]'*.\n\n{base_capabilities}"
329
  elif provider == 'gemini' and not api_key:
330
  reply = f"👋 Welcome to the LiarMP4 Agent Nexus!\n\nIt looks like you haven't configured **Gemini** yet. Please enter your API Key in the 'Inference Config' panel on the left, or tell me directly: *'set api key to[YOUR_KEY]'*.\n\n{base_capabilities}"
331
  else:
src/inference_logic.py CHANGED
@@ -140,7 +140,9 @@ async def generate_simple_text(prompt: str, model_type: str, config: dict):
140
  if model_type == 'gemini':
141
  if genai_legacy is None: return "Error: Legacy SDK missing."
142
  genai_legacy.configure(api_key=config.get("api_key"))
143
- model = genai_legacy.GenerativeModel(config.get("model_name", "models/gemini-2.0-flash-exp"))
 
 
144
  response = await loop.run_in_executor(
145
  None,
146
  lambda: model.generate_content(prompt, generation_config={"temperature": 0.0})
@@ -157,7 +159,7 @@ async def generate_simple_text(prompt: str, model_type: str, config: dict):
157
  response = await loop.run_in_executor(
158
  None,
159
  lambda: cl.models.generate_content(
160
- model=config['model_name'],
161
  contents=prompt,
162
  config=GenerateContentConfig(temperature=0.0)
163
  )
@@ -298,7 +300,10 @@ async def run_gemini_labeling_pipeline(video_path: str, caption: str, transcript
298
  active_tools.append({"code_execution": {}})
299
  system_persona += "\n- You MUST use the Code Execution tool for any necessary calculations, data processing, or statistical verifications."
300
 
301
- model = genai_legacy.GenerativeModel("models/gemini-2.0-flash-exp", tools=active_tools if active_tools else None)
 
 
 
302
  toon_schema = SCHEMA_REASONING if include_comments else SCHEMA_SIMPLE
303
  score_instructions = SCORE_INSTRUCTIONS_REASONING if include_comments else SCORE_INSTRUCTIONS_SIMPLE
304
  tag_list_text = get_formatted_tag_list()
@@ -337,7 +342,7 @@ async def run_gemini_labeling_pipeline(video_path: str, caption: str, transcript
337
 
338
  macro_prompt = FCOT_MACRO_PROMPT.format(system_persona=system_persona, caption=caption, transcript=transcript)
339
  save_debug_log(request_id, 'prompt', macro_prompt, attempt, 'fcot_macro')
340
- inputs1 = [macro_prompt]
341
  if uploaded_file and uploaded_file.state.name != "FAILED": inputs1.insert(0, uploaded_file)
342
  res1 = await loop.run_in_executor(None, lambda: chat.send_message(inputs1))
343
  macro_hypothesis = res1.text
@@ -406,7 +411,8 @@ async def run_vertex_labeling_pipeline(video_path: str, caption: str, transcript
406
 
407
  project_id = vertex_config.get("project_id")
408
  location = vertex_config.get("location", "us-central1")
409
- model_name = vertex_config.get("model_name", "gemini-1.5-pro-preview-0409")
 
410
  max_retries = int(vertex_config.get("max_retries", 1))
411
  api_key = vertex_config.get("api_key")
412
 
 
140
  if model_type == 'gemini':
141
  if genai_legacy is None: return "Error: Legacy SDK missing."
142
  genai_legacy.configure(api_key=config.get("api_key"))
143
+ model_name = config.get("model_name", "gemini-1.5-pro")
144
+ if not model_name: model_name = "gemini-1.5-pro"
145
+ model = genai_legacy.GenerativeModel(model_name)
146
  response = await loop.run_in_executor(
147
  None,
148
  lambda: model.generate_content(prompt, generation_config={"temperature": 0.0})
 
159
  response = await loop.run_in_executor(
160
  None,
161
  lambda: cl.models.generate_content(
162
+ model=config.get('model_name', 'gemini-1.5-pro'),
163
  contents=prompt,
164
  config=GenerateContentConfig(temperature=0.0)
165
  )
 
300
  active_tools.append({"code_execution": {}})
301
  system_persona += "\n- You MUST use the Code Execution tool for any necessary calculations, data processing, or statistical verifications."
302
 
303
+ model_name = gemini_config.get("model_name", "gemini-1.5-pro")
304
+ if not model_name: model_name = "gemini-1.5-pro"
305
+ model = genai_legacy.GenerativeModel(model_name, tools=active_tools if active_tools else None)
306
+
307
  toon_schema = SCHEMA_REASONING if include_comments else SCHEMA_SIMPLE
308
  score_instructions = SCORE_INSTRUCTIONS_REASONING if include_comments else SCORE_INSTRUCTIONS_SIMPLE
309
  tag_list_text = get_formatted_tag_list()
 
342
 
343
  macro_prompt = FCOT_MACRO_PROMPT.format(system_persona=system_persona, caption=caption, transcript=transcript)
344
  save_debug_log(request_id, 'prompt', macro_prompt, attempt, 'fcot_macro')
345
+ inputs1 =[macro_prompt]
346
  if uploaded_file and uploaded_file.state.name != "FAILED": inputs1.insert(0, uploaded_file)
347
  res1 = await loop.run_in_executor(None, lambda: chat.send_message(inputs1))
348
  macro_hypothesis = res1.text
 
411
 
412
  project_id = vertex_config.get("project_id")
413
  location = vertex_config.get("location", "us-central1")
414
+ model_name = vertex_config.get("model_name", "gemini-1.5-pro")
415
+ if not model_name: model_name = "gemini-1.5-pro"
416
  max_retries = int(vertex_config.get("max_retries", 1))
417
  api_key = vertex_config.get("api_key")
418
 
src/user_analysis_logic.py CHANGED
@@ -1,147 +1,147 @@
1
- import os
2
- import csv
3
- import json
4
- import logging
5
- import asyncio
6
- from pathlib import Path
7
- import inference_logic
8
-
9
- # Configure Logging
10
- logger = logging.getLogger(__name__)
11
-
12
- # --- Prompts for User Analysis ---
13
-
14
- PROMPT_USER_PROFILING = """
15
- You are an Expert Intelligence Analyst specializing in Information Integrity and Social Influence Operations.
16
-
17
- **TASK:**
18
- Analyze the following timeline of social media posts from a single user: "@{username}".
19
- Your goal is to construct a "Credibility & Bias Profile" based on their historical behavior.
20
-
21
- **INPUT DATA (Recent Posts):**
22
- {timeline_text}
23
-
24
- **ANALYSIS REQUIREMENTS:**
25
- 1. **Thematic Clusters:** What subjects does this user repeatedly post about? (e.g., "Crypto", "US Politics", "Climate Skepticism").
26
- 2. **Echo Chamber Indicators:** Does the user frequently repost specific domains or engage with specific narratives without adding nuance?
27
- 3. **Emotional Valence:** Analyze the aggregate emotional tone (Alarmist, Neutral, Aggressive, Satirical).
28
- 4. **Bias Detection:** Identify explicit political or ideological biases based on the text.
29
- 5. **Credibility Weighting:** Based on the content, assign a "Historical Credibility Score" (0.0 to 1.0).
30
- * 0.0 = High frequency of inflammatory/unverified claims.
31
- * 1.0 = Consistently neutral or verified sourcing.
32
-
33
- **OUTPUT FORMAT (Strict JSON):**
34
- {{
35
- "username": "@{username}",
36
- "thematic_clusters": ["Topic A", "Topic B"],
37
- "echo_chamber_detected": boolean,
38
- "bias_assessment": "Description of bias...",
39
- "emotional_valence": "Dominant tone...",
40
- "credibility_score": float,
41
- "summary_profile": "A concise paragraph summarizing the user's role in the information ecosystem."
42
- }}
43
- """
44
-
45
- async def load_user_history(username: str, limit: int = 50) -> str:
46
- """
47
- Reads the user's history.csv and formats it into a text block for the LLM.
48
- """
49
- csv_path = Path(f"data/profiles/{username}/history.csv")
50
- if not csv_path.exists():
51
- return ""
52
-
53
- timeline_entries =[]
54
- try:
55
- with open(csv_path, 'r', encoding='utf-8', errors='replace') as f:
56
- reader = csv.DictReader(f)
57
- # Read all, sort by date descending if needed, but scraper usually does desc
58
- rows = list(reader)
59
- # Take latest 'limit' posts
60
- recent_rows = rows[-limit:]
61
-
62
- for row in recent_rows:
63
- entry = (
64
- f"[{row['timestamp']}] "
65
- f"{'REPOST' if row.get('is_reply')=='True' else 'POST'}: "
66
- f"\"{row['text']}\" "
67
- f"(Likes: {row['metric_likes']}, Views: {row['metric_views']})"
68
- )
69
- timeline_entries.append(entry)
70
- except Exception as e:
71
- logger.error(f"Error reading history for {username}: {e}")
72
- return ""
73
-
74
- return "\n".join(timeline_entries)
75
-
76
- async def generate_user_profile_report(username: str):
77
- """
78
- Orchestrates the analysis pipeline:
79
- 1. Load History.
80
- 2. Construct Prompt.
81
- 3. Call LLM (using Vertex/Gemini config from environment or default).
82
- 4. Save JSON Report.
83
- """
84
- logger.info(f"Starting analysis for user: {username}")
85
-
86
- timeline_text = await load_user_history(username)
87
- if not timeline_text:
88
- return {"error": "No history found or empty timeline."}
89
-
90
- # Format Prompt
91
- prompt = PROMPT_USER_PROFILING.format(username=username, timeline_text=timeline_text)
92
-
93
- # Use Vertex AI by default if configured, else try Gemini Legacy
94
- # For now, we reuse the pipeline functions in inference_logic if available,
95
- # or create a direct call here for simplicity.
96
-
97
- # We'll assume Vertex is the primary backend for this advanced analysis
98
- # This requires valid credentials in the environment or passed config.
99
- # Fallback to a placeholder if no model is loaded.
100
-
101
- report_json = {}
102
-
103
- try:
104
- # Attempt to use the existing Vertex Client in inference_logic if initialized
105
- # Otherwise, we instantiate a quick one if env vars exist
106
- project_id = os.getenv("VERTEX_PROJECT_ID")
107
- location = os.getenv("VERTEX_LOCATION", "us-central1")
108
- api_key = os.getenv("VERTEX_API_KEY")
109
-
110
- if inference_logic.genai and project_id:
111
- from google.genai import Client
112
- from google.genai.types import GenerateContentConfig
113
-
114
- if api_key:
115
- client = Client(vertexai=True, project=project_id, location=location, api_key=api_key)
116
- else:
117
- client = Client(vertexai=True, project=project_id, location=location)
118
-
119
- response = client.models.generate_content(
120
- model="gemini-1.5-pro-preview-0409",
121
- contents=prompt,
122
- config=GenerateContentConfig(response_mime_type="application/json")
123
- )
124
- report_text = response.text
125
- report_json = json.loads(report_text)
126
-
127
- else:
128
- # Fallback Mock for Demo/LITE mode
129
- logger.warning("Vertex AI credentials not found. Generating Mock Analysis.")
130
- report_json = {
131
- "username": f"@{username}",
132
- "thematic_clusters":["Simulated Topic 1", "Simulated Topic 2"],
133
- "bias_assessment": "System running in LITE mode. Configure Vertex AI for real analysis.",
134
- "credibility_score": 0.5,
135
- "summary_profile": "Mock profile generated because AI backend is not active."
136
- }
137
-
138
- except Exception as e:
139
- logger.error(f"LLM Analysis failed: {e}")
140
- report_json = {"error": str(e)}
141
-
142
- # Save Report
143
- output_path = Path(f"data/profiles/{username}/analysis_report.json")
144
- with open(output_path, 'w', encoding='utf-8') as f:
145
- json.dump(report_json, f, indent=2)
146
-
147
- return report_json
 
1
+ import os
2
+ import csv
3
+ import json
4
+ import logging
5
+ import asyncio
6
+ from pathlib import Path
7
+ import inference_logic
8
+
9
+ # Configure Logging
10
+ logger = logging.getLogger(__name__)
11
+
12
+ # --- Prompts for User Analysis ---
13
+
14
+ PROMPT_USER_PROFILING = """
15
+ You are an Expert Intelligence Analyst specializing in Information Integrity and Social Influence Operations.
16
+
17
+ **TASK:**
18
+ Analyze the following timeline of social media posts from a single user: "@{username}".
19
+ Your goal is to construct a "Credibility & Bias Profile" based on their historical behavior.
20
+
21
+ **INPUT DATA (Recent Posts):**
22
+ {timeline_text}
23
+
24
+ **ANALYSIS REQUIREMENTS:**
25
+ 1. **Thematic Clusters:** What subjects does this user repeatedly post about? (e.g., "Crypto", "US Politics", "Climate Skepticism").
26
+ 2. **Echo Chamber Indicators:** Does the user frequently repost specific domains or engage with specific narratives without adding nuance?
27
+ 3. **Emotional Valence:** Analyze the aggregate emotional tone (Alarmist, Neutral, Aggressive, Satirical).
28
+ 4. **Bias Detection:** Identify explicit political or ideological biases based on the text.
29
+ 5. **Credibility Weighting:** Based on the content, assign a "Historical Credibility Score" (0.0 to 1.0).
30
+ * 0.0 = High frequency of inflammatory/unverified claims.
31
+ * 1.0 = Consistently neutral or verified sourcing.
32
+
33
+ **OUTPUT FORMAT (Strict JSON):**
34
+ {{
35
+ "username": "@{username}",
36
+ "thematic_clusters": ["Topic A", "Topic B"],
37
+ "echo_chamber_detected": boolean,
38
+ "bias_assessment": "Description of bias...",
39
+ "emotional_valence": "Dominant tone...",
40
+ "credibility_score": float,
41
+ "summary_profile": "A concise paragraph summarizing the user's role in the information ecosystem."
42
+ }}
43
+ """
44
+
45
async def load_user_history(username: str, limit: int = 50) -> str:
    """
    Read a user's history.csv and format it into a text timeline for the LLM.

    Args:
        username: Name of the profile directory under ``data/profiles``.
        limit: Maximum number of rows to include, taken from the END of the
            file. A value of 0 or less yields an empty timeline.

    Returns:
        One formatted line per post, joined with newlines. Returns ``""`` when
        the file does not exist, cannot be read/parsed, or ``limit`` is not
        positive.
    """
    # Guard first: rows[-0:] is rows[0:], which would return the ENTIRE file
    # for limit=0, and a negative limit would slice from an arbitrary offset.
    if limit <= 0:
        return ""

    csv_path = Path(f"data/profiles/{username}/history.csv")
    if not csv_path.exists():
        return ""

    try:
        # newline='' is the csv module's documented way to open files so that
        # line breaks embedded in quoted fields are handled by the reader.
        with open(csv_path, 'r', encoding='utf-8', errors='replace', newline='') as f:
            rows = list(csv.DictReader(f))
    except Exception as e:
        logger.error(f"Error reading history for {username}: {e}")
        return ""

    # NOTE(review): the tail of the file is treated as "latest"; this assumes the
    # scraper appends newest rows last — confirm against the scraper's ordering.
    recent_rows = rows[-limit:]

    timeline_entries = []
    for row in recent_rows:
        # NOTE(review): rows flagged is_reply are labelled REPOST; kept as-is,
        # verify this is the intended label.
        post_kind = 'REPOST' if row.get('is_reply') == 'True' else 'POST'
        # .get() with a default keeps the timeline usable when a column is
        # absent from the header, instead of failing the whole history.
        entry = (
            f"[{row.get('timestamp', '')}] "
            f"{post_kind}: "
            f"\"{row.get('text', '')}\" "
            f"(Likes: {row.get('metric_likes', '')}, Views: {row.get('metric_views', '')})"
        )
        timeline_entries.append(entry)

    return "\n".join(timeline_entries)
75
+
76
async def generate_user_profile_report(username: str):
    """
    Orchestrates the user-analysis pipeline:
    1. Load the user's history via load_user_history.
    2. Build the prompt from PROMPT_USER_PROFILING.
    3. Call the LLM through the Vertex AI client when VERTEX_PROJECT_ID is set
       and inference_logic.genai is available; otherwise produce a mock report.
    4. Save the report to data/profiles/<username>/analysis_report.json.

    Args:
        username: Name of the profile directory under ``data/profiles``.

    Returns:
        The report dict. When no timeline could be loaded, returns
        ``{"error": ...}`` immediately and writes nothing. When the LLM call or
        JSON parsing fails, the returned (and saved) report is
        ``{"error": <message>}``.
    """
    logger.info(f"Starting analysis for user: {username}")

    timeline_text = await load_user_history(username)
    if not timeline_text:
        return {"error": "No history found or empty timeline."}

    # Format Prompt
    prompt = PROMPT_USER_PROFILING.format(username=username, timeline_text=timeline_text)

    report_json = {}

    try:
        # Configuration comes from the environment only.
        project_id = os.getenv("VERTEX_PROJECT_ID")
        location = os.getenv("VERTEX_LOCATION", "us-central1")
        api_key = os.getenv("VERTEX_API_KEY")
        # Honour VERTEX_MODEL_NAME like the agent code does; an unset or empty
        # value falls back to the previous hard-coded default.
        model_name = os.getenv("VERTEX_MODEL_NAME") or "gemini-1.5-pro"

        if inference_logic.genai and project_id:
            from google.genai import Client
            from google.genai.types import GenerateContentConfig

            def _request_report_text():
                # Synchronous SDK work, kept in one function so it can run in a
                # worker thread instead of on the event loop.
                # NOTE(review): some google-genai versions may reject project/location
                # combined with api_key — confirm against the installed SDK.
                if api_key:
                    client = Client(vertexai=True, project=project_id, location=location, api_key=api_key)
                else:
                    client = Client(vertexai=True, project=project_id, location=location)

                response = client.models.generate_content(
                    model=model_name,
                    contents=prompt,
                    config=GenerateContentConfig(response_mime_type="application/json")
                )
                return response.text

            # The SDK call is blocking; offload it so other coroutines keep
            # running while the model responds. Exceptions raised in the worker
            # surface at the await and are handled by the except below.
            loop = asyncio.get_running_loop()
            report_text = await loop.run_in_executor(None, _request_report_text)
            report_json = json.loads(report_text)

        else:
            # Fallback Mock for Demo/LITE mode
            logger.warning("Vertex AI credentials not found. Generating Mock Analysis.")
            report_json = {
                "username": f"@{username}",
                "thematic_clusters": ["Simulated Topic 1", "Simulated Topic 2"],
                "bias_assessment": "System running in LITE mode. Configure Vertex AI for real analysis.",
                "credibility_score": 0.5,
                "summary_profile": "Mock profile generated because AI backend is not active."
            }

    except Exception as e:
        logger.error(f"LLM Analysis failed: {e}")
        report_json = {"error": str(e)}

    # Save Report (error reports are written too, matching the returned value)
    output_path = Path(f"data/profiles/{username}/analysis_report.json")
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(report_json, f, indent=2)

    return report_json