import os
import csv
import json
import logging
import asyncio
from pathlib import Path

import inference_logic

logger = logging.getLogger(__name__)

PROMPT_USER_PROFILING = """
You are an Expert Intelligence Analyst specializing in Information Integrity and Social Influence Operations.

**TASK:**
Analyze the following timeline of social media posts from a single user: "@{username}".
Your goal is to construct a "Credibility & Bias Profile" based on their historical behavior.

**INPUT DATA (Recent Posts):**
{timeline_text}

**ANALYSIS REQUIREMENTS:**
1. **Thematic Clusters:** What subjects does this user repeatedly post about? (e.g., "Crypto", "US Politics", "Climate Skepticism").
2. **Echo Chamber Indicators:** Does the user frequently repost specific domains or engage with specific narratives without adding nuance?
3. **Emotional Valence:** Analyze the aggregate emotional tone (Alarmist, Neutral, Aggressive, Satirical).
4. **Bias Detection:** Identify explicit political or ideological biases based on the text.
5. **Credibility Weighting:** Based on the content, assign a "Historical Credibility Score" (0.0 to 1.0).
   * 0.0 = High frequency of inflammatory/unverified claims.
   * 1.0 = Consistently neutral or verified sourcing.

**OUTPUT FORMAT (Strict JSON):**
{{
"username": "@{username}",
"thematic_clusters": ["Topic A", "Topic B"],
"echo_chamber_detected": boolean,
"bias_assessment": "Description of bias...",
"emotional_valence": "Dominant tone...",
"credibility_score": float,
"summary_profile": "A concise paragraph summarizing the user's role in the information ecosystem."
}}
"""

async def load_user_history(username: str, limit: int = 50) -> str:
    """
    Reads the user's history.csv and formats it into a text block for the LLM.
    """
    csv_path = Path(f"data/profiles/{username}/history.csv")
    if not csv_path.exists():
        return ""

    timeline_entries = []
    try:
        with open(csv_path, 'r', encoding='utf-8', errors='replace') as f:
            reader = csv.DictReader(f)
            rows = list(reader)

            # Keep only the most recent posts so the prompt stays compact.
            recent_rows = rows[-limit:]
            for row in recent_rows:
                entry = (
                    f"[{row['timestamp']}] "
                    f"{'REPLY' if row.get('is_reply') == 'True' else 'POST'}: "
                    f"\"{row['text']}\" "
                    f"(Likes: {row['metric_likes']}, Views: {row['metric_views']})"
                )
                timeline_entries.append(entry)
    except Exception as e:
        logger.error(f"Error reading history for {username}: {e}")
        return ""

    return "\n".join(timeline_entries)

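# Expected layout of data/profiles/<username>/history.csv, inferred from the
# fields read in load_user_history(); the sample row below is purely illustrative.
#
#   timestamp,is_reply,text,metric_likes,metric_views
#   2024-05-01T12:00:00Z,False,"Example post text",10,250
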
async def generate_user_profile_report(username: str):
    """
    Orchestrates the analysis pipeline:
    1. Load History.
    2. Construct Prompt.
    3. Call LLM (using Vertex/Gemini config from environment or default).
    4. Save JSON Report.
    """
    logger.info(f"Starting analysis for user: {username}")

    timeline_text = await load_user_history(username)
    if not timeline_text:
        return {"error": "No history found or empty timeline."}

    prompt = PROMPT_USER_PROFILING.format(username=username, timeline_text=timeline_text)

    report_json = {}

    try:
        # Vertex AI / Gemini configuration is read from the environment.
        project_id = os.getenv("VERTEX_PROJECT_ID")
        location = os.getenv("VERTEX_LOCATION", "us-central1")
        api_key = os.getenv("VERTEX_API_KEY")

        if inference_logic.genai and project_id:
            from google.genai import Client
            from google.genai.types import GenerateContentConfig

            if api_key:
                client = Client(vertexai=True, project=project_id, location=location, api_key=api_key)
            else:
                client = Client(vertexai=True, project=project_id, location=location)

            # Run the blocking SDK call in a worker thread so the event loop
            # is not stalled while waiting for the model.
            response = await asyncio.to_thread(
                client.models.generate_content,
                model="gemini-1.5-pro",
                contents=prompt,
                config=GenerateContentConfig(response_mime_type="application/json"),
            )
            report_text = response.text
            report_json = json.loads(report_text)

        else:
            # No credentials configured: fall back to a mock report so the
            # pipeline still produces output in LITE mode.
            logger.warning("Vertex AI credentials not found. Generating Mock Analysis.")
            report_json = {
                "username": f"@{username}",
                "thematic_clusters": ["Simulated Topic 1", "Simulated Topic 2"],
                "bias_assessment": "System running in LITE mode. Configure Vertex AI for real analysis.",
                "credibility_score": 0.5,
                "summary_profile": "Mock profile generated because AI backend is not active."
            }

    except Exception as e:
        logger.error(f"LLM Analysis failed: {e}")
        report_json = {"error": str(e)}

    # Persist the report alongside the user's history data.
    output_path = Path(f"data/profiles/{username}/analysis_report.json")
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(report_json, f, indent=2)

    return report_json
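
# --- Example entry point (illustrative sketch, not part of the original module) ---
# A minimal way to run the profiler from the command line; the argument handling
# and the fallback username below are assumptions for demonstration only.
if __name__ == "__main__":
    import sys

    target_user = sys.argv[1] if len(sys.argv) > 1 else "example_user"
    result = asyncio.run(generate_user_profile_report(target_user))
    print(json.dumps(result, indent=2))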