File size: 5,850 Bytes
5dae8fe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import os
import csv
import json
import logging
import asyncio
from pathlib import Path
import inference_logic

# Configure Logging
logger = logging.getLogger(__name__)

# --- Prompts for User Analysis ---

PROMPT_USER_PROFILING = """
You are an Expert Intelligence Analyst specializing in Information Integrity and Social Influence Operations.

**TASK:**
Analyze the following timeline of social media posts from a single user: "@{username}".
Your goal is to construct a "Credibility & Bias Profile" based on their historical behavior.

**INPUT DATA (Recent Posts):**
{timeline_text}

**ANALYSIS REQUIREMENTS:**
1.  **Thematic Clusters:** What subjects does this user repeatedly post about? (e.g., "Crypto", "US Politics", "Climate Skepticism").
2.  **Echo Chamber Indicators:** Does the user frequently repost specific domains or engage with specific narratives without adding nuance?
3.  **Emotional Valence:** Analyze the aggregate emotional tone (Alarmist, Neutral, Aggressive, Satirical).
4.  **Bias Detection:** Identify explicit political or ideological biases based on the text.
5.  **Credibility Weighting:** Based on the content, assign a "Historical Credibility Score" (0.0 to 1.0).
    *   0.0 = High frequency of inflammatory/unverified claims.
    *   1.0 = Consistently neutral or verified sourcing.

**OUTPUT FORMAT (Strict JSON):**
{{
  "username": "@{username}",
  "thematic_clusters": ["Topic A", "Topic B"],
  "echo_chamber_detected": boolean,
  "bias_assessment": "Description of bias...",
  "emotional_valence": "Dominant tone...",
  "credibility_score": float,
  "summary_profile": "A concise paragraph summarizing the user's role in the information ecosystem."
}}
"""

async def load_user_history(username: str, limit: int = 50) -> str:
    """
    Read data/profiles/{username}/history.csv and format the most recent
    posts into a plain-text timeline block for the LLM prompt.

    Args:
        username: Profile directory name under data/profiles/.
        limit: Maximum number of posts, taken from the tail of the CSV
            (assumes the scraper appends in chronological order - TODO confirm).

    Returns:
        Newline-joined timeline entries, or "" when the history file is
        missing, empty, or unreadable.
    """
    csv_path = Path(f"data/profiles/{username}/history.csv")
    if not csv_path.exists():
        return ""

    timeline_entries = []
    try:
        # errors='replace' keeps a single bad byte from aborting the read.
        with open(csv_path, 'r', encoding='utf-8', errors='replace') as f:
            reader = csv.DictReader(f)
            rows = list(reader)
            # Take the last 'limit' rows as the most recent posts.
            recent_rows = rows[-limit:]

            for row in recent_rows:
                # Use .get() with defaults so a missing column degrades one
                # entry instead of raising KeyError and discarding the whole
                # timeline via the except below.
                # NOTE(review): label says REPOST but the column checked is
                # is_reply - confirm the scraper's column semantics.
                entry = (
                    f"[{row.get('timestamp', '')}] "
                    f"{'REPOST' if row.get('is_reply') == 'True' else 'POST'}: "
                    f"\"{row.get('text', '')}\" "
                    f"(Likes: {row.get('metric_likes', '?')}, "
                    f"Views: {row.get('metric_views', '?')})"
                )
                timeline_entries.append(entry)
    except Exception as e:
        logger.error(f"Error reading history for {username}: {e}")
        return ""

    return "\n".join(timeline_entries)

async def generate_user_profile_report(username: str):
    """
    Orchestrate the user-analysis pipeline:
    1. Load the user's post history from CSV.
    2. Build the profiling prompt.
    3. Call the LLM (Vertex AI when configured, mock fallback otherwise).
    4. Persist the JSON report to data/profiles/{username}/analysis_report.json.

    Args:
        username: Profile directory name under data/profiles/.

    Returns:
        The parsed report dict, or a dict with an "error" key on failure.
        Note: no report file is written when the history is missing/empty.
    """
    logger.info(f"Starting analysis for user: {username}")

    timeline_text = await load_user_history(username)
    if not timeline_text:
        # Nothing to analyze; bail out before building a prompt.
        return {"error": "No history found or empty timeline."}

    # Render the prompt template with the user's timeline.
    prompt = PROMPT_USER_PROFILING.format(username=username, timeline_text=timeline_text)

    report_json = {}

    try:
        # Prefer Vertex AI when the google-genai SDK loaded in inference_logic
        # and a project id is configured in the environment.
        project_id = os.getenv("VERTEX_PROJECT_ID")
        location = os.getenv("VERTEX_LOCATION", "us-central1")
        api_key = os.getenv("VERTEX_API_KEY")

        if inference_logic.genai and project_id:
            from google.genai import Client
            from google.genai.types import GenerateContentConfig

            if api_key:
                client = Client(vertexai=True, project=project_id, location=location, api_key=api_key)
            else:
                # No explicit API key: rely on ambient credentials (ADC).
                client = Client(vertexai=True, project=project_id, location=location)

            # NOTE(review): generate_content is a blocking call inside an
            # async function; consider asyncio.to_thread if this runs on a
            # latency-sensitive event loop.
            response = client.models.generate_content(
                model="gemini-1.5-pro",
                contents=prompt,
                config=GenerateContentConfig(response_mime_type="application/json")
            )
            # response_mime_type="application/json" asks the model for strict
            # JSON; json.loads still guards against malformed output (raises
            # into the except below).
            report_json = json.loads(response.text)

        else:
            # Fallback mock for demo/LITE mode (no AI backend configured).
            logger.warning("Vertex AI credentials not found. Generating Mock Analysis.")
            report_json = {
                "username": f"@{username}",
                "thematic_clusters": ["Simulated Topic 1", "Simulated Topic 2"],
                "bias_assessment": "System running in LITE mode. Configure Vertex AI for real analysis.",
                "credibility_score": 0.5,
                "summary_profile": "Mock profile generated because AI backend is not active."
            }

    except Exception as e:
        logger.error(f"LLM Analysis failed: {e}")
        report_json = {"error": str(e)}

    # Persist the report next to the user's history. Ensure the directory
    # exists in case the profile folder was removed mid-run (the original
    # open() would raise FileNotFoundError here).
    output_path = Path(f"data/profiles/{username}/analysis_report.json")
    output_path.parent.mkdir(parents=True, exist_ok=True)
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(report_json, f, indent=2)

    return report_json