Spaces:

GlazedDon0t
/

liarMP4

Sleeping

App Files Files Community

liarMP4 / src /user_analysis_logic.py

GlazedDon0t

fina p3

5dae8fe 2 days ago

raw

history blame contribute delete

5.85 kB

	import os
	import csv
	import json
	import logging
	import asyncio
	from pathlib import Path
	import inference_logic

	# Module-level logger named after this module; both functions below log through it.
	# No handlers or levels are set in this file.
	logger = logging.getLogger(__name__)

	# --- Prompts for User Analysis ---

	# Template for the user-profiling request. It is rendered with str.format()
	# in generate_user_profile_report, which supplies two placeholders:
	#   {username}      - the handle being analysed (the template adds the "@")
	#   {timeline_text} - the newline-joined post lines from load_user_history
	# The doubled braces ({{ and }}) around the JSON example are str.format
	# escapes and render as single literal braces in the final prompt.
	PROMPT_USER_PROFILING = """
	You are an Expert Intelligence Analyst specializing in Information Integrity and Social Influence Operations.

	TASK:
	Analyze the following timeline of social media posts from a single user: "@{username}".
	Your goal is to construct a "Credibility & Bias Profile" based on their historical behavior.

	INPUT DATA (Recent Posts):
	{timeline_text}

	ANALYSIS REQUIREMENTS:
	1. Thematic Clusters: What subjects does this user repeatedly post about? (e.g., "Crypto", "US Politics", "Climate Skepticism").
	2. Echo Chamber Indicators: Does the user frequently repost specific domains or engage with specific narratives without adding nuance?
	3. Emotional Valence: Analyze the aggregate emotional tone (Alarmist, Neutral, Aggressive, Satirical).
	4. Bias Detection: Identify explicit political or ideological biases based on the text.
	5. Credibility Weighting: Based on the content, assign a "Historical Credibility Score" (0.0 to 1.0).
	* 0.0 = High frequency of inflammatory/unverified claims.
	* 1.0 = Consistently neutral or verified sourcing.

	OUTPUT FORMAT (Strict JSON):
	{{
	"username": "@{username}",
	"thematic_clusters": ["Topic A", "Topic B"],
	"echo_chamber_detected": boolean,
	"bias_assessment": "Description of bias...",
	"emotional_valence": "Dominant tone...",
	"credibility_score": float,
	"summary_profile": "A concise paragraph summarizing the user's role in the information ecosystem."
	}}
	"""

	async def load_user_history(username: str, limit: int = 50) -> str:
	    """
	    Read the user's history.csv and format it into a text block for the LLM.

	    Args:
	        username: Name of the profile directory under ``data/profiles``.
	        limit: Maximum number of rows to include, taken from the end of the
	            file. Zero or a negative value yields an empty timeline.

	    Returns:
	        One formatted line per post, joined with newlines. Returns "" when
	        the file does not exist, has no rows, ``limit`` is not positive, or
	        reading/formatting fails (the failure is logged, not raised).
	    """
	    # A plain rows[-limit:] would return *every* row for limit == 0 (because
	    # -0 == 0) and would drop the head of the file for negative limits.
	    if limit <= 0:
	        return ""

	    csv_path = Path(f"data/profiles/{username}/history.csv")
	    if not csv_path.exists():
	        return ""

	    try:
	        # newline='' lets the csv module itself handle line breaks that are
	        # embedded inside quoted fields (multi-line post text).
	        with open(csv_path, 'r', encoding='utf-8', errors='replace', newline='') as f:
	            rows = list(csv.DictReader(f))

	        # NOTE(review): the tail of the file is treated as the most recent
	        # posts. If the scraper writes newest-first, this selects the oldest
	        # rows instead - confirm the file ordering.
	        recent_rows = rows[-limit:]

	        # NOTE(review): rows flagged is_reply == 'True' are labelled REPOST;
	        # confirm that this is the intended label for replies.
	        timeline_entries = [
	            (
	                f"[{row['timestamp']}] "
	                f"{'REPOST' if row.get('is_reply') == 'True' else 'POST'}: "
	                f"\"{row['text']}\" "
	                f"(Likes: {row['metric_likes']}, Views: {row['metric_views']})"
	            )
	            for row in recent_rows
	        ]
	    except Exception as e:
	        # Best-effort loader: any read/format problem (including a missing
	        # column) results in an empty timeline rather than an exception.
	        logger.error(f"Error reading history for {username}: {e}")
	        return ""

	    return "\n".join(timeline_entries)

	async def generate_user_profile_report(username: str, model: str = "gemini-1.5-pro") -> dict:
	    """
	    Orchestrates the analysis pipeline:
	    1. Load History.
	    2. Construct Prompt.
	    3. Call LLM (Vertex AI via google-genai, configured from the environment).
	    4. Save JSON Report.

	    Environment variables read:
	        VERTEX_PROJECT_ID: required to attempt a real model call.
	        VERTEX_LOCATION: region for project-based auth (default "us-central1").
	        VERTEX_API_KEY: optional; when set, the client authenticates with the
	            key instead of project/location.

	    Args:
	        username: Profile directory name under ``data/profiles``; also
	            substituted into the prompt.
	        model: Model identifier passed to ``generate_content``.

	    Returns:
	        The report dictionary. It is the parsed model output, a mock report
	        when the backend is not configured, or ``{"error": ...}`` when the
	        history is empty or the model call fails. Except for the
	        empty-history case, the same dictionary is also written to
	        ``data/profiles/<username>/analysis_report.json``.

	    Raises:
	        OSError: if the report file cannot be written.
	    """
	    logger.info(f"Starting analysis for user: {username}")

	    timeline_text = await load_user_history(username)
	    if not timeline_text:
	        return {"error": "No history found or empty timeline."}

	    # Format Prompt
	    prompt = PROMPT_USER_PROFILING.format(username=username, timeline_text=timeline_text)

	    report_json = {}

	    try:
	        project_id = os.getenv("VERTEX_PROJECT_ID")
	        location = os.getenv("VERTEX_LOCATION", "us-central1")
	        api_key = os.getenv("VERTEX_API_KEY")

	        if inference_logic.genai and project_id:
	            from google.genai import Client
	            from google.genai.types import GenerateContentConfig

	            if api_key:
	                # The client rejects project/location combined with an API
	                # key (they are mutually exclusive initializer arguments),
	                # so key-based auth must be passed on its own.
	                client = Client(vertexai=True, api_key=api_key)
	            else:
	                client = Client(vertexai=True, project=project_id, location=location)

	            # generate_content is a blocking network call; run it in a worker
	            # thread so this coroutine does not stall the event loop.
	            response = await asyncio.to_thread(
	                client.models.generate_content,
	                model=model,
	                contents=prompt,
	                config=GenerateContentConfig(response_mime_type="application/json"),
	            )
	            report_text = response.text
	            if not report_text:
	                # Surface a clear message instead of a TypeError from json.loads(None).
	                raise ValueError("Model returned an empty response.")
	            report_json = json.loads(report_text)

	        else:
	            # Fallback Mock for Demo/LITE mode
	            logger.warning("Vertex AI credentials not found. Generating Mock Analysis.")
	            # Carries every key of the schema requested in PROMPT_USER_PROFILING
	            # so consumers can read mock and real reports the same way.
	            report_json = {
	                "username": f"@{username}",
	                "thematic_clusters": ["Simulated Topic 1", "Simulated Topic 2"],
	                "echo_chamber_detected": False,
	                "bias_assessment": "System running in LITE mode. Configure Vertex AI for real analysis.",
	                "emotional_valence": "Simulated (LITE mode)",
	                "credibility_score": 0.5,
	                "summary_profile": "Mock profile generated because AI backend is not active.",
	            }

	    except Exception as e:
	        # Top-level boundary: any SDK, network or JSON failure becomes an
	        # error report instead of propagating to the caller.
	        logger.error(f"LLM Analysis failed: {e}")
	        report_json = {"error": str(e)}

	    # Save Report (error reports are persisted as well)
	    output_path = Path(f"data/profiles/{username}/analysis_report.json")
	    with open(output_path, 'w', encoding='utf-8') as f:
	        json.dump(report_json, f, indent=2)

	    return report_json