# ForesightSphere / src/gemini_analyzer.py
# (provenance: uploaded by syaikhipin — "Upload 25 files", commit 0e66264, verified)
import google.generativeai as genai
import json
import re
from typing import Dict, List, Optional
from datetime import datetime
import time
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from config import config
class GeminiAnalyzer:
    """Disinformation analysis built on the Gemini generative API.

    Wraps a `genai.GenerativeModel` and exposes content risk analysis,
    spread-scenario prediction, and sentiment-trend assessment. Every
    network-backed method degrades gracefully: on any API or parsing
    failure it returns a sensible default structure instead of raising,
    so callers always receive a well-formed dict/list.
    """

    def __init__(self, api_key: str):
        """Configure the Gemini client.

        Args:
            api_key: Google Generative AI API key.
        """
        self.api_key = api_key
        genai.configure(api_key=api_key)
        # Model name comes from project-level config (e.g. "gemini-pro").
        self.model = genai.GenerativeModel(config.GEMINI_MODEL)
        # In-memory cache: cache_key -> (analysis_dict, unix_timestamp).
        self.cache = {}
        self.cache_duration = config.CACHE_DURATION

    def test_connection(self) -> bool:
        """Test if Gemini API is accessible.

        Returns:
            True when a trivial generation call succeeds, False otherwise.
        """
        try:
            self.model.generate_content("Test connection")
            return True
        except Exception as e:
            print(f"❌ Gemini connection failed: {e}")
            return False

    def analyze_content(self, content: str) -> Dict:
        """Analyze content for disinformation indicators.

        Results are cached in memory (keyed on `hash(content)`) for
        `self.cache_duration` seconds to avoid repeated API calls.

        Args:
            content: Raw text to assess.

        Returns:
            Dict with risk_score, factual_accuracy, emotional_manipulation,
            historical_similarity, likely_origin, intent, spread_prediction,
            key_claims, manipulation_tactics, target_demographics,
            geographic_focus and confidence_level. Falls back to a neutral
            default analysis if the API call fails.
        """
        cache_key = f"analyze_{hash(content)}"
        # Check cache first; entries expire after cache_duration seconds.
        if cache_key in self.cache:
            cached_result, timestamp = self.cache[cache_key]
            if time.time() - timestamp < self.cache_duration:
                return cached_result
        prompt = f"""
Analyze this content for disinformation indicators. Provide a comprehensive assessment:
Content: "{content}"
Please assess and respond in the following JSON format:
{{
    "risk_score": <0-100 integer>,
    "factual_accuracy": <0-100 integer>,
    "emotional_manipulation": <0-100 integer>,
    "historical_similarity": "<description of similar past narratives>",
    "likely_origin": "<assessment of likely source/origin>",
    "intent": "<assessment of intent behind the content>",
    "spread_prediction": "<detailed prediction of how this might spread in next 24-48 hours>",
    "key_claims": ["<claim 1>", "<claim 2>", "<claim 3>"],
    "manipulation_tactics": ["<tactic 1>", "<tactic 2>"],
    "target_demographics": ["<demographic 1>", "<demographic 2>"],
    "geographic_focus": ["<region 1>", "<region 2>"],
    "confidence_level": <0-100 integer>
}}
Assessment criteria:
1. Factual accuracy probability (0=completely false, 100=completely accurate)
2. Emotional manipulation tactics present
3. Similar historical narratives for pattern matching
4. Likely origin and intent assessment
5. Predicted spread trajectory considering virality factors
6. Overall risk score based on potential harm and reach
"""
        try:
            response = self.model.generate_content(prompt)
            result_text = response.text
            # Greedily grab the outermost {...} span — models often wrap
            # JSON in prose or markdown fences.
            json_match = re.search(r'\{.*\}', result_text, re.DOTALL)
            if json_match:
                analysis = json.loads(json_match.group())
            else:
                # No JSON found: derive a rough result from keywords.
                analysis = self._parse_fallback_response(result_text, content)
            # Cache the result with its creation time.
            self.cache[cache_key] = (analysis, time.time())
            return analysis
        except Exception as e:
            print(f"❌ Analysis failed: {e}")
            return self._get_default_analysis(content)

    def _parse_fallback_response(self, response_text: str, content: str) -> Dict:
        """Fallback parsing when JSON extraction fails.

        Uses simple keyword matching on the free-text response to pick a
        coarse risk score; all other fields are fixed placeholders.
        """
        risk_score = 50  # Default medium risk
        if any(word in response_text.lower() for word in ['high risk', 'dangerous', 'harmful']):
            risk_score = 85
        elif any(word in response_text.lower() for word in ['low risk', 'safe', 'accurate']):
            risk_score = 25
        return {
            "risk_score": risk_score,
            "factual_accuracy": 50,
            "emotional_manipulation": 60,
            "historical_similarity": "Pattern analysis unavailable",
            "likely_origin": "Unknown source",
            "intent": "Assessment unavailable",
            "spread_prediction": "Moderate spread potential based on content characteristics",
            # Truncate long content so key_claims stays display-friendly.
            "key_claims": [content[:100] + "..." if len(content) > 100 else content],
            "manipulation_tactics": ["Emotional appeal"],
            "target_demographics": ["General population"],
            "geographic_focus": ["Global"],
            "confidence_level": 60
        }

    def _get_default_analysis(self, content: str) -> Dict:
        """Return default analysis when API fails (confidence_level 0)."""
        return {
            "risk_score": 50,
            "factual_accuracy": 50,
            "emotional_manipulation": 50,
            "historical_similarity": "Analysis unavailable - API error",
            "likely_origin": "Unknown",
            "intent": "Unknown",
            "spread_prediction": "Unable to predict - analysis failed",
            "key_claims": ["Analysis failed"],
            "manipulation_tactics": ["Unknown"],
            "target_demographics": ["Unknown"],
            "geographic_focus": ["Unknown"],
            "confidence_level": 0
        }

    def generate_prediction_scenarios(self, narrative_content: str,
                                      historical_patterns: Optional[List[Dict]] = None) -> List[Dict]:
        """Generate multiple prediction scenarios for a narrative.

        Args:
            narrative_content: The narrative text to project forward.
            historical_patterns: Reserved for future use — currently not
                injected into the prompt.

        Returns:
            Up to 5 scenario dicts; default scenarios on any failure.
        """
        prompt = f"""
Based on this narrative content and historical disinformation patterns, generate 3-5 likely evolution scenarios:
Narrative: "{narrative_content}"
For each scenario, provide:
{{
    "scenario_name": "<descriptive name>",
    "description": "<detailed description>",
    "probability": <0-100 percentage>,
    "timeline_hours": <hours until peak impact>,
    "predicted_reach": <estimated number of people reached>,
    "key_events": ["<event 1>", "<event 2>"],
    "mitigation_strategies": ["<strategy 1>", "<strategy 2>"]
}}
Consider factors like:
- Content virality potential
- Current social/political climate
- Platform algorithms
- Historical spread patterns
- Audience susceptibility
"""
        try:
            response = self.model.generate_content(prompt)
            return self._parse_scenarios(response.text)
        except Exception as e:
            print(f"❌ Scenario generation failed: {e}")
            return self._get_default_scenarios()

    def _parse_scenarios(self, response_text: str) -> List[Dict]:
        """Parse scenario dicts out of a free-form Gemini response.

        Extracts every flat (non-nested) {...} span and keeps those that
        parse as JSON. Returns default scenarios when none parse.
        """
        scenarios = []
        # Flat-brace pattern: intentionally skips nested objects.
        for match in re.findall(r'\{[^{}]*\}', response_text, re.DOTALL):
            try:
                scenarios.append(json.loads(match))
            except json.JSONDecodeError:
                # Not valid JSON (e.g. prose in braces) — skip this span.
                continue
        if not scenarios:
            return self._get_default_scenarios()
        return scenarios[:5]  # Limit to 5 scenarios

    def _get_default_scenarios(self) -> List[Dict]:
        """Return three canned scenarios when generation/parsing fails."""
        return [
            {
                "scenario_name": "Minimal Spread",
                "description": "Content remains within original echo chambers with limited amplification",
                "probability": 40,
                "timeline_hours": 12,
                "predicted_reach": 5000,
                "key_events": ["Initial shares", "Limited engagement"],
                "mitigation_strategies": ["Early fact-checking", "Counter-messaging"]
            },
            {
                "scenario_name": "Moderate Viral Growth",
                "description": "Content gains traction across multiple platforms with algorithmic amplification",
                "probability": 35,
                "timeline_hours": 24,
                "predicted_reach": 50000,
                "key_events": ["Cross-platform sharing", "Influencer pickup"],
                "mitigation_strategies": ["Platform reporting", "Authoritative sources"]
            },
            {
                "scenario_name": "Rapid Viral Spread",
                "description": "Content explodes across all major platforms with mainstream media coverage",
                "probability": 25,
                "timeline_hours": 48,
                "predicted_reach": 500000,
                "key_events": ["Media coverage", "Political amplification"],
                "mitigation_strategies": ["Emergency response", "Coordinated debunking"]
            }
        ]

    def assess_sentiment_trends(self, content_list: List[str]) -> Dict:
        """Analyze sentiment trends across multiple pieces of content.

        Args:
            content_list: Related text snippets, numbered into the prompt.

        Returns:
            Dict with overall_sentiment, sentiment_score (-100..100),
            emotion_distribution, sentiment_trajectory and
            key_emotional_triggers; a neutral default on failure.
        """
        prompt = f"""
Analyze the sentiment trends across these related content pieces:
Content pieces:
{chr(10).join([f"{i+1}. {content}" for i, content in enumerate(content_list)])}
Provide analysis in JSON format:
{{
    "overall_sentiment": "<positive/negative/neutral>",
    "sentiment_score": <-100 to 100>,
    "emotion_distribution": {{
        "anger": <0-100>,
        "fear": <0-100>,
        "joy": <0-100>,
        "sadness": <0-100>,
        "disgust": <0-100>
    }},
    "sentiment_trajectory": "<increasing/decreasing/stable>",
    "key_emotional_triggers": ["<trigger 1>", "<trigger 2>"]
}}
"""
        try:
            response = self.model.generate_content(prompt)
            json_match = re.search(r'\{.*\}', response.text, re.DOTALL)
            if json_match:
                return json.loads(json_match.group())
        except Exception as e:
            print(f"❌ Sentiment analysis failed: {e}")
        # Reached when the API fails OR no JSON was found in the reply.
        return {
            "overall_sentiment": "neutral",
            "sentiment_score": 0,
            "emotion_distribution": {
                "anger": 20,
                "fear": 20,
                "joy": 20,
                "sadness": 20,
                "disgust": 20
            },
            "sentiment_trajectory": "stable",
            "key_emotional_triggers": ["Unknown"]
        }

    def clear_cache(self):
        """Clear the analysis cache."""
        self.cache.clear()