Spaces:
Sleeping
Sleeping
| import google.generativeai as genai | |
| import json | |
| import re | |
| from typing import Dict, List, Optional | |
| from datetime import datetime | |
| import time | |
| import sys | |
| import os | |
| sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |
| from config import config | |
class GeminiAnalyzer:
    """Gemini-backed analyzer for disinformation indicators in text content.

    Wraps a ``google.generativeai`` model and exposes high-level analysis
    helpers.  Successful content analyses are cached in memory for
    ``config.CACHE_DURATION`` seconds, keyed by a hash of the content.
    Every public method degrades gracefully: on any API/parsing failure it
    returns a structured default result instead of raising.
    """

    def __init__(self, api_key: str):
        """Configure the Gemini SDK and build the model handle.

        Args:
            api_key: Google Generative AI API key.
        """
        self.api_key = api_key
        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel(config.GEMINI_MODEL)
        # Maps cache key -> (analysis dict, time.time() when stored).
        self.cache: Dict[str, tuple] = {}
        self.cache_duration = config.CACHE_DURATION

    def test_connection(self) -> bool:
        """Test if Gemini API is accessible.

        Returns:
            True when a trivial generation round-trip succeeds, else False.
        """
        try:
            # Result is irrelevant; we only care whether the call raises.
            self.model.generate_content("Test connection")
            return True
        except Exception as e:
            print(f"❌ Gemini connection failed: {e}")
            return False

    def analyze_content(self, content: str) -> Dict:
        """Analyze content for disinformation indicators.

        Args:
            content: Raw text to assess.

        Returns:
            Dict with keys such as ``risk_score``, ``factual_accuracy``,
            ``key_claims`` etc.  Falls back to heuristic parsing when the
            model does not return valid JSON, and to a neutral default
            result when the API call fails entirely.
        """
        # NOTE: hash() of str is salted per process, so cache entries are
        # only valid within this process — which matches the in-memory cache.
        cache_key = f"analyze_{hash(content)}"

        # Serve from cache while fresh; evict stale entries so the cache
        # does not grow without bound across long runs.
        if cache_key in self.cache:
            cached_result, timestamp = self.cache[cache_key]
            if time.time() - timestamp < self.cache_duration:
                return cached_result
            del self.cache[cache_key]

        prompt = f"""
        Analyze this content for disinformation indicators. Provide a comprehensive assessment:
        Content: "{content}"
        Please assess and respond in the following JSON format:
        {{
            "risk_score": <0-100 integer>,
            "factual_accuracy": <0-100 integer>,
            "emotional_manipulation": <0-100 integer>,
            "historical_similarity": "<description of similar past narratives>",
            "likely_origin": "<assessment of likely source/origin>",
            "intent": "<assessment of intent behind the content>",
            "spread_prediction": "<detailed prediction of how this might spread in next 24-48 hours>",
            "key_claims": ["<claim 1>", "<claim 2>", "<claim 3>"],
            "manipulation_tactics": ["<tactic 1>", "<tactic 2>"],
            "target_demographics": ["<demographic 1>", "<demographic 2>"],
            "geographic_focus": ["<region 1>", "<region 2>"],
            "confidence_level": <0-100 integer>
        }}
        Assessment criteria:
        1. Factual accuracy probability (0=completely false, 100=completely accurate)
        2. Emotional manipulation tactics present
        3. Similar historical narratives for pattern matching
        4. Likely origin and intent assessment
        5. Predicted spread trajectory considering virality factors
        6. Overall risk score based on potential harm and reach
        """
        try:
            response = self.model.generate_content(prompt)
            result_text = response.text
            # Models often wrap JSON in prose; grab the outermost braces.
            json_match = re.search(r'\{.*\}', result_text, re.DOTALL)
            if json_match:
                analysis = json.loads(json_match.group())
            else:
                # Fallback parsing when no JSON object is present.
                analysis = self._parse_fallback_response(result_text, content)
            # Cache only successful analyses (defaults are never cached).
            self.cache[cache_key] = (analysis, time.time())
            return analysis
        except Exception as e:
            print(f"❌ Analysis failed: {e}")
            return self._get_default_analysis(content)

    def _parse_fallback_response(self, response_text: str, content: str) -> Dict:
        """Fallback parsing when JSON extraction fails.

        Derives a coarse risk score from keywords in the model's free-text
        reply and fills the remaining fields with neutral placeholders.
        """
        risk_score = 50  # Default medium risk
        lowered = response_text.lower()
        if any(word in lowered for word in ['high risk', 'dangerous', 'harmful']):
            risk_score = 85
        elif any(word in lowered for word in ['low risk', 'safe', 'accurate']):
            risk_score = 25
        return {
            "risk_score": risk_score,
            "factual_accuracy": 50,
            "emotional_manipulation": 60,
            "historical_similarity": "Pattern analysis unavailable",
            "likely_origin": "Unknown source",
            "intent": "Assessment unavailable",
            "spread_prediction": "Moderate spread potential based on content characteristics",
            # Truncate long content so the claim stays human-readable.
            "key_claims": [content[:100] + "..." if len(content) > 100 else content],
            "manipulation_tactics": ["Emotional appeal"],
            "target_demographics": ["General population"],
            "geographic_focus": ["Global"],
            "confidence_level": 60
        }

    def _get_default_analysis(self, content: str) -> Dict:
        """Return a neutral analysis result when the API call fails."""
        return {
            "risk_score": 50,
            "factual_accuracy": 50,
            "emotional_manipulation": 50,
            "historical_similarity": "Analysis unavailable - API error",
            "likely_origin": "Unknown",
            "intent": "Unknown",
            "spread_prediction": "Unable to predict - analysis failed",
            "key_claims": ["Analysis failed"],
            "manipulation_tactics": ["Unknown"],
            "target_demographics": ["Unknown"],
            "geographic_focus": ["Unknown"],
            "confidence_level": 0
        }

    def generate_prediction_scenarios(self, narrative_content: str,
                                      historical_patterns: Optional[List[Dict]] = None) -> List[Dict]:
        """Generate multiple prediction scenarios for a narrative.

        Args:
            narrative_content: The narrative text to project forward.
            historical_patterns: Reserved for future use; currently not
                folded into the prompt.  # TODO: incorporate or remove.

        Returns:
            Up to five scenario dicts; hard-coded defaults on failure.
        """
        prompt = f"""
        Based on this narrative content and historical disinformation patterns, generate 3-5 likely evolution scenarios:
        Narrative: "{narrative_content}"
        For each scenario, provide:
        {{
            "scenario_name": "<descriptive name>",
            "description": "<detailed description>",
            "probability": <0-100 percentage>,
            "timeline_hours": <hours until peak impact>,
            "predicted_reach": <estimated number of people reached>,
            "key_events": ["<event 1>", "<event 2>"],
            "mitigation_strategies": ["<strategy 1>", "<strategy 2>"]
        }}
        Consider factors like:
        - Content virality potential
        - Current social/political climate
        - Platform algorithms
        - Historical spread patterns
        - Audience susceptibility
        """
        try:
            response = self.model.generate_content(prompt)
            return self._parse_scenarios(response.text)
        except Exception as e:
            print(f"❌ Scenario generation failed: {e}")
            return self._get_default_scenarios()

    def _parse_scenarios(self, response_text: str) -> List[Dict]:
        """Parse scenario dicts out of a free-text Gemini response.

        Extracts every non-nested ``{...}`` span and keeps those that are
        valid JSON.  Returns the default scenarios when none parse.
        """
        scenarios = []
        # Non-nested brace spans only; nested JSON objects are not expected
        # given the flat per-scenario schema requested in the prompt.
        for match in re.findall(r'\{[^{}]*\}', response_text):
            try:
                scenarios.append(json.loads(match))
            except json.JSONDecodeError:
                continue
        if not scenarios:
            return self._get_default_scenarios()
        return scenarios[:5]  # Limit to 5 scenarios

    def _get_default_scenarios(self) -> List[Dict]:
        """Return canned low/medium/high-spread scenarios on failure."""
        return [
            {
                "scenario_name": "Minimal Spread",
                "description": "Content remains within original echo chambers with limited amplification",
                "probability": 40,
                "timeline_hours": 12,
                "predicted_reach": 5000,
                "key_events": ["Initial shares", "Limited engagement"],
                "mitigation_strategies": ["Early fact-checking", "Counter-messaging"]
            },
            {
                "scenario_name": "Moderate Viral Growth",
                "description": "Content gains traction across multiple platforms with algorithmic amplification",
                "probability": 35,
                "timeline_hours": 24,
                "predicted_reach": 50000,
                "key_events": ["Cross-platform sharing", "Influencer pickup"],
                "mitigation_strategies": ["Platform reporting", "Authoritative sources"]
            },
            {
                "scenario_name": "Rapid Viral Spread",
                "description": "Content explodes across all major platforms with mainstream media coverage",
                "probability": 25,
                "timeline_hours": 48,
                "predicted_reach": 500000,
                "key_events": ["Media coverage", "Political amplification"],
                "mitigation_strategies": ["Emergency response", "Coordinated debunking"]
            }
        ]

    def assess_sentiment_trends(self, content_list: List[str]) -> Dict:
        """Analyze sentiment trends across multiple pieces of content.

        Args:
            content_list: Related content strings, analyzed together.

        Returns:
            Sentiment summary dict; a flat/neutral default on failure.
        """
        prompt = f"""
        Analyze the sentiment trends across these related content pieces:
        Content pieces:
        {chr(10).join([f"{i+1}. {content}" for i, content in enumerate(content_list)])}
        Provide analysis in JSON format:
        {{
            "overall_sentiment": "<positive/negative/neutral>",
            "sentiment_score": <-100 to 100>,
            "emotion_distribution": {{
                "anger": <0-100>,
                "fear": <0-100>,
                "joy": <0-100>,
                "sadness": <0-100>,
                "disgust": <0-100>
            }},
            "sentiment_trajectory": "<increasing/decreasing/stable>",
            "key_emotional_triggers": ["<trigger 1>", "<trigger 2>"]
        }}
        """
        try:
            response = self.model.generate_content(prompt)
            json_match = re.search(r'\{.*\}', response.text, re.DOTALL)
            if json_match:
                return json.loads(json_match.group())
        except Exception as e:
            print(f"❌ Sentiment analysis failed: {e}")
        # Reached on API failure, malformed JSON, or no JSON in the reply.
        return {
            "overall_sentiment": "neutral",
            "sentiment_score": 0,
            "emotion_distribution": {
                "anger": 20,
                "fear": 20,
                "joy": 20,
                "sadness": 20,
                "disgust": 20
            },
            "sentiment_trajectory": "stable",
            "key_emotional_triggers": ["Unknown"]
        }

    def clear_cache(self):
        """Clear the analysis cache."""
        self.cache.clear()