# ForesightSphere / src/gemini_analyzer.py
# (provenance: uploaded by syaikhipin — "Upload 25 files", commit 0e66264, verified)
import google.generativeai as genai
import json
import re
from typing import Dict, List, Optional
from datetime import datetime
import time
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from config import config
class GeminiAnalyzer:
    """Disinformation analysis built on the Gemini generative API.

    Wraps a `genai.GenerativeModel` and exposes content risk analysis,
    spread-scenario prediction, and sentiment-trend assessment. Every
    network-backed method degrades gracefully: on any API or parsing
    failure it returns a sensible default structure instead of raising,
    so callers always receive a well-formed dict/list.
    """

    def __init__(self, api_key: str):
        """Configure the Gemini client.

        Args:
            api_key: Google Generative AI API key.
        """
        self.api_key = api_key
        genai.configure(api_key=api_key)
        # Model name comes from project-level config (e.g. "gemini-pro").
        self.model = genai.GenerativeModel(config.GEMINI_MODEL)
        # In-memory cache: cache_key -> (analysis_dict, unix_timestamp).
        self.cache = {}
        self.cache_duration = config.CACHE_DURATION

    def test_connection(self) -> bool:
        """Test if Gemini API is accessible.

        Returns:
            True when a trivial generation call succeeds, False otherwise.
        """
        try:
            self.model.generate_content("Test connection")
            return True
        except Exception as e:
            print(f"❌ Gemini connection failed: {e}")
            return False

    def analyze_content(self, content: str) -> Dict:
        """Analyze content for disinformation indicators.

        Results are cached in memory (keyed on `hash(content)`) for
        `self.cache_duration` seconds to avoid repeated API calls.

        Args:
            content: Raw text to assess.

        Returns:
            Dict with risk_score, factual_accuracy, emotional_manipulation,
            historical_similarity, likely_origin, intent, spread_prediction,
            key_claims, manipulation_tactics, target_demographics,
            geographic_focus and confidence_level. Falls back to a neutral
            default analysis if the API call fails.
        """
        cache_key = f"analyze_{hash(content)}"
        # Check cache first; entries expire after cache_duration seconds.
        if cache_key in self.cache:
            cached_result, timestamp = self.cache[cache_key]
            if time.time() - timestamp < self.cache_duration:
                return cached_result
        prompt = f"""
Analyze this content for disinformation indicators. Provide a comprehensive assessment:
Content: "{content}"
Please assess and respond in the following JSON format:
{{
    "risk_score": <0-100 integer>,
    "factual_accuracy": <0-100 integer>,
    "emotional_manipulation": <0-100 integer>,
    "historical_similarity": "<description of similar past narratives>",
    "likely_origin": "<assessment of likely source/origin>",
    "intent": "<assessment of intent behind the content>",
    "spread_prediction": "<detailed prediction of how this might spread in next 24-48 hours>",
    "key_claims": ["<claim 1>", "<claim 2>", "<claim 3>"],
    "manipulation_tactics": ["<tactic 1>", "<tactic 2>"],
    "target_demographics": ["<demographic 1>", "<demographic 2>"],
    "geographic_focus": ["<region 1>", "<region 2>"],
    "confidence_level": <0-100 integer>
}}
Assessment criteria:
1. Factual accuracy probability (0=completely false, 100=completely accurate)
2. Emotional manipulation tactics present
3. Similar historical narratives for pattern matching
4. Likely origin and intent assessment
5. Predicted spread trajectory considering virality factors
6. Overall risk score based on potential harm and reach
"""
        try:
            response = self.model.generate_content(prompt)
            result_text = response.text
            # Greedily grab the outermost {...} span — models often wrap
            # JSON in prose or markdown fences.
            json_match = re.search(r'\{.*\}', result_text, re.DOTALL)
            if json_match:
                analysis = json.loads(json_match.group())
            else:
                # No JSON found: derive a rough result from keywords.
                analysis = self._parse_fallback_response(result_text, content)
            # Cache the result with its creation time.
            self.cache[cache_key] = (analysis, time.time())
            return analysis
        except Exception as e:
            print(f"❌ Analysis failed: {e}")
            return self._get_default_analysis(content)

    def _parse_fallback_response(self, response_text: str, content: str) -> Dict:
        """Fallback parsing when JSON extraction fails.

        Uses simple keyword matching on the free-text response to pick a
        coarse risk score; all other fields are fixed placeholders.
        """
        risk_score = 50  # Default medium risk
        if any(word in response_text.lower() for word in ['high risk', 'dangerous', 'harmful']):
            risk_score = 85
        elif any(word in response_text.lower() for word in ['low risk', 'safe', 'accurate']):
            risk_score = 25
        return {
            "risk_score": risk_score,
            "factual_accuracy": 50,
            "emotional_manipulation": 60,
            "historical_similarity": "Pattern analysis unavailable",
            "likely_origin": "Unknown source",
            "intent": "Assessment unavailable",
            "spread_prediction": "Moderate spread potential based on content characteristics",
            # Truncate long content so key_claims stays display-friendly.
            "key_claims": [content[:100] + "..." if len(content) > 100 else content],
            "manipulation_tactics": ["Emotional appeal"],
            "target_demographics": ["General population"],
            "geographic_focus": ["Global"],
            "confidence_level": 60
        }

    def _get_default_analysis(self, content: str) -> Dict:
        """Return default analysis when API fails (confidence_level 0)."""
        return {
            "risk_score": 50,
            "factual_accuracy": 50,
            "emotional_manipulation": 50,
            "historical_similarity": "Analysis unavailable - API error",
            "likely_origin": "Unknown",
            "intent": "Unknown",
            "spread_prediction": "Unable to predict - analysis failed",
            "key_claims": ["Analysis failed"],
            "manipulation_tactics": ["Unknown"],
            "target_demographics": ["Unknown"],
            "geographic_focus": ["Unknown"],
            "confidence_level": 0
        }

    def generate_prediction_scenarios(self, narrative_content: str,
                                      historical_patterns: Optional[List[Dict]] = None) -> List[Dict]:
        """Generate multiple prediction scenarios for a narrative.

        Args:
            narrative_content: The narrative text to project forward.
            historical_patterns: Reserved for future use — currently not
                injected into the prompt.

        Returns:
            Up to 5 scenario dicts; default scenarios on any failure.
        """
        prompt = f"""
Based on this narrative content and historical disinformation patterns, generate 3-5 likely evolution scenarios:
Narrative: "{narrative_content}"
For each scenario, provide:
{{
    "scenario_name": "<descriptive name>",
    "description": "<detailed description>",
    "probability": <0-100 percentage>,
    "timeline_hours": <hours until peak impact>,
    "predicted_reach": <estimated number of people reached>,
    "key_events": ["<event 1>", "<event 2>"],
    "mitigation_strategies": ["<strategy 1>", "<strategy 2>"]
}}
Consider factors like:
- Content virality potential
- Current social/political climate
- Platform algorithms
- Historical spread patterns
- Audience susceptibility
"""
        try:
            response = self.model.generate_content(prompt)
            return self._parse_scenarios(response.text)
        except Exception as e:
            print(f"❌ Scenario generation failed: {e}")
            return self._get_default_scenarios()

    def _parse_scenarios(self, response_text: str) -> List[Dict]:
        """Parse scenario dicts out of a free-form Gemini response.

        Extracts every flat (non-nested) {...} span and keeps those that
        parse as JSON. Returns default scenarios when none parse.
        """
        scenarios = []
        # Flat-brace pattern: intentionally skips nested objects.
        for match in re.findall(r'\{[^{}]*\}', response_text, re.DOTALL):
            try:
                scenarios.append(json.loads(match))
            except json.JSONDecodeError:
                # Not valid JSON (e.g. prose in braces) — skip this span.
                continue
        if not scenarios:
            return self._get_default_scenarios()
        return scenarios[:5]  # Limit to 5 scenarios

    def _get_default_scenarios(self) -> List[Dict]:
        """Return three canned scenarios when generation/parsing fails."""
        return [
            {
                "scenario_name": "Minimal Spread",
                "description": "Content remains within original echo chambers with limited amplification",
                "probability": 40,
                "timeline_hours": 12,
                "predicted_reach": 5000,
                "key_events": ["Initial shares", "Limited engagement"],
                "mitigation_strategies": ["Early fact-checking", "Counter-messaging"]
            },
            {
                "scenario_name": "Moderate Viral Growth",
                "description": "Content gains traction across multiple platforms with algorithmic amplification",
                "probability": 35,
                "timeline_hours": 24,
                "predicted_reach": 50000,
                "key_events": ["Cross-platform sharing", "Influencer pickup"],
                "mitigation_strategies": ["Platform reporting", "Authoritative sources"]
            },
            {
                "scenario_name": "Rapid Viral Spread",
                "description": "Content explodes across all major platforms with mainstream media coverage",
                "probability": 25,
                "timeline_hours": 48,
                "predicted_reach": 500000,
                "key_events": ["Media coverage", "Political amplification"],
                "mitigation_strategies": ["Emergency response", "Coordinated debunking"]
            }
        ]

    def assess_sentiment_trends(self, content_list: List[str]) -> Dict:
        """Analyze sentiment trends across multiple pieces of content.

        Args:
            content_list: Related text snippets, numbered into the prompt.

        Returns:
            Dict with overall_sentiment, sentiment_score (-100..100),
            emotion_distribution, sentiment_trajectory and
            key_emotional_triggers; a neutral default on failure.
        """
        prompt = f"""
Analyze the sentiment trends across these related content pieces:
Content pieces:
{chr(10).join([f"{i+1}. {content}" for i, content in enumerate(content_list)])}
Provide analysis in JSON format:
{{
    "overall_sentiment": "<positive/negative/neutral>",
    "sentiment_score": <-100 to 100>,
    "emotion_distribution": {{
        "anger": <0-100>,
        "fear": <0-100>,
        "joy": <0-100>,
        "sadness": <0-100>,
        "disgust": <0-100>
    }},
    "sentiment_trajectory": "<increasing/decreasing/stable>",
    "key_emotional_triggers": ["<trigger 1>", "<trigger 2>"]
}}
"""
        try:
            response = self.model.generate_content(prompt)
            json_match = re.search(r'\{.*\}', response.text, re.DOTALL)
            if json_match:
                return json.loads(json_match.group())
        except Exception as e:
            print(f"❌ Sentiment analysis failed: {e}")
        # Reached when the API fails OR no JSON was found in the reply.
        return {
            "overall_sentiment": "neutral",
            "sentiment_score": 0,
            "emotion_distribution": {
                "anger": 20,
                "fear": 20,
                "joy": 20,
                "sadness": 20,
                "disgust": 20
            },
            "sentiment_trajectory": "stable",
            "key_emotional_triggers": ["Unknown"]
        }

    def clear_cache(self):
        """Clear the analysis cache."""
        self.cache.clear()