Spaces:

PercivalFletcher
/

Verifact-Bot

Runtime error

App Files Files Community

Verifact-Bot / bot.py

PercivalFletcher

Upload 2 files

ecfa3ec verified 3 months ago

raw

history blame contribute delete

15.3 kB

	import logging
	import os
	import io
	import html
	import aiohttp
	import socket
	from urllib.parse import urlparse
	from dotenv import load_dotenv # <--- NEW IMPORT
	from telegram import Update, constants
	from telegram.ext import ApplicationBuilder, ContextTypes, MessageHandler, filters

	# ==========================================
	# ⚙️ CONFIGURATION
	# ==========================================

	# 1. Load environment variables from the .env file
	load_dotenv()

	# 2. Retrieve values
	BOT_TOKEN = os.getenv("BOT_TOKEN", "").strip().replace('"', '').replace("'", "")
	BACKEND_API_URL = os.getenv("EXTERNAL_ANALYSIS_API_URL", "").strip().replace('"', '').replace("'", "")
	API_KEY = os.getenv("API_KEY")

	# Check if critical vars are missing
	if not BOT_TOKEN or not BACKEND_API_URL:
	raise ValueError("❌ Error: BOT_TOKEN or BACKEND_API_URL is missing from .env file")

	# ==========================================
	# 📝 LOGGING SETUP
	# ==========================================
	logging.basicConfig(
	format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
	level=logging.INFO
	)
	logger = logging.getLogger(__name__)

	# ==========================================
	# 🧠 REPORT FORMATTER (JSON -> HTML)
	# ==========================================
	def format_analysis_report(data):
	"""
	Converts the complex Backend JSON into a readable HTML Telegram message.
	"""
	try:
	# --- HEADER ---
	tag = data.get("tag", "Analysis")
	overall_summary = data.get("overall_summary", "No summary provided.")
	source_cred_list = data.get("source_credibility_summary", [])

	# Determine icon based on tag content
	tag_lower = tag.lower()
	if "true" in tag_lower or "verified" in tag_lower:
	icon = "🟢"
	elif "false" in tag_lower or "misinfo" in tag_lower or "fake" in tag_lower:
	icon = "🔴"
	else:
	icon = "⚠️"

	# Start building the message
	message = f"<b>🚨 VERIFACT ANALYSIS REPORT</b>\n"
	message += "━━━━━━━━━━━━━━━━━━━\n"
	message += f"<b>Result:</b> {icon} <b>{html.escape(tag.upper())}</b>\n"

	# Source Credibility Summary (Average)
	if source_cred_list:
	total_score = 0
	count = 0
	for item in source_cred_list:
	if isinstance(item, dict) and 'credibility_score' in item:
	try:
	total_score += int(item['credibility_score'])
	count += 1
	except (ValueError, TypeError):
	pass

	if count > 0:
	avg_score = int(total_score / count)
	# Determine label based on average
	if avg_score >= 80:
	cred_label = "High"
	elif avg_score >= 60:
	cred_label = "Moderate"
	else:
	cred_label = "Low"
	message += f"<b>Source Credibility:</b> {cred_label} ({avg_score}%)\n"

	message += "\n<b>📝 Summary:</b>\n"
	message += f"<i>{html.escape(overall_summary)}</i>\n\n"

	# --- CLAIMS ANALYSIS ---
	claims = data.get("analyzed_claims", [])
	if claims:
	message += "<b>🔍 CLAIMS ANALYSIS</b>\n"
	message += "━━━━━━━━━━━━━━━━━━━\n"

	for i, claim in enumerate(claims, 1):
	claim_text = claim.get("claim_text", "N/A")
	conclusion = claim.get("conclusion", "N/A")

	message += f"<b>{i}️⃣ Claim:</b> \"{html.escape(claim_text)}\"\n"
	message += f"<b>💡 Conclusion:</b> {html.escape(conclusion)}\n"

	# Evidence
	supporting = claim.get("supporting_evidence", [])
	opposing = claim.get("opposing_evidence", [])

	if supporting:
	message += "<b>✅ Supporting Evidence:</b>\n"
	for ev in supporting:
	src = html.escape(ev.get('source', 'Unknown'))
	summ = html.escape(ev.get('summary', ''))
	# Try to shorten source URL for display if it's a URL
	if src.startswith('http'):
	from urllib.parse import urlparse
	try:
	domain = urlparse(src).netloc
	src_display = domain
	except:
	src_display = "Link"
	else:
	src_display = src

	message += f"• {summ} <i>({src_display})</i>\n"

	if opposing:
	message += "<b>❌ Opposing Evidence:</b>\n"
	for ev in opposing:
	src = html.escape(ev.get('source', 'Unknown'))
	summ = html.escape(ev.get('summary', ''))
	# Try to shorten source URL for display
	if src.startswith('http'):
	from urllib.parse import urlparse
	try:
	domain = urlparse(src).netloc
	src_display = domain
	except:
	src_display = "Link"
	else:
	src_display = src

	message += f"• {summ} <i>({src_display})</i>\n"

	message += "\n"

	# --- FACT CHECKS ---
	all_fact_checks = []
	for claim in claims:
	all_fact_checks.extend(claim.get("fact_checking_results", []))

	# Filter out "None" URLs or empty results
	valid_fact_checks = [fc for fc in all_fact_checks if fc.get('url') and fc.get('url') != "None"]

	if valid_fact_checks:
	message += "<b>🔗 FACT CHECKS</b>\n"
	seen_urls = set()
	for fc in valid_fact_checks:
	url = fc.get('url', '#')
	if url not in seen_urls:
	# Use inference or source name if available, else domain
	source = fc.get('source', 'Fact Check')
	if source == 'Fact Check' and url != '#':
	from urllib.parse import urlparse
	try:
	source = urlparse(url).netloc
	except:
	pass

	source = html.escape(source)
	message += f"• <a href='{url}'>{source}</a>\n"
	seen_urls.add(url)
	message += "\n"

	# --- SOURCE CREDIBILITY DETAILS ---
	if source_cred_list:
	message += "<b>🛡️ SOURCE CREDIBILITY</b>\n"
	for item in source_cred_list[:5]: # Limit to top 5 to avoid spam
	url = item.get('url', '')
	score = item.get('credibility_score', 'N/A')
	category = item.get('category', 'Unknown')

	# Extract domain
	domain = "Unknown Source"
	if url:
	from urllib.parse import urlparse
	try:
	domain = urlparse(url).netloc
	except:
	domain = url

	message += f"• <b>{domain}</b>: {category} ({score})\n"
	message += "\n"

	# --- REVERSE IMAGE SEARCH (Optional) ---
	ris = data.get("reverse_image_search_data")
	if ris:
	ris_summary = ris.get("summary", "")
	matched = ris.get("matched_links", [])

	if ris_summary or matched:
	message += "<b>🖼️ IMAGE ANALYSIS</b>\n"
	if ris_summary:
	message += f"{html.escape(ris_summary)}\n"

	if matched:
	for match in matched[:3]:
	domain = html.escape(match.get('domain', 'Link'))
	url = match.get('url', '#')
	date = html.escape(match.get('date', ''))
	message += f"• <a href='{url}'>{domain}</a> ({date})\n"

	message += "\n<i>🤖 Analysis generated by Verifact</i>"
	return message

	except Exception as e:
	logger.error(f"Formatting Error: {e}")
	return "⚠️ <b>Format Error:</b> Data received, but could not be displayed properly."

	# ==========================================
	# 📡 BACKEND CONNECTOR
	# ==========================================
	async def query_backend_pipeline(form_data):
	"""
	Sends Multipart Form Data (Text + Files) to Cloud Run.
	"""
	headers = {}
	if API_KEY:
	headers["Authorization"] = f"Bearer {API_KEY}"

	timeout = aiohttp.ClientTimeout(total=60)

	async with aiohttp.ClientSession(timeout=timeout) as session:
	try:
	async with session.post(BACKEND_API_URL, data=form_data, headers=headers) as response:
	if response.status == 200:
	return await response.json()
	else:
	error_text = await response.text()
	logger.error(f"Backend Error {response.status}: {error_text}")
	return None
	except Exception as e:
	logger.error(f"Connection Error: {e}")
	return None

	# ==========================================
	# 🎮 BOT HANDLERS
	# ==========================================

	async def start(update: Update, context: ContextTypes.DEFAULT_TYPE):
	welcome_text = (
	"👋 <b>Verifact Forwarding Bot</b>\n\n"
	"I am connected to the misinformation analysis pipeline.\n"
	"Forward me any <b>Text</b> or <b>Image</b> to verify it."
	)
	await context.bot.send_message(chat_id=update.effective_chat.id, text=welcome_text, parse_mode='HTML')

	async def handle_text(update: Update, context: ContextTypes.DEFAULT_TYPE):
	user_text = update.message.text

	await context.bot.send_chat_action(chat_id=update.effective_chat.id, action=constants.ChatAction.TYPING)

	status_msg = await context.bot.send_message(
	chat_id=update.effective_chat.id,
	text="📡 <i>Verifact is analyzing text...</i>",
	parse_mode='HTML'
	)

	data = aiohttp.FormData()
	data.add_field('text', user_text)
	data.add_field('source', 'Telegram')

	json_response = await query_backend_pipeline(data)

	if json_response:
	report = format_analysis_report(json_response)
	await context.bot.edit_message_text(
	chat_id=update.effective_chat.id,
	message_id=status_msg.message_id,
	text=report,
	parse_mode='HTML',
	disable_web_page_preview=True
	)
	else:
	await context.bot.edit_message_text(
	chat_id=update.effective_chat.id,
	message_id=status_msg.message_id,
	text="⚠️ <b>System Error:</b> The pipeline is currently unreachable or timed out.",
	parse_mode='HTML'
	)

	async def handle_photo(update: Update, context: ContextTypes.DEFAULT_TYPE):
	await context.bot.send_chat_action(chat_id=update.effective_chat.id, action=constants.ChatAction.UPLOAD_PHOTO)

	status_msg = await context.bot.send_message(
	chat_id=update.effective_chat.id,
	text="📡 <i>Downloading media & analyzing...</i>",
	parse_mode='HTML'
	)

	try:
	photo = update.message.photo[-1]
	file_obj = await context.bot.get_file(photo.file_id)

	f_memory = io.BytesIO()
	await file_obj.download_to_memory(out=f_memory)
	f_memory.seek(0)

	data = aiohttp.FormData()
	caption_text = update.message.caption if update.message.caption else "Image analysis request"

	data.add_field('text', caption_text)
	data.add_field('source', 'Telegram')
	data.add_field('file', f_memory, filename='telegram_image.jpg', content_type='image/jpeg')

	json_response = await query_backend_pipeline(data)

	if json_response:
	report = format_analysis_report(json_response)
	await context.bot.edit_message_text(
	chat_id=update.effective_chat.id,
	message_id=status_msg.message_id,
	text=report,
	parse_mode='HTML',
	disable_web_page_preview=True
	)
	else:
	await context.bot.edit_message_text(
	chat_id=update.effective_chat.id,
	message_id=status_msg.message_id,
	text="⚠️ <b>Error:</b> Analysis failed or timed out.",
	parse_mode='HTML'
	)

	except Exception as e:
	logger.error(f"Image Handler Error: {e}")
	await context.bot.edit_message_text(
	chat_id=update.effective_chat.id,
	message_id=status_msg.message_id,
	text="❌ <b>Error:</b> Could not process the image file.",
	parse_mode='HTML'
	)

	# ==========================================
	# 🛠️ DIAGNOSTICS
	# ==========================================
	def check_network():
	"""Checks DNS resolution for critical services."""
	logger.info("--- NETWORK DIAGNOSTICS ---")

	targets = [
	("Telegram API", "api.telegram.org"),
	("Google", "google.com")
	]

	# Add Backend Host if parseable
	try:
	if BACKEND_API_URL:
	backend_host = urlparse(BACKEND_API_URL).netloc
	targets.append(("Backend API", backend_host))
	except:
	pass

	for name, host in targets:
	try:
	ip = socket.gethostbyname(host)
	logger.info(f"✅ {name} ({host}) resolved to {ip}")
	except socket.gaierror as e:
	logger.error(f"❌ {name} ({host}) DNS FAILURE: {e}")
	except Exception as e:
	logger.error(f"❌ {name} ({host}) Unexpected Error: {e}")

	logger.info("---------------------------")

	# ==========================================
	# 🚀 MAIN RUNNER
	# ==========================================
	if __name__ == '__main__':
	check_network()

	from telegram.request import HTTPXRequest

	# Use a robust request object with longer timeouts
	trequest = HTTPXRequest(connection_pool_size=8, read_timeout=20.0, write_timeout=20.0, connect_timeout=20.0)

	application = ApplicationBuilder().token(BOT_TOKEN).request(trequest).build()

	application.add_handler(MessageHandler(filters.COMMAND & filters.Regex(r'^/start$'), start))
	application.add_handler(MessageHandler(filters.TEXT & (~filters.COMMAND), handle_text))
	application.add_handler(MessageHandler(filters.PHOTO, handle_photo))

	print(f"✅ Bot is running.")
	print(f"🔗 Connected to Backend: {BACKEND_API_URL}")

	application.run_polling()