Spaces:

PercivalFletcher
/

Verifact-Bot

Runtime error

App Files Files Community

PercivalFletcher committed on Nov 27, 2025

Commit

909b715

verified ·

1 Parent(s): 31de3d5

Upload 3 files

Browse files

Files changed (3) hide show

Dockerfile +20 -0
bot.py +353 -0
requirements.txt +3 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,20 @@

+FROM python:3.11-slim
+# Set up a new user named "user" with user ID 1000
+RUN useradd -m -u 1000 user
+# Switch to the "user" user
+USER user
+# Set home to the user's home directory and put user-level pip scripts on PATH
+ENV HOME=/home/user \
+	PATH=/home/user/.local/bin:$PATH
+WORKDIR $HOME/app
+# Install dependencies first so this layer is reused when only the application code changes
+COPY --chown=user requirements.txt $HOME/app/requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy the rest of the current directory into the container at $HOME/app, owned by the user
+COPY --chown=user . $HOME/app
+CMD ["python", "bot.py"]

bot.py ADDED Viewed

	@@ -0,0 +1,353 @@

+import logging
+import os
+import io
+import html
+import aiohttp
+from dotenv import load_dotenv  # <--- NEW IMPORT
+from telegram import Update, constants
+from telegram.ext import ApplicationBuilder, ContextTypes, MessageHandler, filters
+# ==========================================
+# ⚙️ CONFIGURATION
+# ==========================================
+# 1. Load environment variables from the .env file
+load_dotenv()
+# 2. Retrieve values
+BOT_TOKEN = os.getenv("BOT_TOKEN")
+BACKEND_API_URL = os.getenv("EXTERNAL_ANALYSIS_API_URL")
+API_KEY = os.getenv("API_KEY")
+# Check if critical vars are missing
+if not BOT_TOKEN or not BACKEND_API_URL:
+    raise ValueError("❌ Error: BOT_TOKEN or BACKEND_API_URL is missing from .env file")
+# ==========================================
+# 📝 LOGGING SETUP
+# ==========================================
+logging.basicConfig(
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    level=logging.INFO
+)
+logger = logging.getLogger(__name__)
+# ==========================================
+# 🧠 REPORT FORMATTER (JSON -> HTML)
+# ==========================================
+def format_analysis_report(data):
+    """
+    Converts the complex Backend JSON into a readable HTML Telegram message.
+    """
+    try:
+        # --- HEADER ---
+        tag = data.get("tag", "Analysis")
+        overall_summary = data.get("overall_summary", "No summary provided.")
+        source_cred_list = data.get("source_credibility_summary", [])
+        # Determine icon based on tag content
+        tag_lower = tag.lower()
+        if "true" in tag_lower or "verified" in tag_lower:
+            icon = "🟢"
+        elif "false" in tag_lower or "misinfo" in tag_lower or "fake" in tag_lower:
+            icon = "🔴"
+        else:
+            icon = "⚠️"
+        # Start building the message
+        message = f"<b>🚨 VERIFACT ANALYSIS REPORT</b>\n"
+        message += "━━━━━━━━━━━━━━━━━━━\n"
+        message += f"<b>Result:</b> {icon} <b>{html.escape(tag.upper())}</b>\n"
+        # Source Credibility Summary (Average)
+        if source_cred_list:
+            total_score = 0
+            count = 0
+            for item in source_cred_list:
+                if isinstance(item, dict) and 'credibility_score' in item:
+                    try:
+                        total_score += int(item['credibility_score'])
+                        count += 1
+                    except (ValueError, TypeError):
+                        pass
+            if count > 0:
+                avg_score = int(total_score / count)
+                # Determine label based on average
+                if avg_score >= 80:
+                    cred_label = "High"
+                elif avg_score >= 60:
+                    cred_label = "Moderate"
+                else:
+                    cred_label = "Low"
+                message += f"<b>Source Credibility:</b> {cred_label} ({avg_score}%)\n"
+        message += "\n<b>📝 Summary:</b>\n"
+        message += f"<i>{html.escape(overall_summary)}</i>\n\n"
+        # --- CLAIMS ANALYSIS ---
+        claims = data.get("analyzed_claims", [])
+        if claims:
+            message += "<b>🔍 CLAIMS ANALYSIS</b>\n"
+            message += "━━━━━━━━━━━━━━━━━━━\n"
+            for i, claim in enumerate(claims, 1):
+                claim_text = claim.get("claim_text", "N/A")
+                conclusion = claim.get("conclusion", "N/A")
+                message += f"<b>{i}️⃣ Claim:</b> \"{html.escape(claim_text)}\"\n"
+                message += f"<b>💡 Conclusion:</b> {html.escape(conclusion)}\n"
+                # Evidence
+                supporting = claim.get("supporting_evidence", [])
+                opposing = claim.get("opposing_evidence", [])
+                if supporting:
+                    message += "<b>✅ Supporting Evidence:</b>\n"
+                    for ev in supporting:
+                        src = html.escape(ev.get('source', 'Unknown'))
+                        summ = html.escape(ev.get('summary', ''))
+                        # Try to shorten source URL for display if it's a URL
+                        if src.startswith('http'):
+                            from urllib.parse import urlparse
+                            try:
+                                domain = urlparse(src).netloc
+                                src_display = domain
+                            except:
+                                src_display = "Link"
+                        else:
+                            src_display = src
+                        message += f"• {summ} <i>({src_display})</i>\n"
+                if opposing:
+                    message += "<b>❌ Opposing Evidence:</b>\n"
+                    for ev in opposing:
+                        src = html.escape(ev.get('source', 'Unknown'))
+                        summ = html.escape(ev.get('summary', ''))
+                        # Try to shorten source URL for display
+                        if src.startswith('http'):
+                            from urllib.parse import urlparse
+                            try:
+                                domain = urlparse(src).netloc
+                                src_display = domain
+                            except:
+                                src_display = "Link"
+                        else:
+                            src_display = src
+                        message += f"• {summ} <i>({src_display})</i>\n"
+                message += "\n"
+        # --- FACT CHECKS ---
+        all_fact_checks = []
+        for claim in claims:
+            all_fact_checks.extend(claim.get("fact_checking_results", []))
+        # Filter out "None" URLs or empty results
+        valid_fact_checks = [fc for fc in all_fact_checks if fc.get('url') and fc.get('url') != "None"]
+        if valid_fact_checks:
+            message += "<b>🔗 FACT CHECKS</b>\n"
+            seen_urls = set()
+            for fc in valid_fact_checks:
+                url = fc.get('url', '#')
+                if url not in seen_urls:
+                    # Use inference or source name if available, else domain
+                    source = fc.get('source', 'Fact Check')
+                    if source == 'Fact Check' and url != '#':
+                         from urllib.parse import urlparse
+                         try:
+                             source = urlparse(url).netloc
+                         except:
+                             pass
+                    source = html.escape(source)
+                    message += f"• <a href='{url}'>{source}</a>\n"
+                    seen_urls.add(url)
+            message += "\n"
+        # --- SOURCE CREDIBILITY DETAILS ---
+        if source_cred_list:
+             message += "<b>🛡️ SOURCE CREDIBILITY</b>\n"
+             for item in source_cred_list[:5]: # Limit to top 5 to avoid spam
+                 url = item.get('url', '')
+                 score = item.get('credibility_score', 'N/A')
+                 category = item.get('category', 'Unknown')
+                 # Extract domain
+                 domain = "Unknown Source"
+                 if url:
+                     from urllib.parse import urlparse
+                     try:
+                         domain = urlparse(url).netloc
+                     except:
+                         domain = url
+                 message += f"• <b>{domain}</b>: {category} ({score})\n"
+             message += "\n"
+        # --- REVERSE IMAGE SEARCH (Optional) ---
+        ris = data.get("reverse_image_search_data")
+        if ris:
+            ris_summary = ris.get("summary", "")
+            matched = ris.get("matched_links", [])
+            if ris_summary or matched:
+                message += "<b>🖼️ IMAGE ANALYSIS</b>\n"
+                if ris_summary:
+                    message += f"{html.escape(ris_summary)}\n"
+                if matched:
+                    for match in matched[:3]:
+                        domain = html.escape(match.get('domain', 'Link'))
+                        url = match.get('url', '#')
+                        date = html.escape(match.get('date', ''))
+                        message += f"• <a href='{url}'>{domain}</a> ({date})\n"
+        message += "\n<i>🤖 Analysis generated by Verifact</i>"
+        return message
+    except Exception as e:
+        logger.error(f"Formatting Error: {e}")
+        return "⚠️ <b>Format Error:</b> Data received, but could not be displayed properly."
+# ==========================================
+# 📡 BACKEND CONNECTOR
+# ==========================================
+async def query_backend_pipeline(form_data):
+    """
+    Sends Multipart Form Data (Text + Files) to Cloud Run.
+    """
+    headers = {}
+    if API_KEY:
+        headers["Authorization"] = f"Bearer {API_KEY}"
+    timeout = aiohttp.ClientTimeout(total=60)
+    async with aiohttp.ClientSession(timeout=timeout) as session:
+        try:
+            async with session.post(BACKEND_API_URL, data=form_data, headers=headers) as response:
+                if response.status == 200:
+                    return await response.json()
+                else:
+                    error_text = await response.text()
+                    logger.error(f"Backend Error {response.status}: {error_text}")
+                    return None
+        except Exception as e:
+            logger.error(f"Connection Error: {e}")
+            return None
+# ==========================================
+# 🎮 BOT HANDLERS
+# ==========================================
+async def start(update: Update, context: ContextTypes.DEFAULT_TYPE):
+    welcome_text = (
+        "👋 <b>Verifact Forwarding Bot</b>\n\n"
+        "I am connected to the misinformation analysis pipeline.\n"
+        "Forward me any <b>Text</b> or <b>Image</b> to verify it."
+    )
+    await context.bot.send_message(chat_id=update.effective_chat.id, text=welcome_text, parse_mode='HTML')
+async def handle_text(update: Update, context: ContextTypes.DEFAULT_TYPE):
+    user_text = update.message.text
+    await context.bot.send_chat_action(chat_id=update.effective_chat.id, action=constants.ChatAction.TYPING)
+    status_msg = await context.bot.send_message(
+        chat_id=update.effective_chat.id,
+        text="📡 <i>Verifact is analyzing text...</i>",
+        parse_mode='HTML'
+    )
+    data = aiohttp.FormData()
+    data.add_field('text', user_text)
+    data.add_field('source', 'Telegram')
+    json_response = await query_backend_pipeline(data)
+    if json_response:
+        report = format_analysis_report(json_response)
+        await context.bot.edit_message_text(
+            chat_id=update.effective_chat.id,
+            message_id=status_msg.message_id,
+            text=report,
+            parse_mode='HTML',
+            disable_web_page_preview=True
+        )
+    else:
+        await context.bot.edit_message_text(
+            chat_id=update.effective_chat.id,
+            message_id=status_msg.message_id,
+            text="⚠️ <b>System Error:</b> The pipeline is currently unreachable or timed out.",
+            parse_mode='HTML'
+        )
+async def handle_photo(update: Update, context: ContextTypes.DEFAULT_TYPE):
+    await context.bot.send_chat_action(chat_id=update.effective_chat.id, action=constants.ChatAction.UPLOAD_PHOTO)
+    status_msg = await context.bot.send_message(
+        chat_id=update.effective_chat.id,
+        text="📡 <i>Downloading media & analyzing...</i>",
+        parse_mode='HTML'
+    )
+    try:
+        photo = update.message.photo[-1]
+        file_obj = await context.bot.get_file(photo.file_id)
+        f_memory = io.BytesIO()
+        await file_obj.download_to_memory(out=f_memory)
+        f_memory.seek(0)
+        data = aiohttp.FormData()
+        caption_text = update.message.caption if update.message.caption else "Image analysis request"
+        data.add_field('text', caption_text)
+        data.add_field('source', 'Telegram')
+        data.add_field('file', f_memory, filename='telegram_image.jpg', content_type='image/jpeg')
+        json_response = await query_backend_pipeline(data)
+        if json_response:
+            report = format_analysis_report(json_response)
+            await context.bot.edit_message_text(
+                chat_id=update.effective_chat.id,
+                message_id=status_msg.message_id,
+                text=report,
+                parse_mode='HTML',
+                disable_web_page_preview=True
+            )
+        else:
+            await context.bot.edit_message_text(
+                chat_id=update.effective_chat.id,
+                message_id=status_msg.message_id,
+                text="⚠️ <b>Error:</b> Analysis failed or timed out.",
+                parse_mode='HTML'
+            )
+    except Exception as e:
+        logger.error(f"Image Handler Error: {e}")
+        await context.bot.edit_message_text(
+            chat_id=update.effective_chat.id,
+            message_id=status_msg.message_id,
+            text="❌ <b>Error:</b> Could not process the image file.",
+            parse_mode='HTML'
+        )
+# ==========================================
+# 🚀 MAIN RUNNER
+# ==========================================
+if __name__ == '__main__':
+    application = ApplicationBuilder().token(BOT_TOKEN).build()
+    application.add_handler(MessageHandler(filters.COMMAND & filters.Regex(r'^/start$'), start))
+    application.add_handler(MessageHandler(filters.TEXT & (~filters.COMMAND), handle_text))
+    application.add_handler(MessageHandler(filters.PHOTO, handle_photo))
+    print(f"✅ Bot is running.")
+    print(f"🔗 Connected to Backend: {BACKEND_API_URL}")
+    application.run_polling()

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+# bot.py uses ApplicationBuilder and File.download_to_memory, which require python-telegram-bot v20+
+python-telegram-bot>=20.0
+aiohttp
+python-dotenv