PercivalFletcher committed on
Commit
909b715
·
verified ·
1 Parent(s): 31de3d5

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +20 -0
  2. bot.py +353 -0
  3. requirements.txt +3 -0
Dockerfile ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11-slim

# Run the bot as an unprivileged user (UID 1000) instead of root.
RUN useradd -m -u 1000 user
USER user

# HOME points at the new user's directory and PATH includes the user-level
# pip script directory. PYTHONUNBUFFERED makes print()/logging output reach
# the container logs immediately instead of waiting in a stdout buffer.
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH \
    PYTHONUNBUFFERED=1

WORKDIR $HOME/app

# Install dependencies before copying the rest of the sources, so this layer
# is reused from the build cache when only application code changes.
COPY --chown=user requirements.txt $HOME/app/requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application code into $HOME/app, owned by the unprivileged user.
COPY --chown=user . $HOME/app

CMD ["python", "bot.py"]
bot.py ADDED
@@ -0,0 +1,353 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import html
import io
import logging
import os
from urllib.parse import urlparse

import aiohttp
from dotenv import load_dotenv
from telegram import Update, constants
from telegram.ext import (
    ApplicationBuilder,
    CommandHandler,
    ContextTypes,
    MessageHandler,
    filters,
)
9
+
10
# ==========================================
# ⚙️ CONFIGURATION
# ==========================================

# Load variables from a local .env file, if one exists. Values may equally
# come from the process environment (e.g. container secrets).
load_dotenv()

BOT_TOKEN = os.getenv("BOT_TOKEN")
BACKEND_API_URL = os.getenv("EXTERNAL_ANALYSIS_API_URL")
API_KEY = os.getenv("API_KEY")  # optional

# Fail fast, naming the environment variables that are actually read above
# (the backend URL is read from EXTERNAL_ANALYSIS_API_URL, not BACKEND_API_URL).
_missing_vars = [
    name
    for name, value in (
        ("BOT_TOKEN", BOT_TOKEN),
        ("EXTERNAL_ANALYSIS_API_URL", BACKEND_API_URL),
    )
    if not value
]
if _missing_vars:
    raise ValueError(
        "❌ Error: missing required environment variable(s): "
        + ", ".join(_missing_vars)
    )

# ==========================================
# 📝 LOGGING SETUP
# ==========================================
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO
)
logger = logging.getLogger(__name__)
34
+
35
# ==========================================
# 🧠 REPORT FORMATTER (JSON -> HTML)
# ==========================================
_SEPARATOR = "━" * 19


def _text(value, default=""):
    """Return *value* as a string, substituting *default* for ``None``."""
    return default if value is None else str(value)


def _dicts(items):
    """Return the dict entries of *items*; anything that is not a list gives []."""
    if not isinstance(items, (list, tuple)):
        return []
    return [item for item in items if isinstance(item, dict)]


def _domain_of(url):
    """Return the host part of *url*, or *url* itself when it has none."""
    try:
        return urlparse(url).netloc or url
    except ValueError:
        return url


def _link(url, label):
    """Build an HTML anchor with both the target and the label escaped."""
    return f"<a href='{html.escape(url)}'>{html.escape(label)}</a>"


def _evidence_lines(heading, items):
    """Render one evidence list: a bold heading plus one bullet per entry."""
    entries = _dicts(items)
    if not entries:
        return []
    lines = [f"<b>{heading}</b>\n"]
    for entry in entries:
        source = _text(entry.get("source")) or "Unknown"
        summary = _text(entry.get("summary"))
        # Show only the host for URL sources to keep the bullet short.
        shown = _domain_of(source) if source.startswith("http") else source
        lines.append(f"• {html.escape(summary)} <i>({html.escape(shown)})</i>\n")
    return lines


def format_analysis_report(data):
    """
    Converts the complex Backend JSON into a readable HTML Telegram message.

    Keys read from *data* (all optional): ``tag``, ``overall_summary``,
    ``source_credibility_summary``, ``analyzed_claims`` and
    ``reverse_image_search_data``. Every backend-supplied value is HTML-escaped
    before being inserted, and ``None`` values are treated like missing keys.

    Returns the HTML message, or a fixed "Format Error" message if anything
    unexpected is raised while building it.

    NOTE(review): the result is not truncated; Telegram caps text messages at
    4096 characters, so callers should be prepared for the send to fail on
    very large reports.
    """
    try:
        # --- HEADER ---
        tag = _text(data.get("tag")) or "Analysis"
        overall_summary = _text(data.get("overall_summary"), "No summary provided.")
        credibility = _dicts(data.get("source_credibility_summary"))

        # Pick the icon from the tag text. "unverified"/"untrue" contain the
        # positive keywords as substrings, so rule them out explicitly.
        tag_lower = tag.lower()
        negated = "unverified" in tag_lower or "untrue" in tag_lower
        if not negated and ("true" in tag_lower or "verified" in tag_lower):
            icon = "🟢"
        elif any(word in tag_lower for word in ("false", "misinfo", "fake", "untrue")):
            icon = "🔴"
        else:
            icon = "⚠️"

        parts = [
            "<b>🚨 VERIFACT ANALYSIS REPORT</b>\n",
            f"{_SEPARATOR}\n",
            f"<b>Result:</b> {icon} <b>{html.escape(tag.upper())}</b>\n",
        ]

        # Source Credibility Summary (average of the scores that parse as int)
        scores = []
        for item in credibility:
            try:
                scores.append(int(item["credibility_score"]))
            except (KeyError, ValueError, TypeError):
                continue
        if scores:
            avg_score = int(sum(scores) / len(scores))
            if avg_score >= 80:
                cred_label = "High"
            elif avg_score >= 60:
                cred_label = "Moderate"
            else:
                cred_label = "Low"
            parts.append(f"<b>Source Credibility:</b> {cred_label} ({avg_score}%)\n")

        parts.append("\n<b>📝 Summary:</b>\n")
        parts.append(f"<i>{html.escape(overall_summary)}</i>\n\n")

        # --- CLAIMS ANALYSIS ---
        claims = _dicts(data.get("analyzed_claims"))
        if claims:
            parts.append("<b>🔍 CLAIMS ANALYSIS</b>\n")
            parts.append(f"{_SEPARATOR}\n")

            for i, claim in enumerate(claims, 1):
                claim_text = _text(claim.get("claim_text"), "N/A")
                conclusion = _text(claim.get("conclusion"), "N/A")
                # The keycap suffix only forms an emoji with a single digit.
                number = f"{i}\ufe0f\u20e3" if i < 10 else f"{i}."

                parts.append(f'<b>{number} Claim:</b> "{html.escape(claim_text)}"\n')
                parts.append(f"<b>💡 Conclusion:</b> {html.escape(conclusion)}\n")
                parts.extend(
                    _evidence_lines("✅ Supporting Evidence:", claim.get("supporting_evidence"))
                )
                parts.extend(
                    _evidence_lines("❌ Opposing Evidence:", claim.get("opposing_evidence"))
                )
                parts.append("\n")

            # --- FACT CHECKS --- (all claims, de-duplicated by URL)
            seen_urls = set()
            links = []
            for claim in claims:
                for fc in _dicts(claim.get("fact_checking_results")):
                    url = _text(fc.get("url"))
                    # The backend may send the literal string "None" for no URL.
                    if not url or url == "None" or url in seen_urls:
                        continue
                    seen_urls.add(url)
                    label = _text(fc.get("source")) or "Fact Check"
                    if label == "Fact Check":
                        label = _domain_of(url)
                    links.append(f"• {_link(url, label)}\n")
            if links:
                parts.append("<b>🔗 FACT CHECKS</b>\n")
                parts.extend(links)
                parts.append("\n")

        # --- SOURCE CREDIBILITY DETAILS ---
        if credibility:
            parts.append("<b>🛡️ SOURCE CREDIBILITY</b>\n")
            for item in credibility[:5]:  # Limit to top 5 to avoid spam
                url = _text(item.get("url"))
                domain = _domain_of(url) if url else "Unknown Source"
                category = _text(item.get("category"), "Unknown")
                score = _text(item.get("credibility_score"), "N/A")
                parts.append(
                    f"• <b>{html.escape(domain)}</b>: "
                    f"{html.escape(category)} ({html.escape(score)})\n"
                )
            parts.append("\n")

        # --- REVERSE IMAGE SEARCH (Optional) ---
        ris = data.get("reverse_image_search_data")
        if isinstance(ris, dict):
            ris_summary = _text(ris.get("summary"))
            matched = _dicts(ris.get("matched_links"))[:3]

            if ris_summary or matched:
                parts.append("<b>🖼️ IMAGE ANALYSIS</b>\n")
                if ris_summary:
                    parts.append(f"{html.escape(ris_summary)}\n")
                for match in matched:
                    domain = _text(match.get("domain")) or "Link"
                    url = _text(match.get("url")) or "#"
                    date = _text(match.get("date"))
                    parts.append(f"• {_link(url, domain)} ({html.escape(date)})\n")

        parts.append("\n<i>🤖 Analysis generated by Verifact</i>")
        return "".join(parts)

    except Exception as e:
        # Last-resort boundary: never let a malformed payload crash a handler.
        logger.exception("Formatting Error: %s", e)
        return "⚠️ <b>Format Error:</b> Data received, but could not be displayed properly."
215
+
216
# ==========================================
# 📡 BACKEND CONNECTOR
# ==========================================
async def query_backend_pipeline(form_data):
    """
    Sends Multipart Form Data (Text + Files) to the backend at BACKEND_API_URL.

    An ``Authorization: Bearer`` header is added when API_KEY is set, and the
    whole request is limited to 60 seconds.

    Returns the decoded JSON body on HTTP 200. Returns ``None`` on any other
    status, on a body that is not JSON, or on any connection/timeout error;
    the reason is logged in each case.
    """
    headers = {"Authorization": f"Bearer {API_KEY}"} if API_KEY else {}
    timeout = aiohttp.ClientTimeout(total=60)

    try:
        async with aiohttp.ClientSession(timeout=timeout) as session:
            async with session.post(BACKEND_API_URL, data=form_data, headers=headers) as response:
                if response.status != 200:
                    error_text = await response.text()
                    # Cap the logged body so an HTML error page cannot flood the log.
                    logger.error("Backend Error %s: %s", response.status, error_text[:500])
                    return None
                return await response.json()
    except (aiohttp.ContentTypeError, ValueError) as e:
        # HTTP 200, but the body is not (valid) JSON.
        logger.error("Backend returned a non-JSON response: %r", e)
        return None
    except Exception as e:
        # %r rather than %s: a timeout exception has an empty str(), which
        # would otherwise leave the log line without any detail.
        logger.error("Connection Error: %r", e)
        return None
241
+
242
# ==========================================
# 🎮 BOT HANDLERS
# ==========================================

async def start(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Send the HTML-formatted welcome message to the chat that sent /start."""
    welcome_text = (
        "👋 <b>Verifact Forwarding Bot</b>\n\n"
        "I am connected to the misinformation analysis pipeline.\n"
        "Forward me any <b>Text</b> or <b>Image</b> to verify it."
    )
    await context.bot.send_message(
        chat_id=update.effective_chat.id,
        text=welcome_text,
        parse_mode='HTML',
    )
253
+
254
async def handle_text(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """
    Analyze a text message and replace a status message with the report.

    Posts a "analyzing" status message, sends the text to the backend via
    query_backend_pipeline, then edits the status message to show either the
    formatted report or an error notice.
    """
    # effective_message is also set for edited messages, where
    # update.message is None.
    user_text = update.effective_message.text
    chat_id = update.effective_chat.id

    await context.bot.send_chat_action(chat_id=chat_id, action=constants.ChatAction.TYPING)

    status_msg = await context.bot.send_message(
        chat_id=chat_id,
        text="📡 <i>Verifact is analyzing text...</i>",
        parse_mode='HTML'
    )

    try:
        data = aiohttp.FormData()
        data.add_field('text', user_text)
        data.add_field('source', 'Telegram')

        json_response = await query_backend_pipeline(data)

        if json_response:
            result_text = format_analysis_report(json_response)
        else:
            result_text = "⚠️ <b>System Error:</b> The pipeline is currently unreachable or timed out."

        await context.bot.edit_message_text(
            chat_id=chat_id,
            message_id=status_msg.message_id,
            text=result_text,
            parse_mode='HTML',
            disable_web_page_preview=True
        )

    except Exception:
        # Without this, a failed edit (e.g. Telegram rejecting the report)
        # would leave the "analyzing" status message in place forever.
        logger.exception("Text Handler Error")
        await context.bot.edit_message_text(
            chat_id=chat_id,
            message_id=status_msg.message_id,
            text="❌ <b>Error:</b> Could not process the message.",
            parse_mode='HTML'
        )
287
+
288
async def handle_photo(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """
    Analyze a photo (plus optional caption) and show the report.

    Posts a status message, downloads the last entry of the message's photo
    list into memory, uploads it with the caption to the backend via
    query_backend_pipeline, then edits the status message to show either the
    formatted report or an error notice.
    """
    # effective_message is also set for edited messages, where
    # update.message is None.
    message = update.effective_message
    chat_id = update.effective_chat.id

    await context.bot.send_chat_action(chat_id=chat_id, action=constants.ChatAction.UPLOAD_PHOTO)

    status_msg = await context.bot.send_message(
        chat_id=chat_id,
        # "&" must be written as an entity in HTML parse mode.
        text="📡 <i>Downloading media &amp; analyzing...</i>",
        parse_mode='HTML'
    )

    try:
        photo = message.photo[-1]
        file_obj = await context.bot.get_file(photo.file_id)

        f_memory = io.BytesIO()
        await file_obj.download_to_memory(out=f_memory)
        f_memory.seek(0)  # rewind so the upload reads from the start

        caption_text = message.caption if message.caption else "Image analysis request"

        data = aiohttp.FormData()
        data.add_field('text', caption_text)
        data.add_field('source', 'Telegram')
        data.add_field('file', f_memory, filename='telegram_image.jpg', content_type='image/jpeg')

        json_response = await query_backend_pipeline(data)

        if json_response:
            result_text = format_analysis_report(json_response)
        else:
            result_text = "⚠️ <b>Error:</b> Analysis failed or timed out."

        await context.bot.edit_message_text(
            chat_id=chat_id,
            message_id=status_msg.message_id,
            text=result_text,
            parse_mode='HTML',
            disable_web_page_preview=True
        )

    except Exception:
        logger.exception("Image Handler Error")
        await context.bot.edit_message_text(
            chat_id=chat_id,
            message_id=status_msg.message_id,
            text="❌ <b>Error:</b> Could not process the image file.",
            parse_mode='HTML'
        )
339
+
340
# ==========================================
# 🚀 MAIN RUNNER
# ==========================================
if __name__ == '__main__':
    application = ApplicationBuilder().token(BOT_TOKEN).build()

    # CommandHandler also matches "/start@BotName" and "/start <payload>",
    # which the previous exact-match regex on the message text did not.
    application.add_handler(CommandHandler("start", start))
    application.add_handler(MessageHandler(filters.TEXT & (~filters.COMMAND), handle_text))
    application.add_handler(MessageHandler(filters.PHOTO, handle_photo))

    print("✅ Bot is running.")
    print(f"🔗 Connected to Backend: {BACKEND_API_URL}")

    application.run_polling()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
# bot.py uses the asyncio-based python-telegram-bot API (ApplicationBuilder,
# telegram.ext.filters, File.download_to_memory), which requires v20 or newer.
python-telegram-bot>=20.0
aiohttp
python-dotenv