MuhammadAhmadZia committed on
Commit
e9fe853
·
verified ·
1 Parent(s): cf8928d

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. app - Copy.py +541 -0
  2. app.py +13 -2
app - Copy.py ADDED
@@ -0,0 +1,541 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Version 3 β€” Multi-Turn AI Chatbot with Persistent Storage
3
+
4
+ This version extends Version 2 with three major enhancements:
5
+ 1. Multi-Turn Conversation (Short-term/Session Memory)
6
+ 2. Persistent Storage (Cross-Session Memory via JSON file)
7
+ 3. Editable User Preferences (injected into system prompt)
8
+
9
+ All features from Version 2 (Website Scraper + YouTube Transcript) are carried forward.
10
+
11
+ Usage:
12
+ 1. Set environment variables: GROQ_API_KEY, BRIGHT_DATA_USERNAME, BRIGHT_DATA_PASSWORD
13
+ 2. pip install -r requirements.txt
14
+ 3. python app.py
15
+ """
16
+
17
+ import os
18
+ import json
19
+ import requests
20
+ import gradio as gr
21
+ from openai import OpenAI
22
+ from bs4 import BeautifulSoup
23
+ from dotenv import load_dotenv
24
+ from youtube_transcript_api import YouTubeTranscriptApi
25
+
26
+ # ─── Load environment variables ────────────────────────────────────────────────
27
+ # Try loading from the keys folder (local dev) or current dir (HF Spaces)
28
+ load_dotenv("../../keys/.env", override=True)
29
+ load_dotenv(".env", override=True)
30
+
31
+ groq_api_key = os.getenv("GROQ_API_KEY") or os.getenv("GROQ_API_Key")
32
+ bright_data_username = os.getenv("BRIGHT_DATA_USERNAME")
33
+ bright_data_password = os.getenv("BRIGHT_DATA_PASSWORD")
34
+
35
+ # ─── Set up Groq client (OpenAI-compatible API) ───────────────────────────────
36
+ client = OpenAI(
37
+ base_url="https://api.groq.com/openai/v1",
38
+ api_key=groq_api_key
39
+ )
40
+
41
+ MODEL = "llama-3.3-70b-versatile"
42
+
43
+ # ─── Global variables ─────────────────────────────────────────────────────────
44
+ scraped_data = "" # Stores website scraped data (Tab 1)
45
+ transcript_data = "" # Stores YouTube transcript data (Tab 2)
46
+
47
+ # ─── File paths for persistent storage ─────────────────────────────────────────
48
+ CHAT_HISTORY_FILE = "chat_history.json"
49
+ USER_PREFERENCES_FILE = "user_preferences.json"
50
+
51
+ # ─── Global conversation history (stored in RAM during runtime) ────────────────
52
+ conversation_history = []
53
+
54
+
55
+ # ══════════════════════════════════════════════════════════════════════════════
56
+ # PERSISTENT STORAGE FUNCTIONS
57
+ # ══════════════════════════════════════════════════════════════════════════════
58
+
59
def load_chat_history():
    """
    Populate the module-level conversation_history from CHAT_HISTORY_FILE.

    Called once at startup so the bot remembers past conversations. Starts
    with an empty history when the file is absent or unreadable.
    """
    global conversation_history
    if not os.path.exists(CHAT_HISTORY_FILE):
        conversation_history = []
        print("No previous chat history found. Starting fresh.")
        return
    try:
        with open(CHAT_HISTORY_FILE, "r") as fh:
            conversation_history = json.load(fh)
        print(f"βœ… Loaded {len(conversation_history)} messages from {CHAT_HISTORY_FILE}")
    except Exception as e:
        # Corrupt or unreadable file: report and fall back to a fresh history.
        print(f"❌ Error loading chat history: {e}")
        conversation_history = []
76
+
77
+
78
def save_chat_history():
    """
    Write the in-memory conversation_history to CHAT_HISTORY_FILE as JSON.

    Invoked after every exchange so a restart loses nothing; failures are
    reported but never raised to the caller.
    """
    try:
        with open(CHAT_HISTORY_FILE, "w") as fh:
            json.dump(conversation_history, fh, indent=2)
        print(f"πŸ’Ύ Saved {len(conversation_history)} messages to {CHAT_HISTORY_FILE}")
    except Exception as e:
        print(f"❌ Error saving chat history: {e}")
89
+
90
+
91
def load_user_preferences():
    """Return the saved preferences text from USER_PREFERENCES_FILE, or "" if none."""
    if not os.path.exists(USER_PREFERENCES_FILE):
        return ""
    try:
        with open(USER_PREFERENCES_FILE, "r") as fh:
            stored = json.load(fh)
        return stored.get("preferences", "")
    except Exception as e:
        # Unreadable/corrupt file behaves the same as "no preferences saved".
        print(f"❌ Error loading preferences: {e}")
        return ""
102
+
103
+
104
def save_user_preferences(preferences_text):
    """Persist *preferences_text* to USER_PREFERENCES_FILE as {"preferences": ...}."""
    try:
        with open(USER_PREFERENCES_FILE, "w") as fh:
            json.dump({"preferences": preferences_text}, fh, indent=2)
        print(f"πŸ’Ύ Saved user preferences to {USER_PREFERENCES_FILE}")
    except Exception as e:
        print(f"❌ Error saving preferences: {e}")
112
+
113
+
114
def get_display_history():
    """
    Project conversation_history into the Gradio Chatbot "messages" format.

    Keeps only user/assistant turns — system messages are context, not UI.
    """
    return [
        {"role": turn["role"], "content": turn["content"]}
        for turn in conversation_history
        if turn["role"] in ("user", "assistant")
    ]
125
+
126
+
127
+ # ══════════════════════════════════════════════════════════════════════════════
128
+ # TAB 1: WEBSITE SCRAPER (carried forward from Version 1 & 2)
129
+ # ══════════════════════════════════════════════════════════════════════════════
130
+
131
def scrape_website(url):
    """
    Fetch *url* and return the raw response body as text.

    Routes through the Bright Data Web Unlocker proxy when credentials are
    configured (to bypass bot protection); otherwise falls back to a plain
    request with a browser-like User-Agent. Returns an "Error scraping
    website: ..." string on any request failure.
    """
    try:
        print(f"Scraping URL: {url}")
        use_unlocker = bright_data_username and bright_data_password
        if use_unlocker:
            proxy_url = f"http://{bright_data_username}:{bright_data_password}@brd.superproxy.io:33335"
            print("Using Bright Data Web Unlocker proxy to bypass bot protection...")
            # NOTE(review): verify=False disables TLS certificate checks —
            # presumably required by the Unlocker proxy, but worth confirming.
            response = requests.get(
                url,
                proxies={"http": proxy_url, "https": proxy_url},
                timeout=60,
                verify=False,
            )
        else:
            print("Bright Data credentials not found. Using standard requests...")
            ua = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"}
            response = requests.get(url, headers=ua, timeout=15, verify=False)
        response.raise_for_status()
        print(f"Successfully scraped! Status code: {response.status_code}")
        return response.text
    except requests.exceptions.RequestException as e:
        return f"Error scraping website: {str(e)}"
149
+
150
+
151
def parse_goodreads_books(html_content):
    """
    Extract book entries from a Goodreads "Best Books" list page.

    Tries three strategies in order: schema.org table rows, loose
    class-based selectors, then a raw-text dump of the page body.
    Returns a human-readable summary string in all cases.
    """
    def _text(tag, fallback):
        # Safe text extraction: tolerate a missing tag.
        return tag.get_text(strip=True) if tag else fallback

    soup = BeautifulSoup(html_content, "html.parser")
    books = []

    # Strategy 1: structured rows carrying the schema.org Book itemtype.
    book_rows = soup.select("tr[itemtype='http://schema.org/Book']")
    for rank, row in enumerate(book_rows, 1):
        books.append({
            "rank": rank,
            "title": _text(row.select_one(".bookTitle span"), "Unknown Title"),
            "author": _text(row.select_one(".authorName span"), "Unknown Author"),
            "rating": _text(row.select_one(".minirating"), "No Rating"),
        })

    # Strategy 2: looser class-based selectors, aligned by index.
    if not books:
        title_tags = soup.select("a.bookTitle") or soup.select("[class*='bookTitle']")
        author_tags = soup.select("a.authorName") or soup.select("[class*='authorName']")
        rating_tags = soup.select(".minirating") or soup.select("[class*='rating']")
        for idx in range(len(title_tags)):
            books.append({
                "rank": idx + 1,
                "title": title_tags[idx].get_text(strip=True) if idx < len(title_tags) else "Unknown",
                "author": author_tags[idx].get_text(strip=True) if idx < len(author_tags) else "Unknown",
                "rating": rating_tags[idx].get_text(strip=True) if idx < len(rating_tags) else "N/A",
            })

    # Strategy 3: nothing matched — return the page text so the LLM can still try.
    if not books:
        text_content = ""
        if soup.body:
            for noise in soup.body(["script", "style", "img", "input"]):
                noise.decompose()
            text_content = soup.body.get_text(separator="\n", strip=True)
        return f"Could not parse structured book data. Raw content:\n\n{text_content[:5000]}"

    lines = [f"Found {len(books)} books:\n\n"]
    for book in books:
        lines.append(f"Rank #{book['rank']}: {book['title']} by {book['author']} β€” {book['rating']}\n")
    return "".join(lines)
189
+
190
+
191
def scrape_and_display(url):
    """
    Scrape *url*, cache the parsed result in the module-level scraped_data,
    and return a status string for the UI.
    """
    global scraped_data
    if not url or not url.strip():
        return "❗ Please enter a valid URL."
    html_content = scrape_website(url)
    # scrape_website signals failure with an "Error..." string.
    if html_content.startswith("Error"):
        return html_content
    scraped_data = parse_goodreads_books(html_content)
    return f"βœ… Website scraped successfully!\n\n{scraped_data}"
202
+
203
+
204
def ask_ai_website(user_question, history):
    """
    Single-turn Q&A over the cached website scrape (Tab 1).

    *history* is supplied by gr.ChatInterface but intentionally unused —
    each question is answered only against the scraped data.
    """
    global scraped_data
    if not scraped_data:
        return "⚠️ No data scraped yet! Enter a URL above and click 'Scrape Website' first."

    system_prompt = f"""You are a helpful assistant that answers questions based ONLY on the provided scraped website data.
RULES: Only use info from the data below. If not available, say so. Be concise.

Scraped Data:
{scraped_data}"""

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_question},
    ]
    try:
        completion = client.chat.completions.create(
            model=MODEL,
            messages=messages,
            temperature=0.3,
        )
        return completion.choices[0].message.content
    except Exception as e:
        return f"❌ Error: {str(e)}"
228
+
229
+
230
+ # ══════════════════════════════════════════════════════════════════════════════
231
+ # TAB 2: YOUTUBE TRANSCRIPT Q&A (carried forward from Version 2)
232
+ # ══════════════════════════════════════════════════════════════════════════════
233
+
234
def fetch_transcript(video_id):
    """
    Fetch the transcript of a YouTube video by Video ID and cache it in the
    module-level transcript_data.

    Uses the Bright Data proxy when credentials are available — needed in
    hosted environments (e.g. HF Spaces) where YouTube may be unreachable,
    and consistent with how scrape_website already routes its requests.
    Returns a status string (preview on success, error message on failure).
    """
    global transcript_data
    if not video_id or not video_id.strip():
        return "❗ Please enter a valid YouTube Video ID."

    video_id = video_id.strip()
    try:
        if bright_data_username and bright_data_password:
            # Local import: only needed on the proxied path.
            from youtube_transcript_api.proxies import GenericProxyConfig
            proxy_url = f"http://{bright_data_username}:{bright_data_password}@brd.superproxy.io:33335"
            proxy_config = GenericProxyConfig(http_url=proxy_url, https_url=proxy_url)
            api = YouTubeTranscriptApi(proxy_config=proxy_config)
            print(f"Fetching transcript via Bright Data proxy for video: {video_id}")
        else:
            api = YouTubeTranscriptApi()
            print(f"Fetching transcript directly for video: {video_id}")

        transcript = api.fetch(video_id)
        transcript_text = " ".join(snippet.text for snippet in transcript)
        transcript_data = transcript_text
        return f"βœ… Transcript fetched! ({len(transcript_text)} chars)\n\n{transcript_text[:2000]}{'...' if len(transcript_text) > 2000 else ''}"
    except Exception as e:
        transcript_data = ""
        return f"❌ Error fetching transcript: {str(e)}\n\nMake sure the Video ID is correct and the video has captions."
250
+
251
+
252
def ask_ai_youtube(user_question, history):
    """
    Single-turn Q&A over the cached YouTube transcript (Tab 2).

    *history* is supplied by gr.ChatInterface but intentionally unused —
    each question is answered only against the fetched transcript.
    """
    global transcript_data
    if not transcript_data:
        return "⚠️ No transcript fetched yet! Enter a Video ID above and click 'Fetch Transcript' first."

    system_prompt = f"""You are a helpful assistant that answers questions based ONLY on the provided YouTube video transcript.
RULES: Only use info from the transcript below. If not available, say so. Be concise.

Transcript:
{transcript_data}"""

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_question},
    ]
    try:
        completion = client.chat.completions.create(
            model=MODEL,
            messages=messages,
            temperature=0.3,
        )
        return completion.choices[0].message.content
    except Exception as e:
        return f"❌ Error: {str(e)}"
276
+
277
+
278
+ # ══════════════════════════════════════════════════════════════════════════════
279
+ # TAB 3: MULTI-TURN AI CHAT WITH PERSISTENT MEMORY (new in Version 3)
280
+ # ══════════════════════════════════════════════════════════════════════════════
281
+
282
def chat_with_memory(user_message, history, user_preferences):
    """
    Answer *user_message* with the full persisted conversation as context.

    Flow:
    1. Build a system prompt, appending the user's preferences if any.
    2. Record the user turn in the module-level conversation_history.
    3. Send system prompt + EVERY stored turn to the LLM (full context).
    4. Record the assistant turn and persist the history to disk.

    Args:
        user_message: the user's latest message (string).
        history: Gradio's display-side history; unused here (display only).
        user_preferences: preferences text from the textbox.

    Returns:
        The assistant's reply, or an error string if the API call fails.
    """
    global conversation_history

    # System prompt, optionally extended with the user's standing preferences.
    base_system_prompt = "You are a helpful AI assistant."
    if user_preferences and user_preferences.strip():
        system_prompt = f"""{base_system_prompt}

The user has set the following preferences. Always respect these when responding:
{user_preferences}"""
    else:
        system_prompt = base_system_prompt

    # Record the new user turn, then ship the whole history for context.
    conversation_history.append({"role": "user", "content": user_message})
    payload = [{"role": "system", "content": system_prompt}, *conversation_history]

    print(f"\nπŸ“€ Sending {len(payload)} messages to LLM (including system prompt)")

    try:
        completion = client.chat.completions.create(
            model=MODEL,
            messages=payload,
            temperature=0.7
        )
        assistant_message = completion.choices[0].message.content

        # Store the reply and persist immediately so a restart loses nothing.
        conversation_history.append({"role": "assistant", "content": assistant_message})
        save_chat_history()

        print(f"πŸ“Š Total messages in history: {len(conversation_history)}")
        return assistant_message

    except Exception as e:
        # Roll back the user turn we appended — the call never went through.
        conversation_history.pop()
        return f"❌ Error: {str(e)}"
346
+
347
+
348
def save_preferences_btn(preferences_text):
    """
    Click handler for the Save Preferences button: persist the textbox
    contents and return a status message for the UI.
    """
    save_user_preferences(preferences_text)
    # Plain literal — the original's f-prefix had no placeholders (F541).
    return "βœ… Preferences saved successfully!"
352
+
353
+
354
def clear_memory():
    """
    Wipe ALL stored state: in-memory history plus the history and
    preferences files on disk.

    Returns (None, "", status) to reset the chatbot display, empty the
    preferences textbox, and show a confirmation message.
    """
    global conversation_history
    conversation_history = []

    # Remove both persistence files if they exist.
    for path in (CHAT_HISTORY_FILE, USER_PREFERENCES_FILE):
        if os.path.exists(path):
            os.remove(path)

    print("πŸ—‘οΈ Memory cleared β€” both RAM and local disk")
    return None, "", "βœ… All memory cleared!"
372
+
373
+
374
+ # ══════════════════════════════════════════════════════════════════════════════
375
+ # STARTUP: Load previous session from disk
376
+ # ══════════════════════════════════════════════════════════════════════════════
377
+
378
# Startup: restore the previous session from disk before building the UI.
load_chat_history()
saved_preferences = load_user_preferences()


# ── Gradio interface: three tabs sharing one Blocks app ──
with gr.Blocks(title="Multi-Turn AI Assistant with Memory") as demo:

    gr.Markdown("# 🧠 Multi-Turn AI Assistant with Memory")
    gr.Markdown("### Scrape websites, fetch transcripts, and chat with persistent memory")

    with gr.Tabs():

        # Tab 1 — website scraper + Q&A over the scraped data.
        with gr.Tab("🌐 Website Scraper"):
            gr.Markdown("## Scrape a Bot-Protected Website and Ask Questions")

            with gr.Row():
                url_input = gr.Textbox(
                    label="Website URL",
                    placeholder="https://www.goodreads.com/list/show/1.Best_Books_Ever",
                    scale=4,
                )
                scrape_btn = gr.Button("πŸ” Scrape Website", variant="primary", scale=1)

            scrape_output = gr.Textbox(label="Scraped Data", lines=8, interactive=False)
            scrape_btn.click(fn=scrape_and_display, inputs=[url_input], outputs=[scrape_output])

            gr.Markdown("### Ask Questions About the Scraped Data")
            website_qa = gr.ChatInterface(
                fn=ask_ai_website,
                description="Example: 'What is the top-ranked book?' or 'Who wrote the second book?'",
                flagging_mode="never",
            )

        # Tab 2 — YouTube transcript fetch + Q&A over the transcript.
        with gr.Tab("🎬 YouTube Transcript"):
            gr.Markdown("## Fetch a YouTube Video Transcript and Ask Questions")
            gr.Markdown("Enter a YouTube **Video ID** (e.g., `dQw4w9WgXcQ`)")

            with gr.Row():
                video_id_input = gr.Textbox(
                    label="YouTube Video ID",
                    placeholder="dQw4w9WgXcQ",
                    scale=4,
                )
                fetch_btn = gr.Button("πŸ“₯ Fetch Transcript", variant="primary", scale=1)

            transcript_output = gr.Textbox(label="Video Transcript", lines=8, interactive=False)
            fetch_btn.click(fn=fetch_transcript, inputs=[video_id_input], outputs=[transcript_output])

            gr.Markdown("### Ask Questions About the Video")
            youtube_qa = gr.ChatInterface(
                fn=ask_ai_youtube,
                description="Example: 'What is the main topic?' or 'Summarize in 3 bullet points'",
                flagging_mode="never",
            )

        # Tab 3 — multi-turn chat with persistent memory and preferences.
        with gr.Tab("πŸ’¬ AI Chat with Memory"):
            gr.Markdown("## Multi-Turn AI Chat with Persistent Memory")
            gr.Markdown(
                "This chatbot remembers your entire conversation history across sessions. "
                "You can also set preferences that will influence how the AI responds."
            )

            with gr.Row():
                with gr.Column(scale=3):
                    # Chat area, pre-populated with the restored history.
                    chatbot_display = gr.Chatbot(
                        label="Conversation",
                        height=400,
                        value=get_display_history(),
                        type="messages",
                    )

                    with gr.Row():
                        user_input = gr.Textbox(
                            label="Your message",
                            placeholder="Type your message here...",
                            scale=4,
                            lines=1,
                        )
                        send_btn = gr.Button("Send ▢️", variant="primary", scale=1)

                with gr.Column(scale=1):
                    # Preferences side panel.
                    gr.Markdown("### βš™οΈ User Preferences")
                    gr.Markdown(
                        "Set preferences that the AI will follow in all responses. "
                        "For example: 'Always respond formally' or 'Use bullet points'."
                    )

                    preferences_input = gr.Textbox(
                        label="Your Preferences",
                        placeholder="e.g., Always respond formally, Use bullet points, Keep answers short...",
                        lines=6,
                        value=saved_preferences,
                    )

                    save_pref_btn = gr.Button("πŸ’Ύ Save Preferences", variant="secondary")
                    pref_status = gr.Textbox(label="Status", interactive=False, lines=1)

                    gr.Markdown("---")
                    clear_btn = gr.Button("πŸ—‘οΈ Clear All Memory", variant="stop")
                    clear_status = gr.Textbox(label="Clear Status", interactive=False, lines=1)

            def send_message(user_msg, chat_history_display, preferences):
                """Dispatch one message: get the AI reply, append both turns to the display."""
                if not user_msg or not user_msg.strip():
                    return "", chat_history_display

                ai_response = chat_with_memory(user_msg, chat_history_display, preferences)

                chat_history_display.append({"role": "user", "content": user_msg})
                chat_history_display.append({"role": "assistant", "content": ai_response})

                # Clear the textbox and refresh the chat display.
                return "", chat_history_display

            # Wiring: button click and Enter key share the same handler.
            send_btn.click(
                fn=send_message,
                inputs=[user_input, chatbot_display, preferences_input],
                outputs=[user_input, chatbot_display],
            )
            user_input.submit(
                fn=send_message,
                inputs=[user_input, chatbot_display, preferences_input],
                outputs=[user_input, chatbot_display],
            )
            save_pref_btn.click(
                fn=save_preferences_btn,
                inputs=[preferences_input],
                outputs=[pref_status],
            )
            clear_btn.click(
                fn=clear_memory,
                outputs=[chatbot_display, preferences_input, clear_status],
            )


# Launch only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch(inbrowser=True)
app.py CHANGED
@@ -232,14 +232,25 @@ Scraped Data:
232
  # ══════════════════════════════════════════════════════════════════════════════
233
 
234
  def fetch_transcript(video_id):
235
- """Fetch the transcript of a YouTube video using its Video ID."""
236
  global transcript_data
237
  if not video_id or not video_id.strip():
238
  return "❗ Please enter a valid YouTube Video ID."
239
 
240
  video_id = video_id.strip()
241
  try:
242
- api = YouTubeTranscriptApi()
 
 
 
 
 
 
 
 
 
 
 
243
  transcript = api.fetch(video_id)
244
  transcript_text = " ".join([snippet.text for snippet in transcript])
245
  transcript_data = transcript_text
 
232
  # ══════════════════════════════════════════════════════════════════════════════
233
 
234
  def fetch_transcript(video_id):
235
+ """Fetch the transcript of a YouTube video. Uses Bright Data proxy if available."""
236
  global transcript_data
237
  if not video_id or not video_id.strip():
238
  return "❗ Please enter a valid YouTube Video ID."
239
 
240
  video_id = video_id.strip()
241
  try:
242
+ # Use Bright Data proxy if credentials are available
243
+ # Needed on HF Spaces where YouTube DNS may not resolve
244
+ if bright_data_username and bright_data_password:
245
+ from youtube_transcript_api.proxies import GenericProxyConfig
246
+ proxy_url = f"http://{bright_data_username}:{bright_data_password}@brd.superproxy.io:33335"
247
+ proxy_config = GenericProxyConfig(http_url=proxy_url, https_url=proxy_url)
248
+ api = YouTubeTranscriptApi(proxy_config=proxy_config)
249
+ print(f"Fetching transcript via Bright Data proxy for video: {video_id}")
250
+ else:
251
+ api = YouTubeTranscriptApi()
252
+ print(f"Fetching transcript directly for video: {video_id}")
253
+
254
  transcript = api.fetch(video_id)
255
  transcript_text = " ".join([snippet.text for snippet in transcript])
256
  transcript_data = transcript_text