MuhammadAhmadZia's picture
Upload folder using huggingface_hub
1090b45 verified
"""
Version 3 β€” Multi-Turn AI Chatbot with Persistent Storage
This version extends Version 2 with three major enhancements:
1. Multi-Turn Conversation (Short-term/Session Memory)
2. Persistent Storage (Cross-Session Memory via JSON file)
3. Editable User Preferences (injected into system prompt)
All features from Version 2 (Website Scraper + YouTube Transcript) are carried forward.
Usage:
1. Set environment variables: GROQ_API_KEY, BRIGHT_DATA_USERNAME, BRIGHT_DATA_PASSWORD
2. pip install -r requirements.txt
3. python app.py
"""
import os
import json
import requests
import gradio as gr
from openai import OpenAI
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from youtube_transcript_api import YouTubeTranscriptApi
from gtts import gTTS
from huggingface_hub import InferenceClient
import tempfile
# ─── Load environment variables ────────────────────────────────────────────────
# Try loading from the keys folder (local dev) or current dir (HF Spaces)
load_dotenv("../../keys/.env", override=True)
load_dotenv(".env", override=True)
# Tolerate both spellings of the env var name seen across environments.
groq_api_key = os.getenv("GROQ_API_KEY") or os.getenv("GROQ_API_Key")
bright_data_username = os.getenv("BRIGHT_DATA_USERNAME")
bright_data_password = os.getenv("BRIGHT_DATA_PASSWORD")

# ─── Set up Groq client (OpenAI-compatible API) ───────────────────────────────
client = OpenAI(
    base_url="https://api.groq.com/openai/v1",
    api_key=groq_api_key
)

# Optional Hugging Face client, used only for SpeechT5 text-to-speech in Tab 4;
# stays None when HF_TOKEN is not configured (voice tab then falls back to gTTS).
hf_token = os.getenv("HF_TOKEN")
hf_client = InferenceClient(api_key=hf_token) if hf_token else None

# Groq-hosted chat model used by every LLM call in this app.
MODEL = "llama-3.3-70b-versatile"

# ─── Global variables ─────────────────────────────────────────────────────────
scraped_data: str = ""  # Stores website scraped data (Tab 1)
transcript_data: str = ""  # Stores YouTube transcript data (Tab 2)

# ─── File paths for persistent storage ─────────────────────────────────────────
CHAT_HISTORY_FILE = "chat_history.json"
USER_PREFERENCES_FILE = "user_preferences.json"

# ─── Global conversation history (stored in RAM during runtime) ────────────────
# List of {"role": ..., "content": ...} dicts, persisted to CHAT_HISTORY_FILE.
conversation_history: list = []
# ══════════════════════════════════════════════════════════════════════════════
# PERSISTENT STORAGE FUNCTIONS
# ══════════════════════════════════════════════════════════════════════════════
def load_chat_history():
    """Populate ``conversation_history`` from the on-disk JSON file.

    Runs once at startup so the bot remembers past sessions. A missing or
    unreadable file simply yields an empty history — startup never fails.
    """
    global conversation_history
    conversation_history = []
    if not os.path.exists(CHAT_HISTORY_FILE):
        print("No previous chat history found. Starting fresh.")
        return
    try:
        with open(CHAT_HISTORY_FILE, "r") as handle:
            conversation_history = json.load(handle)
        print(f"βœ… Loaded {len(conversation_history)} messages from {CHAT_HISTORY_FILE}")
    except Exception as err:
        print(f"❌ Error loading chat history: {err}")
        conversation_history = []
def save_chat_history():
    """Flush the in-memory conversation history to disk as pretty-printed JSON.

    Called after every interaction so nothing is lost on restart; failures
    are logged but never propagated to the caller.
    """
    try:
        with open(CHAT_HISTORY_FILE, "w") as handle:
            json.dump(conversation_history, handle, indent=2)
        print(f"πŸ’Ύ Saved {len(conversation_history)} messages to {CHAT_HISTORY_FILE}")
    except Exception as err:
        print(f"❌ Error saving chat history: {err}")
def load_user_preferences():
    """Return the saved preferences string, or "" when none exist or on error."""
    if not os.path.exists(USER_PREFERENCES_FILE):
        return ""
    try:
        with open(USER_PREFERENCES_FILE, "r") as handle:
            return json.load(handle).get("preferences", "")
    except Exception as err:
        print(f"❌ Error loading preferences: {err}")
        return ""
def save_user_preferences(preferences_text):
    """Persist the preferences string as JSON; log (don't raise) on failure."""
    payload = {"preferences": preferences_text}
    try:
        with open(USER_PREFERENCES_FILE, "w") as handle:
            json.dump(payload, handle, indent=2)
        print(f"πŸ’Ύ Saved user preferences to {USER_PREFERENCES_FILE}")
    except Exception as err:
        print(f"❌ Error saving preferences: {err}")
def get_display_history():
    """Project ``conversation_history`` into Gradio's Chatbot message format.

    Keeps only user/assistant turns — system messages are never displayed.
    Returns a list of {"role": ..., "content": ...} dicts.
    """
    return [
        {"role": entry["role"], "content": entry["content"]}
        for entry in conversation_history
        if entry["role"] in ("user", "assistant")
    ]
# ══════════════════════════════════════════════════════════════════════════════
# TAB 1: WEBSITE SCRAPER (carried forward from Version 1 & 2)
# ══════════════════════════════════════════════════════════════════════════════
def scrape_website(url):
    """Fetch *url* and return its raw HTML, or an "Error scraping..." string.

    When Bright Data credentials are configured, the request is routed
    through the Web Unlocker proxy to defeat bot protection; otherwise a
    plain request with a browser-like User-Agent is made.
    """
    try:
        print(f"Scraping URL: {url}")
        if bright_data_username and bright_data_password:
            proxy = f"http://{bright_data_username}:{bright_data_password}@brd.superproxy.io:33335"
            print("Using Bright Data Web Unlocker proxy to bypass bot protection...")
            # verify=False: the unlocker proxy re-signs TLS, so cert checks fail.
            response = requests.get(
                url,
                proxies={"http": proxy, "https": proxy},
                timeout=60,
                verify=False,
            )
        else:
            print("Bright Data credentials not found. Using standard requests...")
            ua = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
            response = requests.get(url, headers={"User-Agent": ua}, timeout=15, verify=False)
        response.raise_for_status()
        print(f"Successfully scraped! Status code: {response.status_code}")
        return response.text
    except requests.exceptions.RequestException as e:
        return f"Error scraping website: {str(e)}"
def parse_goodreads_books(html_content):
    """Extract ranked book entries from a Goodreads "Best Books" page.

    Parsing strategies, tried in order:
      1. schema.org-annotated ``<tr itemtype=.../Book>`` rows;
      2. loose class-name selectors for title/author/rating;
      3. last resort — return the page's plain body text (scripts stripped).

    Returns a human-readable multi-line summary string.
    """
    soup = BeautifulSoup(html_content, "html.parser")
    books = []

    # Strategy 1: structured schema.org rows.
    for rank, row in enumerate(soup.select("tr[itemtype='http://schema.org/Book']"), 1):
        title_tag = row.select_one(".bookTitle span")
        author_tag = row.select_one(".authorName span")
        rating_tag = row.select_one(".minirating")
        books.append({
            "rank": rank,
            "title": title_tag.get_text(strip=True) if title_tag else "Unknown Title",
            "author": author_tag.get_text(strip=True) if author_tag else "Unknown Author",
            "rating": rating_tag.get_text(strip=True) if rating_tag else "No Rating",
        })

    # Strategy 2: looser selectors when the structured rows are absent.
    if not books:
        title_tags = soup.select("a.bookTitle") or soup.select("[class*='bookTitle']")
        author_tags = soup.select("a.authorName") or soup.select("[class*='authorName']")
        rating_tags = soup.select(".minirating") or soup.select("[class*='rating']")
        for idx in range(len(title_tags)):
            books.append({
                "rank": idx + 1,
                "title": title_tags[idx].get_text(strip=True) if idx < len(title_tags) else "Unknown",
                "author": author_tags[idx].get_text(strip=True) if idx < len(author_tags) else "Unknown",
                "rating": rating_tags[idx].get_text(strip=True) if idx < len(rating_tags) else "N/A",
            })

    # Strategy 3: nothing parsed — hand back the (truncated) plain text.
    if not books:
        text_content = ""
        if soup.body:
            for tag in soup.body(["script", "style", "img", "input"]):
                tag.decompose()
            text_content = soup.body.get_text(separator="\n", strip=True)
        return f"Could not parse structured book data. Raw content:\n\n{text_content[:5000]}"

    pieces = [f"Found {len(books)} books:\n\n"]
    for book in books:
        pieces.append(f"Rank #{book['rank']}: {book['title']} by {book['author']} β€” {book['rating']}\n")
    return "".join(pieces)
def scrape_and_display(url):
    """Scrape *url*, cache the parsed books in ``scraped_data``, and report.

    Returns either a validation message, the scraper's error string, or a
    success banner followed by the parsed data.
    """
    global scraped_data
    if not (url and url.strip()):
        return "❗ Please enter a valid URL."
    html_content = scrape_website(url)
    # scrape_website signals failure via a string beginning with "Error".
    if html_content.startswith("Error"):
        return html_content
    scraped_data = parse_goodreads_books(html_content)
    return f"βœ… Website scraped successfully!\n\n{scraped_data}"
def ask_ai_website(user_question, history):
    """Answer *user_question* strictly from the cached website data (Tab 1).

    *history* is supplied by gr.ChatInterface but unused — each question is
    answered statelessly against the scraped data.
    """
    global scraped_data
    if not scraped_data:
        return "⚠️ No data scraped yet! Enter a URL above and click 'Scrape Website' first."
    system_prompt = f"""You are a helpful assistant that answers questions based ONLY on the provided scraped website data.
RULES: Only use info from the data below. If not available, say so. Be concise.
Scraped Data:
{scraped_data}"""
    try:
        completion = client.chat.completions.create(
            model=MODEL,
            temperature=0.3,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_question},
            ],
        )
        return completion.choices[0].message.content
    except Exception as err:
        return f"❌ Error: {str(err)}"
# ══════════════════════════════════════════════════════════════════════════════
# TAB 2: YOUTUBE TRANSCRIPT Q&A (carried forward from Version 2)
# ══════════════════════════════════════════════════════════════════════════════
def fetch_transcript(video_id):
    """Fetch and cache the transcript of a YouTube video (Tab 2).

    Retries up to three times with a 2-second back-off to ride out the
    transient DNS failures common on HF Spaces. On success the full text is
    stored in the module-level ``transcript_data`` and a preview (first
    2000 chars) is returned; on failure ``transcript_data`` is cleared and
    a user-facing error string is returned.

    Args:
        video_id: The YouTube video ID (e.g. "dQw4w9WgXcQ"), not a full URL.
    """
    global transcript_data
    if not video_id or not video_id.strip():
        return "❗ Please enter a valid YouTube Video ID."
    video_id = video_id.strip()

    # NOTE: `time` is imported at module level (was a mid-function import).
    max_retries = 3
    last_error = None
    for attempt in range(max_retries):
        try:
            api = YouTubeTranscriptApi()
            print(f"Fetching transcript for video: {video_id} (attempt {attempt + 1}/{max_retries})")
            transcript = api.fetch(video_id)
            transcript_text = " ".join(snippet.text for snippet in transcript)
            transcript_data = transcript_text
            return f"βœ… Transcript fetched! ({len(transcript_text)} chars)\n\n{transcript_text[:2000]}{'...' if len(transcript_text) > 2000 else ''}"
        except Exception as e:
            last_error = e
            print(f"Attempt {attempt + 1} failed: {str(e)}")
            if attempt < max_retries - 1:
                time.sleep(2)  # brief pause before retrying

    # All attempts failed — clear any stale transcript and explain why.
    transcript_data = ""
    error_msg = str(last_error)
    if "NameResolution" in error_msg or "Failed to resolve" in error_msg:
        return f"❌ DNS resolution error (common on HF Spaces free tier). Please try again in a few minutes.\n\nDetails: {error_msg}"
    return f"❌ Error fetching transcript: {error_msg}\n\nMake sure the Video ID is correct and the video has captions."
def ask_ai_youtube(user_question, history):
    """Answer *user_question* strictly from the cached video transcript (Tab 2).

    *history* is supplied by gr.ChatInterface but unused — each question is
    answered statelessly against the fetched transcript.
    """
    global transcript_data
    if not transcript_data:
        return "⚠️ No transcript fetched yet! Enter a Video ID above and click 'Fetch Transcript' first."
    system_prompt = f"""You are a helpful assistant that answers questions based ONLY on the provided YouTube video transcript.
RULES: Only use info from the transcript below. If not available, say so. Be concise.
Transcript:
{transcript_data}"""
    try:
        completion = client.chat.completions.create(
            model=MODEL,
            temperature=0.3,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_question},
            ],
        )
        return completion.choices[0].message.content
    except Exception as err:
        return f"❌ Error: {str(err)}"
# ══════════════════════════════════════════════════════════════════════════════
# TAB 3: MULTI-TURN AI CHAT WITH PERSISTENT MEMORY (new in Version 3)
# ══════════════════════════════════════════════════════════════════════════════
def chat_with_memory(user_message, history, user_preferences):
    """Run one turn of the persistent-memory chat (Tab 3).

    The system prompt (with preferences folded in, when set) plus the FULL
    ``conversation_history`` is sent to the LLM so it has every prior turn
    as context. Both the user message and the assistant reply are appended
    to the history and flushed to disk immediately. If the API call fails,
    the just-appended user turn is rolled back and an error string returned.

    Args:
        user_message: The user's question (string).
        history: Gradio-managed display history (display only, unused here).
        user_preferences: Free-text preferences injected into the system prompt.
    """
    global conversation_history

    # Build the system prompt, honoring user preferences when present.
    system_prompt = "You are a helpful AI assistant."
    if user_preferences and user_preferences.strip():
        system_prompt = f"""{system_prompt}
The user has set the following preferences. Always respect these when responding:
{user_preferences}"""

    # Record the user turn, then ship system prompt + full history to the LLM.
    conversation_history.append({"role": "user", "content": user_message})
    messages_for_api = [{"role": "system", "content": system_prompt}, *conversation_history]
    print(f"\nπŸ“€ Sending {len(messages_for_api)} messages to LLM (including system prompt)")

    try:
        completion = client.chat.completions.create(
            model=MODEL,
            messages=messages_for_api,
            temperature=0.7
        )
        assistant_message = completion.choices[0].message.content
        conversation_history.append({"role": "assistant", "content": assistant_message})
        save_chat_history()  # persist immediately so a restart loses nothing
        print(f"πŸ“Š Total messages in history: {len(conversation_history)}")
        return assistant_message
    except Exception as err:
        # Roll back the user turn that never reached the model.
        conversation_history.pop()
        return f"❌ Error: {str(err)}"
def save_preferences_btn(preferences_text):
    """Gradio handler: persist the preferences textbox content.

    Returns a static confirmation string for the status textbox.
    """
    save_user_preferences(preferences_text)
    # Plain literal — the previous f-string had no placeholders.
    return "βœ… Preferences saved successfully!"
def clear_memory():
    """Wipe ALL conversation history and preferences, from RAM and disk.

    Returns (None, "", status) to reset the Chatbot widget, the preferences
    textbox, and the clear-status textbox in a single click.
    """
    global conversation_history
    conversation_history = []
    # Remove both persistence files when they exist.
    for path in (CHAT_HISTORY_FILE, USER_PREFERENCES_FILE):
        if os.path.exists(path):
            os.remove(path)
    print("πŸ—‘οΈ Memory cleared β€” both RAM and local disk")
    return None, "", "βœ… All memory cleared!"
# ══════════════════════════════════════════════════════════════════════════════
# STARTUP: Load previous session from disk
# ══════════════════════════════════════════════════════════════════════════════
# Runs at import time so the UI built below can seed its widgets with prior state.
load_chat_history()
saved_preferences = load_user_preferences()  # feeds the preferences textbox default
# ══════════════════════════════════════════════════════════════════════════════
# TAB 4: VOICE ASSISTANT (new in Version 4)
# ══════════════════════════════════════════════════════════════════════════════
def voice_chatbot(audio_filepath):
    """One-shot voice pipeline (Tab 4): speech β†’ text β†’ LLM β†’ speech.

    1. Transcribe the recorded clip with Whisper on Groq.
    2. Get a memory-less chat completion for the transcribed text.
    3. Synthesize the reply via HF SpeechT5, falling back to gTTS.

    Args:
        audio_filepath: Path to the recorded audio, or None/"" when the
            mic component is cleared.

    Returns:
        (user_text, ai_text, audio_path) — elements are empty/None for
        whichever stage failed.
    """
    if not audio_filepath:
        return "", "", None

    # ── 1. Speech-to-Text (Transcription) ──
    try:
        print("πŸŽ™οΈ Transcribing audio via Groq Whisper...")
        with open(audio_filepath, "rb") as file:
            transcription = client.audio.transcriptions.create(
                file=("audio.wav", file.read()),  # Send dummy filename
                model="whisper-large-v3-turbo",
            )
        user_text = transcription.text
        print(f"πŸ‘€ User said: {user_text}")
    except Exception as e:
        print(f"❌ Transcription Error: {e}")
        return "", "", None

    # ── 2. Get AI Response (No Memory) ──
    try:
        response = client.chat.completions.create(
            model=MODEL,
            messages=[
                {"role": "system", "content": "You are a helpful assistant. Keep answers concise."},
                {"role": "user", "content": user_text}
            ],
            temperature=0.7,
            max_tokens=512
        )
        ai_text = response.choices[0].message.content
    except Exception as e:
        print(f"❌ LLM Error: {e}")
        return user_text, f"Error getting response: {e}", None

    # ── 3. Text-to-Speech (Generation) ──
    # os/tempfile/uuid come from the module-level imports — the previous
    # version redundantly re-imported them here.
    try:
        print("πŸ”Š Generating speech via HF SpeechT5...")
        if hf_client is None:
            raise ValueError("HF_TOKEN missing for Text-to-Speech")
        audio_bytes = hf_client.text_to_speech(ai_text, model="microsoft/speecht5_tts")
        # Unique filename per response so concurrent requests don't clobber.
        temp_audio_path = os.path.join(tempfile.gettempdir(), f"bot_response_{uuid.uuid4().hex}.wav")
        with open(temp_audio_path, "wb") as f:
            f.write(audio_bytes)
        audio_out = temp_audio_path
        print(f"βœ… Audio saved to {audio_out}")
    except Exception as e:
        print(f"❌ TTS Error: {e}")
        # Fallback to gTTS
        try:
            print("πŸ”Š Falling back to gTTS...")
            tts = gTTS(text=ai_text, lang='en')
            temp_audio_path = os.path.join(tempfile.gettempdir(), f"bot_response_{uuid.uuid4().hex}.mp3")
            tts.save(temp_audio_path)
            audio_out = temp_audio_path
            print(f"βœ… Audio saved to {audio_out}")
        except Exception as fallback_e:
            print(f"❌ Fallback TTS Error: {fallback_e}")
            audio_out = None

    return user_text, ai_text, audio_out
# ══════════════════════════════════════════════════════════════════════════════
# BUILD THE GRADIO INTERFACE WITH 4 TABS
# ══════════════════════════════════════════════════════════════════════════════
with gr.Blocks(title="Multimodal Voice Assistant") as demo:
    gr.Markdown("# πŸ€– Multimodal Voice Assistant")
    gr.Markdown("### Scrape websites, fetch transcripts, chat with persistent memory, and talk!")
    with gr.Tabs():
        # ──────────────────────────────────────────────────────────────────
        # TAB 1: Website Scraper Q&A (from Version 1)
        # ──────────────────────────────────────────────────────────────────
        with gr.Tab("🌐 Website Scraper"):
            gr.Markdown("## Scrape a Bot-Protected Website and Ask Questions")
            with gr.Row():
                url_input = gr.Textbox(
                    label="Website URL",
                    placeholder="https://www.goodreads.com/list/show/1.Best_Books_Ever",
                    scale=4
                )
                scrape_btn = gr.Button("πŸ” Scrape Website", variant="primary", scale=1)
            scrape_output = gr.Textbox(label="Scraped Data", lines=8, interactive=False)
            scrape_btn.click(fn=scrape_and_display, inputs=[url_input], outputs=[scrape_output])
            gr.Markdown("### Ask Questions About the Scraped Data")
            # ChatInterface calls ask_ai_website(message, history) per turn.
            web_chat = gr.ChatInterface(
                fn=ask_ai_website,
                description="Example: 'What is the top-ranked book?' or 'Who wrote the second book?'",
                flagging_mode="never"
            )
        # ──────────────────────────────────────────────────────────────────
        # TAB 2: YouTube Transcript Q&A (from Version 2)
        # ──────────────────────────────────────────────────────────────────
        with gr.Tab("🎬 YouTube Transcript"):
            gr.Markdown("## Fetch a YouTube Video Transcript and Ask Questions")
            gr.Markdown("Enter a YouTube **Video ID** (e.g., `dQw4w9WgXcQ`)")
            with gr.Row():
                video_id_input = gr.Textbox(
                    label="YouTube Video ID",
                    placeholder="dQw4w9WgXcQ",
                    scale=4
                )
                fetch_btn = gr.Button("πŸ“₯ Fetch Transcript", variant="primary", scale=1)
            transcript_output = gr.Textbox(label="Video Transcript", lines=8, interactive=False)
            fetch_btn.click(fn=fetch_transcript, inputs=[video_id_input], outputs=[transcript_output])
            gr.Markdown("### Ask Questions About the Video")
            yt_chat = gr.ChatInterface(
                fn=ask_ai_youtube,
                description="Example: 'What is the main topic?' or 'Summarize in 3 bullet points'",
                flagging_mode="never"
            )
        # ──────────────────────────────────────────────────────────────────
        # TAB 3: Multi-Turn AI Chat with Memory (new in Version 3)
        # ──────────────────────────────────────────────────────────────────
        with gr.Tab("πŸ’¬ AI Chat with Memory"):
            gr.Markdown("## Multi-Turn AI Chat with Persistent Memory")
            gr.Markdown(
                "This chatbot remembers your entire conversation history across sessions. "
                "You can also set preferences that will influence how the AI responds."
            )
            with gr.Row():
                with gr.Column(scale=3):
                    # ── Chat area ──
                    chatbot_display = gr.Chatbot(
                        label="Conversation",
                        height=400,
                        value=get_display_history(),  # Load previous history on startup
                        type="messages"  # Use the new messages format
                    )
                    with gr.Row():
                        user_input = gr.Textbox(
                            label="Your message",
                            placeholder="Type your message here...",
                            scale=4,
                            lines=1
                        )
                        send_btn = gr.Button("Send ▢️", variant="primary", scale=1)
                with gr.Column(scale=1):
                    # ── User Preferences panel ──
                    gr.Markdown("### βš™οΈ User Preferences")
                    gr.Markdown(
                        "Set preferences that the AI will follow in all responses. "
                        "For example: 'Always respond formally' or 'Use bullet points'."
                    )
                    preferences_input = gr.Textbox(
                        label="Your Preferences",
                        placeholder="e.g., Always respond formally, Use bullet points, Keep answers short...",
                        lines=6,
                        value=saved_preferences  # Load saved preferences on startup
                    )
                    save_pref_btn = gr.Button("πŸ’Ύ Save Preferences", variant="secondary")
                    pref_status = gr.Textbox(label="Status", interactive=False, lines=1)
                    gr.Markdown("---")
                    clear_btn = gr.Button("πŸ—‘οΈ Clear All Memory", variant="stop")
                    clear_status = gr.Textbox(label="Clear Status", interactive=False, lines=1)
            # ── Connect buttons to functions ──
            def send_message(user_msg, chat_history_display, preferences):
                """Handle sending a message: get AI response and update display.

                Returns ("", updated_history): clears the input textbox and
                refreshes the Chatbot component.
                """
                # Empty input: just clear the textbox, leave the chat untouched.
                if not user_msg or not user_msg.strip():
                    return "", chat_history_display
                # Get AI response with full conversation context
                ai_response = chat_with_memory(user_msg, chat_history_display, preferences)
                # Update the displayed chat history
                chat_history_display.append({"role": "user", "content": user_msg})
                chat_history_display.append({"role": "assistant", "content": ai_response})
                return "", chat_history_display
            # Send button click
            send_btn.click(
                fn=send_message,
                inputs=[user_input, chatbot_display, preferences_input],
                outputs=[user_input, chatbot_display]
            )
            # Also send on Enter key
            user_input.submit(
                fn=send_message,
                inputs=[user_input, chatbot_display, preferences_input],
                outputs=[user_input, chatbot_display]
            )
            # Save preferences button
            save_pref_btn.click(
                fn=save_preferences_btn,
                inputs=[preferences_input],
                outputs=[pref_status]
            )
            # Clear memory button — resets chatbot, preferences box, and status.
            clear_btn.click(
                fn=clear_memory,
                outputs=[chatbot_display, preferences_input, clear_status]
            )
        # ──────────────────────────────────────────────────────────────────
        # TAB 4: Voice Assistant (new in Version 4)
        # ──────────────────────────────────────────────────────────────────
        with gr.Tab("πŸŽ™οΈ Multimodal AI Assistant (Voice)"):
            gr.Markdown("<h2 align=center>Voice Chatbot (Whisper + Groq + HF SpeechT5)</h2>")
            gr.Markdown("Speak to the AI and it will answer with voice. This tab does not retain conversational memory to keep it purely a Speech-to-Text and Text-to-Speech feature as requested.")
            with gr.Row():
                mic_input = gr.Audio(
                    sources=["microphone"],
                    type="filepath",
                    label="Speak"
                )
            with gr.Row():
                with gr.Column():
                    user_text_output = gr.Textbox(label="User Transcribed Text", interactive=False)
                    bot_text_output = gr.Textbox(label="Bot Text Response", interactive=False)
                with gr.Column():
                    bot_audio_output = gr.Audio(
                        label="Bot Audio Response",
                        type="filepath",
                        interactive=False,
                        autoplay=True
                    )
            # Fires whenever a new recording lands (change event on the mic input).
            mic_input.change(
                fn=voice_chatbot,
                inputs=[mic_input],
                outputs=[user_text_output, bot_text_output, bot_audio_output]
            )
# ── Launch the app ──
if __name__ == "__main__":
    demo.launch(inbrowser=True)  # inbrowser=True opens a browser tab on local runs