# Wosqa's picture
# Upload 3 files
# 512dbe2 verified
import os
import json
import requests
import gradio as gr
from bs4 import BeautifulSoup
from groq import Groq
from youtube_transcript_api import YouTubeTranscriptApi
# API credentials are read from the environment; each is None when unset.
BRIGHTDATA_API_KEY = os.environ.get("BRIGHTDATA_API_KEY")
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
# --- Persistent storage ---
HISTORY_FILE = "chat_history.json"


def load_history(path):
    """Return the chat turns saved at *path*, or an empty list.

    A missing, unreadable, or corrupt file (invalid JSON, or JSON whose
    top-level value is not a list) yields ``[]`` so that a damaged history
    file cannot prevent the app from starting.

    Args:
        path: Location of the JSON history file.

    Returns:
        The list stored in the file, or ``[]`` when it cannot be used.
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError: missing/unreadable file.  ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        return []
    # The rest of the module appends to this object, so it must be a list.
    return data if isinstance(data, list) else []


conversation_history = load_history(HISTORY_FILE)
# Mutable, process-wide answer preferences; embedded as JSON in the system
# prompt and changed through update_preferences().
user_preferences = dict(response_style="formal", cite_sources=True)
# Shared Groq client, created once at import with GROQ_API_KEY; ask_llm()
# uses it for every chat-completion request.
client = Groq(api_key=GROQ_API_KEY)
def build_messages(context, history, preferences):
    """Assemble the message list for one chat-completion request.

    Args:
        context: Source text (page or transcript excerpt) embedded in the
            system prompt.
        history: Chat turns as a list of ``{"role": ..., "content": ...}``
            dicts, normally ending with the current user turn.  A bare
            string is treated as a single user question and ``None`` as
            "no turns".
        preferences: JSON-serialisable mapping embedded in the system prompt.

    Returns:
        A new list holding the system message followed by the chat turns.
        The caller's ``history`` object is not modified.
    """
    if isinstance(history, str):
        # Callers that only have a question may pass it directly.
        history = [{"role": "user", "content": history}]
    system_prompt = (
        "\n"
        "You are an AI assistant.\n"
        "User Preferences:\n"
        f"{json.dumps(preferences)}\n"
        "Context:\n"
        f"{context}\n"
    )
    return [{"role": "system", "content": system_prompt}, *(history or [])]


def ask_llm(context, history):
    """Ask the Groq chat model to answer using *context*.

    Args:
        context: Text the model should ground its answer in.
        history: List of chat-message dicts (or a single question string).

    Returns:
        The text of the model's first choice, or ``""`` if the response
        carries no content.

    Raises:
        Whatever the Groq client raises on request failure; callers are
        expected to handle it.
    """
    messages = build_messages(context, history, user_preferences)
    response = client.chat.completions.create(
        model="llama-3.1-8b-instant",
        messages=messages,
        temperature=0.3,
    )
    # Normalise a missing/None content so callers always receive a string.
    return response.choices[0].message.content or ""
# --- Website Scraper ---
def scrape_website(url, question):
    """Fetch a page through Bright Data and answer *question* about it.

    On success the question and the answer are appended to the shared
    ``conversation_history`` and the history is written to ``HISTORY_FILE``.

    Args:
        url: Address of the page to fetch.
        question: The user's question about the page.

    Returns:
        The model's answer, or a human-readable message when input is
        missing, the fetch fails, or no text can be extracted.  If the
        answer was produced but the history file could not be written, the
        answer is returned with a warning appended.
    """
    url = (url or "").strip()
    question = (question or "").strip()
    # Reject blank input before spending a proxy request and an LLM call.
    if not url:
        return "⚠️ Please enter a website URL."
    if not question:
        return "⚠️ Please enter a question."

    try:
        headers = {"Authorization": f"Bearer {BRIGHTDATA_API_KEY}"}
        payload = {"zone": "web_unlocker1", "url": url, "format": "raw"}
        r = requests.post(
            "https://api.brightdata.com/request",
            headers=headers,
            json=payload,
            timeout=60,
        )
        if r.status_code != 200:
            return f"Bright Data Error: {r.status_code}"
        soup = BeautifulSoup(r.text, "html.parser")
        text = soup.get_text(separator=" ", strip=True)
        if not text:
            return "⚠️ Could not extract content from the website."
        # Build temporary history for this turn; the shared history is only
        # updated once an answer has actually been produced.
        user_turn = {"role": "user", "content": question}
        answer = ask_llm(text[:12000], conversation_history + [user_turn])
    except Exception as e:
        return f"Error scraping website: {str(e)}"

    # Update session & persistent history
    conversation_history.append(user_turn)
    conversation_history.append({"role": "assistant", "content": answer})
    try:
        with open(HISTORY_FILE, "w", encoding="utf-8") as f:
            json.dump(conversation_history, f, indent=2)
    except OSError as e:
        # A failed save must not discard an answer we already have.
        return f"{answer}\n\n⚠️ Chat history could not be saved: {e}"
    return answer
# --- YouTube Q&A ---
from youtube_transcript_api import YouTubeTranscriptApi
def youtube_qa(video_id, question):
    """Answer *question* from the English transcript of a YouTube video.

    Args:
        video_id: YouTube video ID; surrounding whitespace is ignored.
        question: The user's question about the video.

    Returns:
        The model's answer, or a human-readable message when input is
        missing, no English transcript exists, the transcript is empty,
        transcript retrieval fails, or the model call fails.
    """
    video_id = (video_id or "").strip()
    question = (question or "").strip()
    if not video_id:
        return "⚠️ Please enter a YouTube video ID."
    if not question:
        return "⚠️ Please enter a question."

    try:
        api = YouTubeTranscriptApi()  # create instance
        # Try English transcript: first entry whose language code is "en".
        transcript = next(
            (t for t in api.list(video_id) if t.language_code == "en"),
            None,
        )
        if transcript is None:
            return "❌ No English transcript available for this video."
        full_text = " ".join(item.text for item in transcript.fetch())
    except Exception as e:
        return f"❌ Could not retrieve transcript.\nDetails: {str(e)}"

    if not full_text:
        return "⚠️ Transcript found but empty."

    # ask_llm expects a list of chat messages, not the raw question string.
    messages = [{"role": "user", "content": question}]
    try:
        return ask_llm(full_text[:12000], messages)
    except Exception as e:
        # Kept separate so a model failure is not reported as a transcript one.
        return f"❌ Could not generate an answer.\nDetails: {str(e)}"
# --- User Preferences Update ---
def update_preferences(style, cite_sources):
    """Update the shared ``user_preferences`` and report the new values.

    Args:
        style: New response style.  ``None`` (e.g. nothing selected in the
            UI) leaves the current style unchanged instead of storing null.
        cite_sources: Whether answers should cite sources; stored as a bool.

    Returns:
        A status string containing the preferences serialised as JSON.
    """
    if style is not None:
        user_preferences["response_style"] = style
    user_preferences["cite_sources"] = bool(cite_sources)
    return f"Preferences updated: {json.dumps(user_preferences)}"
# --- Gradio Interface ---
# Three-tab UI: website Q&A, YouTube transcript Q&A, and answer preferences.
# Heading and tab labels use the intended emoji (the previous text was
# mis-decoded UTF-8).
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 Version 3: Persistent Multi-Turn AI Chatbot")
    with gr.Tabs():
        # TAB 1: Website
        with gr.Tab("🌐 Website Q&A"):
            url_input = gr.Textbox(label="Enter Website URL")
            website_question = gr.Textbox(label="Ask a Question")
            website_output = gr.Textbox(label="Answer")
            website_btn = gr.Button("Ask")
            website_btn.click(
                scrape_website,
                inputs=[url_input, website_question],
                outputs=website_output,
            )
        # TAB 2: YouTube
        with gr.Tab("🎥 YouTube Transcript Q&A"):
            video_id_input = gr.Textbox(label="Enter YouTube Video ID")
            youtube_question = gr.Textbox(label="Ask a Question")
            youtube_output = gr.Textbox(label="Answer")
            youtube_btn = gr.Button("Ask")
            youtube_btn.click(
                youtube_qa,
                inputs=[video_id_input, youtube_question],
                outputs=youtube_output,
            )
        # TAB 3: Preferences
        with gr.Tab("⚙️ Preferences"):
            # Start the controls at the current preference values so that
            # pressing "Update Preferences" without touching them does not
            # overwrite the defaults with an empty selection.
            style_input = gr.Dropdown(
                choices=["formal", "informal"],
                value=user_preferences["response_style"],
                label="Response Style",
            )
            cite_input = gr.Checkbox(
                value=user_preferences["cite_sources"],
                label="Cite Sources?",
            )
            pref_btn = gr.Button("Update Preferences")
            pref_output = gr.Textbox(label="Status")
            pref_btn.click(
                update_preferences,
                inputs=[style_input, cite_input],
                outputs=pref_output,
            )
demo.launch()