Spaces:

Wosqa
/

MSDSF25M010-VER3

Sleeping

App Files Files Community

MSDSF25M010-VER3 / app.py

Wosqa

Upload 3 files

512dbe2 verified about 2 months ago

raw

history blame contribute delete

4.91 kB

	import os
	import json
	import requests
	import gradio as gr
	from bs4 import BeautifulSoup
	from groq import Groq
	from youtube_transcript_api import YouTubeTranscriptApi

	# API credentials are read from the environment; either may be None when unset.
	BRIGHTDATA_API_KEY = os.getenv("BRIGHTDATA_API_KEY")
	GROQ_API_KEY = os.getenv("GROQ_API_KEY")

	# --- Persistent storage ---
	# The chat history is kept in memory as a list of {"role", "content"} dicts
	# and mirrored to HISTORY_FILE as JSON.
	HISTORY_FILE = "chat_history.json"
	try:
	    with open(HISTORY_FILE, "r", encoding="utf-8") as f:
	        conversation_history = json.load(f)
	except FileNotFoundError:
	    # First run: nothing has been persisted yet.
	    conversation_history = []
	except (OSError, ValueError) as exc:
	    # A truncated or corrupt file (json.JSONDecodeError is a ValueError) must
	    # not prevent the app from starting; begin with an empty history instead.
	    print(f"Warning: could not load {HISTORY_FILE} ({exc}); starting with empty history.")
	    conversation_history = []

	# Callers concatenate and append to the history, so it has to be a list.
	if not isinstance(conversation_history, list):
	    print(f"Warning: {HISTORY_FILE} does not contain a JSON list; starting with empty history.")
	    conversation_history = []

	# Mutable, process-wide preferences; serialized into every system prompt.
	user_preferences = {
	    "response_style": "formal",
	    "cite_sources": True,
	}

	# Shared Groq client used for all chat completions.
	client = Groq(api_key=GROQ_API_KEY)

	def ask_llm(context, history):
	    """Send a chat completion request and return the model's reply text.

	    Args:
	        context: Text embedded in the system prompt under "Context:".
	        history: List of chat message dicts appended after the system message.

	    Returns:
	        The content of the first choice in the completion response.
	    """
	    preferences_json = json.dumps(user_preferences)
	    system_message = {
	        "role": "system",
	        "content": f"""
	You are an AI assistant.
	User Preferences:
	{preferences_json}

	Context:
	{context}
	""",
	    }
	    # List concatenation: the system message always comes first.
	    chat_messages = [system_message] + history
	    completion = client.chat.completions.create(
	        model="llama-3.1-8b-instant",
	        messages=chat_messages,
	        temperature=0.3,
	    )
	    first_choice = completion.choices[0]
	    return first_choice.message.content

	# --- Website Scraper ---
	def scrape_website(url, question):
	    """Fetch a page through Bright Data and answer a question about its text.

	    On success the question and answer are appended to the shared
	    conversation history, which is then rewritten to HISTORY_FILE.

	    Args:
	        url: Address of the page to fetch.
	        question: The user's question about the page.

	    Returns:
	        The model's answer, or a human-readable error/warning string.
	    """
	    try:
	        response = requests.post(
	            "https://api.brightdata.com/request",
	            headers={"Authorization": f"Bearer {BRIGHTDATA_API_KEY}"},
	            json={"zone": "web_unlocker1", "url": url, "format": "raw"},
	            timeout=60,
	        )
	        if response.status_code != 200:
	            return f"Bright Data Error: {response.status_code}"

	        # Reduce the HTML to its visible text.
	        page_text = BeautifulSoup(response.text, "html.parser").get_text(
	            separator=" ", strip=True
	        )
	        if not page_text:
	            return "⚠️ Could not extract content from the website."

	        # Ask with the prior turns plus this question; the shared history is
	        # only extended once an answer has actually been produced.
	        user_turn = {"role": "user", "content": question}
	        answer = ask_llm(page_text[:12000], conversation_history + [user_turn])

	        conversation_history.extend(
	            [user_turn, {"role": "assistant", "content": answer}]
	        )
	        with open(HISTORY_FILE, "w") as history_file:
	            json.dump(conversation_history, history_file, indent=2)
	        return answer
	    except Exception as exc:
	        return f"Error scraping website: {str(exc)}"

	# --- YouTube Q&A ---
	from youtube_transcript_api import YouTubeTranscriptApi

	def youtube_qa(video_id, question):
	    """Answer a question using the English transcript of a YouTube video.

	    Args:
	        video_id: YouTube video ID; surrounding whitespace is stripped.
	        question: The user's question about the video.

	    Returns:
	        The model's answer, or a human-readable error/warning string when no
	        English transcript exists, the transcript is empty, or any step fails.
	    """
	    try:
	        video_id = video_id.strip()

	        api = YouTubeTranscriptApi()  # create instance
	        transcript_list = api.list(video_id)

	        # Pick the first transcript whose language code is exactly "en".
	        transcript = next(
	            (t for t in transcript_list if t.language_code == "en"), None
	        )
	        if transcript is None:
	            return "❌ No English transcript available for this video."

	        full_text = " ".join(item.text for item in transcript.fetch())
	        if not full_text:
	            return "⚠️ Transcript found but empty."

	        # ask_llm concatenates its second argument onto a list of chat
	        # messages, so the question must be wrapped as a one-message history;
	        # passing the bare string raised TypeError on every call.
	        history = [{"role": "user", "content": question}]
	        return ask_llm(full_text[:12000], history)

	    except Exception as e:
	        return f"❌ Could not retrieve transcript.\nDetails: {str(e)}"

	# --- User Preferences Update ---
	def update_preferences(style, cite_sources):
	    """Overwrite both shared preference values and report the new state.

	    Args:
	        style: New value for the "response_style" preference.
	        cite_sources: New value for the "cite_sources" preference.

	    Returns:
	        A status string containing the preferences serialized as JSON.
	    """
	    user_preferences.update(response_style=style, cite_sources=cite_sources)
	    serialized = json.dumps(user_preferences)
	    return "Preferences updated: " + serialized

	# --- Gradio Interface ---
	# Three-tab UI: website Q&A, YouTube transcript Q&A, and preference editing.
	with gr.Blocks() as demo:
	    gr.Markdown("# 🤖 Version 3: Persistent Multi-Turn AI Chatbot")

	    with gr.Tabs():
	        # TAB 1: Website
	        with gr.Tab("🌐 Website Q&A"):
	            url_input = gr.Textbox(label="Enter Website URL")
	            website_question = gr.Textbox(label="Ask a Question")
	            website_output = gr.Textbox(label="Answer")
	            website_btn = gr.Button("Ask")
	            website_btn.click(
	                scrape_website,
	                inputs=[url_input, website_question],
	                outputs=website_output,
	            )

	        # TAB 2: YouTube
	        with gr.Tab("🎥 YouTube Transcript Q&A"):
	            video_id_input = gr.Textbox(label="Enter YouTube Video ID")
	            youtube_question = gr.Textbox(label="Ask a Question")
	            youtube_output = gr.Textbox(label="Answer")
	            youtube_btn = gr.Button("Ask")
	            youtube_btn.click(
	                youtube_qa,
	                inputs=[video_id_input, youtube_question],
	                outputs=youtube_output,
	            )

	        # TAB 3: Preferences
	        with gr.Tab("⚙️ Preferences"):
	            # Seed the widgets from the current preferences so that pressing
	            # "Update Preferences" without touching them does not silently
	            # change the stored values (e.g. flip cite_sources to False).
	            style_input = gr.Dropdown(
	                choices=["formal", "informal"],
	                value=user_preferences["response_style"],
	                label="Response Style",
	            )
	            cite_input = gr.Checkbox(
	                value=user_preferences["cite_sources"],
	                label="Cite Sources?",
	            )
	            pref_btn = gr.Button("Update Preferences")
	            pref_output = gr.Textbox(label="Status")
	            pref_btn.click(
	                update_preferences,
	                inputs=[style_input, cite_input],
	                outputs=pref_output,
	            )

	demo.launch()