Spaces:
Sleeping
Sleeping
| import os | |
| import json | |
| import requests | |
| import gradio as gr | |
| from bs4 import BeautifulSoup | |
| from groq import Groq | |
| from youtube_transcript_api import YouTubeTranscriptApi | |
# API credentials are read from the environment; either may be None if unset.
BRIGHTDATA_API_KEY = os.getenv("BRIGHTDATA_API_KEY")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# --- Persistent storage ---
HISTORY_FILE = "chat_history.json"


def load_history(path):
    """Return the chat history stored as JSON at *path*.

    The file is expected to hold a JSON list of message dicts, each with
    ``"role"`` and ``"content"`` keys. A missing, unreadable, or corrupt
    file yields an empty list instead of raising, so a damaged history file
    cannot prevent the app from starting. Entries that are not message
    dicts are dropped.
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError: invalid JSON or
        # undecodable bytes (JSONDecodeError and UnicodeDecodeError are
        # both ValueError subclasses).
        return []
    if not isinstance(data, list):
        return []
    return [
        msg for msg in data
        if isinstance(msg, dict) and "role" in msg and "content" in msg
    ]


# Shared, mutable chat history; loaded once at import time.
conversation_history = load_history(HISTORY_FILE)

# Serialized into the system prompt of every LLM request.
user_preferences = {
    "response_style": "formal",
    "cite_sources": True,
}
client = Groq(api_key=GROQ_API_KEY)


def ask_llm(context, history):
    """Ask the Groq chat model and return the text of its first choice.

    A system message is built from the JSON-serialized ``user_preferences``
    and the given ``context``; it is placed in front of ``history`` (a list
    of chat message dicts) to form the request.
    """
    prompt_lines = [
        "",
        "You are an AI assistant.",
        "User Preferences:",
        f"{json.dumps(user_preferences)}",
        "Context:",
        f"{context}",
        "",
    ]
    system_message = {"role": "system", "content": "\n".join(prompt_lines)}

    completion = client.chat.completions.create(
        model="llama-3.1-8b-instant",
        messages=[system_message] + history,
        temperature=0.3,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
# --- Website Scraper ---
def scrape_website(url, question):
    """Fetch a web page via Bright Data and answer *question* about it.

    The page is requested through the Bright Data request API, reduced to
    plain text with BeautifulSoup, and the first 12000 characters are passed
    to ``ask_llm`` as context together with the running conversation. On
    success the question and answer are appended to ``conversation_history``
    and the history is written to ``HISTORY_FILE``.

    Returns the answer text, or a human-readable message when input is
    missing, the request fails, or no text could be extracted. Exceptions
    are not propagated; they are reported in the returned string.
    """
    url = (url or "").strip()
    question = (question or "").strip()
    if not url or not question:
        # Avoid a pointless (billable) API round trip on blank input.
        return "Please enter both a website URL and a question."

    try:
        headers = {"Authorization": f"Bearer {BRIGHTDATA_API_KEY}"}
        payload = {"zone": "web_unlocker1", "url": url, "format": "raw"}
        r = requests.post(
            "https://api.brightdata.com/request",
            headers=headers,
            json=payload,
            timeout=60,
        )
        if r.status_code != 200:
            return f"Bright Data Error: {r.status_code}"

        soup = BeautifulSoup(r.text, "html.parser")
        text = soup.get_text(separator=" ", strip=True)
        if not text:
            return "⚠️ Could not extract content from the website."

        # The new question is only committed to the shared history once the
        # model has answered, so a failed call leaves the history untouched.
        user_turn = {"role": "user", "content": question}
        answer = ask_llm(text[:12000], conversation_history + [user_turn])

        conversation_history.append(user_turn)
        conversation_history.append({"role": "assistant", "content": answer})
    except Exception as e:
        return f"Error scraping website: {str(e)}"

    # Persist separately: a failed write must not discard a good answer.
    # Writing to a temp file and renaming keeps the existing history file
    # intact if the write is interrupted.
    try:
        tmp_path = HISTORY_FILE + ".tmp"
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(conversation_history, f, indent=2)
        os.replace(tmp_path, HISTORY_FILE)
    except OSError as e:
        return f"{answer}\n\n(Note: chat history could not be saved: {e})"
    return answer
# --- YouTube Q&A ---
| from youtube_transcript_api import YouTubeTranscriptApi | |
def youtube_qa(video_id, question):
    """Answer *question* using the English transcript of a YouTube video.

    Lists the transcripts available for ``video_id``, picks the ``"en"``
    transcript (falling back to a regional variant such as ``"en-GB"``),
    joins its snippets into one string and passes the first 12000
    characters to ``ask_llm`` as context. The question is sent as a
    single-turn conversation; this function does not read or update
    ``conversation_history``.

    Returns the model's answer, or a human-readable message when input is
    missing, no English transcript exists, the transcript is empty, or any
    exception is raised along the way.
    """
    video_id = (video_id or "").strip()
    question = (question or "").strip()
    if not video_id or not question:
        return "Please enter both a YouTube video ID and a question."

    try:
        api = YouTubeTranscriptApi()
        available = list(api.list(video_id))

        # Prefer the plain "en" track; otherwise accept a regional variant.
        transcript = next(
            (t for t in available if t.language_code == "en"), None
        )
        if transcript is None:
            transcript = next(
                (t for t in available if t.language_code.startswith("en-")),
                None,
            )
        if transcript is None:
            return "❌ No English transcript available for this video."

        full_text = " ".join(item.text for item in transcript.fetch())
        if not full_text.strip():
            return "⚠️ Transcript found but empty."

        # ask_llm concatenates its second argument onto a list of messages,
        # so the question must be wrapped as a chat message, not passed as a
        # bare string.
        history = [{"role": "user", "content": question}]
        return ask_llm(full_text[:12000], history)
    except Exception as e:
        return f"❌ Could not retrieve transcript.\nDetails: {str(e)}"
# --- User Preferences Update ---
def update_preferences(style, cite_sources):
    """Update the shared ``user_preferences`` and return a status message.

    ``style`` replaces the stored response style only when it is non-empty;
    a ``None`` or empty value (e.g. nothing selected in the UI) keeps the
    current style so that no null style ends up in the system prompt.
    ``cite_sources`` is stored as a bool.

    Returns a string containing the resulting preferences as JSON.
    """
    if style:
        user_preferences["response_style"] = style
    user_preferences["cite_sources"] = bool(cite_sources)
    return f"Preferences updated: {json.dumps(user_preferences)}"
# --- Gradio Interface ---
# Three-tab UI: website Q&A, YouTube transcript Q&A, and preference editing.
# Each tab wires one button to the matching handler function.
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 Version 3: Persistent Multi-Turn AI Chatbot")
    with gr.Tabs():
        # TAB 1: Website
        with gr.Tab("🌐 Website Q&A"):
            url_input = gr.Textbox(label="Enter Website URL")
            website_question = gr.Textbox(label="Ask a Question")
            website_output = gr.Textbox(label="Answer")
            website_btn = gr.Button("Ask")
            website_btn.click(
                scrape_website,
                inputs=[url_input, website_question],
                outputs=website_output,
            )
        # TAB 2: YouTube
        with gr.Tab("🎥 YouTube Transcript Q&A"):
            video_id_input = gr.Textbox(label="Enter YouTube Video ID")
            youtube_question = gr.Textbox(label="Ask a Question")
            youtube_output = gr.Textbox(label="Answer")
            youtube_btn = gr.Button("Ask")
            youtube_btn.click(
                youtube_qa,
                inputs=[video_id_input, youtube_question],
                outputs=youtube_output,
            )
        # TAB 3: Preferences
        with gr.Tab("⚙️ Preferences"):
            # Initialise the widgets from the actual preference values so
            # pressing "Update Preferences" without touching them does not
            # silently change the stored settings.
            style_input = gr.Dropdown(
                choices=["formal", "informal"],
                value=user_preferences["response_style"],
                label="Response Style",
            )
            cite_input = gr.Checkbox(
                value=user_preferences["cite_sources"],
                label="Cite Sources?",
            )
            pref_btn = gr.Button("Update Preferences")
            pref_output = gr.Textbox(label="Status")
            pref_btn.click(
                update_preferences,
                inputs=[style_input, cite_input],
                outputs=pref_output,
            )

demo.launch()