Spaces:

MuhammadAhmadZia
/

MSDSF25M004_Ver3

Sleeping

App Files Files Community

MuhammadAhmadZia committed on Mar 1

Commit

e9fe853

verified ·

1 Parent(s): cf8928d

Upload folder using huggingface_hub

Browse files

Files changed (2) hide show

app - Copy.py +541 -0
app.py +13 -2

app - Copy.py ADDED Viewed

	@@ -0,0 +1,541 @@

+"""
+Version 3 — Multi-Turn AI Chatbot with Persistent Storage
+This version extends Version 2 with three major enhancements:
+  1. Multi-Turn Conversation (Short-term/Session Memory)
+  2. Persistent Storage (Cross-Session Memory via JSON file)
+  3. Editable User Preferences (injected into system prompt)
+All features from Version 2 (Website Scraper + YouTube Transcript) are carried forward.
+Usage:
+    1. Set environment variables: GROQ_API_KEY, BRIGHT_DATA_USERNAME, BRIGHT_DATA_PASSWORD
+    2. pip install -r requirements.txt
+    3. python app.py
+"""
+import os
+import json
+import requests
+import gradio as gr
+from openai import OpenAI
+from bs4 import BeautifulSoup
+from dotenv import load_dotenv
+from youtube_transcript_api import YouTubeTranscriptApi
+# ─── Load environment variables ────────────────────────────────────────────────
+# Try loading from the keys folder (local dev) or current dir (HF Spaces).
+# Both calls pass override=True, so values found in a .env file replace
+# variables already present in the process environment, and the second file
+# (".env") wins over the first when both define the same key.
+load_dotenv("../../keys/.env", override=True)
+load_dotenv(".env", override=True)
+# Accept either spelling of the Groq key variable; the upper-case name is tried first.
+groq_api_key = os.getenv("GROQ_API_KEY") or os.getenv("GROQ_API_Key")
+# Both Bright Data values are optional: scrape_website() falls back to a plain
+# request when either one is missing.
+bright_data_username = os.getenv("BRIGHT_DATA_USERNAME")
+bright_data_password = os.getenv("BRIGHT_DATA_PASSWORD")
+# ─── Set up Groq client (OpenAI-compatible API) ───────────────────────────────
+# The OpenAI SDK is pointed at Groq's endpoint via base_url.
+client = OpenAI(
+    base_url="https://api.groq.com/openai/v1",
+    api_key=groq_api_key
+)
+# Model name sent with every chat completion request in this file.
+MODEL = "llama-3.3-70b-versatile"
+# ─── Global variables ─────────────────────────────────────────────────────────
+# NOTE(review): these are module-level, so presumably every browser session
+# served by this process reads and overwrites the same values — confirm that
+# single-user use is intended.
+scraped_data = ""           # Stores website scraped data (Tab 1)
+transcript_data = ""        # Stores YouTube transcript data (Tab 2)
+# ─── File paths for persistent storage ─────────────────────────────────────────
+# Relative paths: resolved against the working directory the app is started from.
+CHAT_HISTORY_FILE = "chat_history.json"
+USER_PREFERENCES_FILE = "user_preferences.json"
+# ─── Global conversation history (stored in RAM during runtime) ────────────────
+# List of {"role": ..., "content": ...} dicts; populated by load_chat_history()
+# and appended to by chat_with_memory().
+conversation_history = []
+# ══════════════════════════════════════════════════════════════════════════════
+# PERSISTENT STORAGE FUNCTIONS
+# ══════════════════════════════════════════════════════════════════════════════
+def load_chat_history():
+    """
+    Load previous conversation history from the JSON file on disk.
+
+    Called once at startup so the bot remembers past conversations.
+    Rebinds the module-level ``conversation_history`` and returns nothing.
+
+    The history is reset to an empty list when the file is missing,
+    unreadable, not valid JSON, or does not contain a JSON list. Entries that
+    are not ``{"role": str, "content": str}`` dicts are dropped, because the
+    rest of the app indexes ``msg["role"]`` / ``msg["content"]`` directly and
+    forwards every entry to the LLM API.
+    """
+    global conversation_history
+    # Start from a known-good value so every early return leaves a usable list.
+    conversation_history = []
+    if not os.path.exists(CHAT_HISTORY_FILE):
+        print("No previous chat history found. Starting fresh.")
+        return
+    try:
+        with open(CHAT_HISTORY_FILE, "r", encoding="utf-8") as f:
+            loaded = json.load(f)
+    except (OSError, ValueError) as e:
+        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
+        print(f"❌ Error loading chat history: {e}")
+        return
+    if not isinstance(loaded, list):
+        # A dict/null/number here would break later .append() calls.
+        print(
+            "❌ Error loading chat history: "
+            f"expected a JSON list, got {type(loaded).__name__}"
+        )
+        return
+    conversation_history = [
+        msg
+        for msg in loaded
+        if isinstance(msg, dict)
+        and isinstance(msg.get("role"), str)
+        and isinstance(msg.get("content"), str)
+    ]
+    skipped = len(loaded) - len(conversation_history)
+    if skipped:
+        print(f"⚠️ Skipped {skipped} malformed entries in {CHAT_HISTORY_FILE}")
+    print(f"✅ Loaded {len(conversation_history)} messages from {CHAT_HISTORY_FILE}")
+def save_chat_history():
+    """
+    Persist the in-memory conversation to CHAT_HISTORY_FILE as indented JSON.
+
+    Invoked after each successful exchange so a restart does not lose turns.
+    Any failure is reported on stdout and otherwise ignored (best effort).
+    """
+    try:
+        with open(CHAT_HISTORY_FILE, "w") as history_file:
+            json.dump(conversation_history, history_file, indent=2)
+        message_count = len(conversation_history)
+        print(f"💾 Saved {message_count} messages to {CHAT_HISTORY_FILE}")
+    except Exception as err:
+        print(f"❌ Error saving chat history: {err}")
+def load_user_preferences():
+    """
+    Read the saved preferences text from USER_PREFERENCES_FILE.
+
+    Returns the value stored under the "preferences" key, or an empty string
+    when the file is absent, lacks that key, or cannot be read/parsed.
+    """
+    # Nothing saved yet: behave as if the preferences were empty.
+    if not os.path.exists(USER_PREFERENCES_FILE):
+        return ""
+    try:
+        with open(USER_PREFERENCES_FILE, "r") as prefs_file:
+            stored = json.load(prefs_file)
+        return stored.get("preferences", "")
+    except Exception as err:
+        print(f"❌ Error loading preferences: {err}")
+        return ""
+def save_user_preferences(preferences_text):
+    """
+    Write the preferences text to USER_PREFERENCES_FILE.
+
+    The text is stored as JSON under the single key "preferences". Failures
+    are printed and swallowed, so callers get no error signal.
+    """
+    payload = {"preferences": preferences_text}
+    try:
+        with open(USER_PREFERENCES_FILE, "w") as prefs_file:
+            json.dump(payload, prefs_file, indent=2)
+        print(f"💾 Saved user preferences to {USER_PREFERENCES_FILE}")
+    except Exception as err:
+        print(f"❌ Error saving preferences: {err}")
+def get_display_history():
+    """
+    Build the list shown in the Gradio Chatbot from conversation_history.
+
+    Only "user" and "assistant" entries are kept (system messages are not
+    meant to be displayed). Each kept entry is copied into a fresh
+    {"role": ..., "content": ...} dict, so the result shares no dict objects
+    with the global history.
+    """
+    visible_roles = ("user", "assistant")
+    return [
+        {"role": entry["role"], "content": entry["content"]}
+        for entry in conversation_history
+        if entry["role"] in visible_roles
+    ]
+# ══════════════════════════════════════════════════════════════════════════════
+# TAB 1: WEBSITE SCRAPER (carried forward from Version 1 & 2)
+# ══════════════════════════════════════════════════════════════════════════════
+def scrape_website(url):
+    """
+    Fetch the raw HTML of ``url``.
+
+    When both Bright Data credentials are configured the request goes through
+    the Bright Data Web Unlocker proxy (60 s timeout); otherwise a direct
+    request with a browser-like User-Agent is made (15 s timeout).
+
+    Args:
+        url: Address of the page to download.
+
+    Returns:
+        The response body as text on success, or a string starting with
+        "Error scraping website:" when the request fails or the server
+        answers with an HTTP error status.
+    """
+    # Local import keeps this block self-contained (no top-of-file change needed).
+    from urllib.parse import quote
+
+    try:
+        print(f"Scraping URL: {url}")
+        if bright_data_username and bright_data_password:
+            # Percent-encode the credentials: characters such as "@", ":" or
+            # "/" in a password would otherwise corrupt the proxy URL.
+            proxy_user = quote(bright_data_username, safe="")
+            proxy_pass = quote(bright_data_password, safe="")
+            proxy_url = f"http://{proxy_user}:{proxy_pass}@brd.superproxy.io:33335"
+            proxies = {"http": proxy_url, "https": proxy_url}
+            print("Using Bright Data Web Unlocker proxy to bypass bot protection...")
+            # NOTE(review): verify=False is kept on the proxy path only —
+            # presumably the unlocker re-signs TLS with its own CA; confirm and
+            # prefer passing that CA bundle via verify=<path> instead.
+            response = requests.get(url, proxies=proxies, timeout=60, verify=False)
+        else:
+            print("Bright Data credentials not found. Using standard requests...")
+            headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"}
+            # Direct connection: keep normal certificate verification enabled;
+            # nothing on this path requires turning it off.
+            response = requests.get(url, headers=headers, timeout=15)
+        response.raise_for_status()
+        print(f"Successfully scraped! Status code: {response.status_code}")
+        return response.text
+    except requests.exceptions.RequestException as e:
+        return f"Error scraping website: {str(e)}"
+def parse_goodreads_books(html_content):
+    """
+    Turn Goodreads list-page HTML into a ranked, human-readable book summary.
+
+    Three strategies are tried in order:
+      1. Table rows marked with the schema.org Book itemtype.
+      2. Loose title / author / rating selectors, paired up by position.
+      3. Plain body text (first 5000 characters) behind a "could not parse" notice.
+
+    Returns a single string in every case.
+    """
+    def text_or(tag, fallback):
+        # Stripped text of a tag, or the fallback when the tag is missing.
+        return tag.get_text(strip=True) if tag else fallback
+
+    soup = BeautifulSoup(html_content, "html.parser")
+
+    # Strategy 1: structured rows.
+    books = [
+        {
+            "rank": position,
+            "title": text_or(row.select_one(".bookTitle span"), "Unknown Title"),
+            "author": text_or(row.select_one(".authorName span"), "Unknown Author"),
+            "rating": text_or(row.select_one(".minirating"), "No Rating"),
+        }
+        for position, row in enumerate(
+            soup.select("tr[itemtype='http://schema.org/Book']"), 1
+        )
+    ]
+
+    # Strategy 2: independent tag lists matched by index; the title list
+    # drives the count, shorter author/rating lists fall back to defaults.
+    if not books:
+        titles = soup.select("a.bookTitle") or soup.select("[class*='bookTitle']")
+        authors = soup.select("a.authorName") or soup.select("[class*='authorName']")
+        ratings = soup.select(".minirating") or soup.select("[class*='rating']")
+        for index, title_tag in enumerate(titles):
+            author_tag = authors[index] if index < len(authors) else None
+            rating_tag = ratings[index] if index < len(ratings) else None
+            books.append({
+                "rank": index + 1,
+                "title": title_tag.get_text(strip=True),
+                "author": text_or(author_tag, "Unknown"),
+                "rating": text_or(rating_tag, "N/A"),
+            })
+
+    # Strategy 3: give the caller the visible text so Q&A still has material.
+    if not books:
+        page_text = ""
+        if soup.body:
+            for noisy_tag in soup.body(["script", "style", "img", "input"]):
+                noisy_tag.decompose()
+            page_text = soup.body.get_text(separator="\n", strip=True)
+        return f"Could not parse structured book data. Raw content:\n\n{page_text[:5000]}"
+
+    summary_lines = [
+        f"Rank #{book['rank']}: {book['title']} by {book['author']} — {book['rating']}\n"
+        for book in books
+    ]
+    return f"Found {len(books)} books:\n\n" + "".join(summary_lines)
+def scrape_and_display(url):
+    """
+    Scrape ``url``, parse it, and keep the parsed text for the Tab 1 Q&A.
+
+    Args:
+        url: Address typed by the user; surrounding whitespace is ignored.
+
+    Returns:
+        A prompt to enter a URL when the input is empty, the scraper's error
+        message when the download failed, or a success banner followed by the
+        parsed data. ``scraped_data`` is only updated on success.
+    """
+    global scraped_data
+    # Use the stripped value for the request as well as for validation, so a
+    # pasted URL with stray whitespace or a newline is not sent as-is.
+    cleaned_url = url.strip() if url else ""
+    if not cleaned_url:
+        return "❗ Please enter a valid URL."
+    html_content = scrape_website(cleaned_url)
+    # Match the scraper's full failure prefix: a bare "Error" check would also
+    # reject a legitimate page whose body happens to begin with that word.
+    if html_content.startswith("Error scraping website:"):
+        return html_content
+    parsed_data = parse_goodreads_books(html_content)
+    scraped_data = parsed_data
+    return f"✅ Website scraped successfully!\n\n{parsed_data}"
+def ask_ai_website(user_question, history):
+    """
+    Answer a Tab 1 question using only the most recently scraped data.
+
+    Args:
+        user_question: Text the user typed into the chat box.
+        history: Supplied by gr.ChatInterface; not used, each question is
+            answered independently.
+
+    Returns:
+        The model's reply, a warning when nothing has been scraped yet, or an
+        "❌ Error: ..." string when the API call raises.
+    """
+    global scraped_data
+    if not scraped_data:
+        return "⚠️ No data scraped yet! Enter a URL above and click 'Scrape Website' first."
+    # The scraped text is embedded in the system prompt so the model is
+    # instructed to stay within it.
+    grounding_prompt = (
+        "You are a helpful assistant that answers questions based ONLY on the provided scraped website data.\n"
+        "RULES: Only use info from the data below. If not available, say so. Be concise.\n"
+        "Scraped Data:\n"
+        f"{scraped_data}"
+    )
+    request_messages = [
+        {"role": "system", "content": grounding_prompt},
+        {"role": "user", "content": user_question},
+    ]
+    try:
+        completion = client.chat.completions.create(
+            model=MODEL,
+            messages=request_messages,
+            temperature=0.3,
+        )
+        return completion.choices[0].message.content
+    except Exception as err:
+        return f"❌ Error: {str(err)}"
+# ══════════════════════════════════════════════════════════════════════════════
+# TAB 2: YOUTUBE TRANSCRIPT Q&A (carried forward from Version 2)
+# ══════════════════════════════════════════════════════════════════════════════
+def fetch_transcript(video_id):
+    """
+    Download a YouTube transcript by Video ID and keep it for the Tab 2 Q&A.
+
+    On success the joined transcript is stored in ``transcript_data`` and a
+    banner with the character count plus the first 2000 characters is
+    returned. On failure ``transcript_data`` is cleared and an error message
+    is returned. Blank input returns a prompt without touching any state.
+    """
+    global transcript_data
+    cleaned_id = video_id.strip() if video_id else ""
+    if not cleaned_id:
+        return "❗ Please enter a valid YouTube Video ID."
+    try:
+        snippets = YouTubeTranscriptApi().fetch(cleaned_id)
+        full_text = " ".join(snippet.text for snippet in snippets)
+        transcript_data = full_text
+        # Only the preview is truncated; the stored transcript stays complete.
+        suffix = "..." if len(full_text) > 2000 else ""
+        return f"✅ Transcript fetched! ({len(full_text)} chars)\n\n{full_text[:2000]}{suffix}"
+    except Exception as err:
+        transcript_data = ""
+        return f"❌ Error fetching transcript: {str(err)}\n\nMake sure the Video ID is correct and the video has captions."
+def ask_ai_youtube(user_question, history):
+    """
+    Answer a Tab 2 question using only the most recently fetched transcript.
+
+    Args:
+        user_question: Text the user typed into the chat box.
+        history: Supplied by gr.ChatInterface; not used, each question is
+            answered independently.
+
+    Returns:
+        The model's reply, a warning when no transcript is loaded, or an
+        "❌ Error: ..." string when the API call raises.
+    """
+    global transcript_data
+    if not transcript_data:
+        return "⚠️ No transcript fetched yet! Enter a Video ID above and click 'Fetch Transcript' first."
+    # The whole transcript travels in the system prompt as the only allowed source.
+    grounding_prompt = (
+        "You are a helpful assistant that answers questions based ONLY on the provided YouTube video transcript.\n"
+        "RULES: Only use info from the transcript below. If not available, say so. Be concise.\n"
+        "Transcript:\n"
+        f"{transcript_data}"
+    )
+    request_messages = [
+        {"role": "system", "content": grounding_prompt},
+        {"role": "user", "content": user_question},
+    ]
+    try:
+        completion = client.chat.completions.create(
+            model=MODEL,
+            messages=request_messages,
+            temperature=0.3,
+        )
+        return completion.choices[0].message.content
+    except Exception as err:
+        return f"❌ Error: {str(err)}"
+# ══════════════════════════════════════════════════════════════════════════════
+# TAB 3: MULTI-TURN AI CHAT WITH PERSISTENT MEMORY (new in Version 3)
+# ══════════════════════════════════════════════════════════════════════════════
+def chat_with_memory(user_message, history, user_preferences):
+    """
+    Multi-turn chat function with persistent memory.
+
+    How it works:
+    1. Builds the system prompt, injecting the user preferences when present
+    2. Appends the user message to conversation_history
+    3. Sends the ENTIRE conversation_history to the LLM (so it has full context)
+    4. Appends the assistant response to conversation_history
+    5. Saves everything to disk immediately
+
+    Args:
+        user_message: The user's question (string)
+        history: Chat history managed by Gradio (for display only; unused here)
+        user_preferences: The user's preferences text from the textbox
+
+    Returns:
+        The assistant's reply, or an "❌ Error: ..." string when the API call
+        fails. On failure the user message is removed again, so the stored
+        history only ever contains completed exchanges.
+    """
+    global conversation_history
+    # ── Step 1: Build the system prompt with user preferences ──
+    base_system_prompt = "You are a helpful AI assistant."
+    if user_preferences and user_preferences.strip():
+        system_prompt = f"""{base_system_prompt}
+The user has set the following preferences. Always respect these when responding:
+{user_preferences}"""
+    else:
+        system_prompt = base_system_prompt
+    # ── Step 2: Add the user message to conversation history ──
+    # Keep a reference to the exact entry so a rollback can verify what it removes.
+    user_entry = {"role": "user", "content": user_message}
+    conversation_history.append(user_entry)
+    # ── Step 3: Build the messages list for the API call ──
+    # System prompt first, then every previous turn, so the model sees full context.
+    messages_for_api = [{"role": "system", "content": system_prompt}]
+    messages_for_api.extend(conversation_history)
+    print(f"\n📤 Sending {len(messages_for_api)} messages to LLM (including system prompt)")
+    # ── Step 4: Call the Groq LLM with full history ──
+    # Only the API call is guarded: a failure in the later bookkeeping must not
+    # be mistaken for a failed request and roll back the wrong entry.
+    try:
+        response = client.chat.completions.create(
+            model=MODEL,
+            messages=messages_for_api,
+            temperature=0.7
+        )
+        assistant_message = response.choices[0].message.content
+    except Exception as e:
+        # Remove the user message we just added since the API call failed —
+        # but only if it is still the last entry.
+        if conversation_history and conversation_history[-1] is user_entry:
+            conversation_history.pop()
+        return f"❌ Error: {str(e)}"
+    # ── Step 5: Add assistant response to history ──
+    conversation_history.append({"role": "assistant", "content": assistant_message})
+    # ── Step 6: Save to disk immediately ──
+    save_chat_history()
+    print(f"📊 Total messages in history: {len(conversation_history)}")
+    return assistant_message
+def save_preferences_btn(preferences_text):
+    """
+    Click handler for the Save Preferences button.
+
+    Forwards the text to save_user_preferences() and returns the status line
+    shown in the UI. The status is the same whether or not the write
+    succeeded, because the saver reports its own errors on stdout only.
+    """
+    save_user_preferences(preferences_text)
+    status_line = "✅ Preferences saved successfully!"
+    return status_line
+def clear_memory():
+    """
+    Forget everything: reset the in-memory history and delete both the chat
+    history file and the preferences file if they exist.
+
+    Returns:
+        A 3-tuple for the Gradio outputs wired to the Clear button:
+        (None for the chatbot, "" for the preferences box, status message).
+    """
+    global conversation_history
+    conversation_history = []
+    # Remove persisted state in the same order as before: history, then preferences.
+    for stale_path in (CHAT_HISTORY_FILE, USER_PREFERENCES_FILE):
+        if os.path.exists(stale_path):
+            os.remove(stale_path)
+    print("🗑️ Memory cleared — both RAM and local disk")
+    cleared_outputs = (None, "", "✅ All memory cleared!")
+    return cleared_outputs
+# ══════════════════════════════════════════════════════════════════════════════
+# STARTUP: Load previous session from disk
+# ══════════════════════════════════════════════════════════════════════════════
+# Runs at import time, before the UI below is built: the Chatbot's initial
+# value and the preferences textbox both read the state loaded here.
+load_chat_history()
+# Preferences text from the last session ("" when none was saved).
+saved_preferences = load_user_preferences()
+# ══════════════════════════════════════════════════════════════════════════════
+# BUILD THE GRADIO INTERFACE WITH 3 TABS
+# ══════════════════════════════════════════════════════════════════════════════
+with gr.Blocks(title="Multi-Turn AI Assistant with Memory") as demo:
+    gr.Markdown("# 🧠 Multi-Turn AI Assistant with Memory")
+    gr.Markdown("### Scrape websites, fetch transcripts, and chat with persistent memory")
+    with gr.Tabs():
+        # ──────────────────────────────────────────────────────────────────
+        # TAB 1: Website Scraper Q&A (from Version 1)
+        # ──────────────────────────────────────────────────────────────────
+        with gr.Tab("🌐 Website Scraper"):
+            gr.Markdown("## Scrape a Bot-Protected Website and Ask Questions")
+            with gr.Row():
+                url_input = gr.Textbox(
+                    label="Website URL",
+                    placeholder="https://www.goodreads.com/list/show/1.Best_Books_Ever",
+                    scale=4
+                )
+                scrape_btn = gr.Button("🔍 Scrape Website", variant="primary", scale=1)
+            scrape_output = gr.Textbox(label="Scraped Data", lines=8, interactive=False)
+            scrape_btn.click(fn=scrape_and_display, inputs=[url_input], outputs=[scrape_output])
+            gr.Markdown("### Ask Questions About the Scraped Data")
+            web_chat = gr.ChatInterface(
+                fn=ask_ai_website,
+                description="Example: 'What is the top-ranked book?' or 'Who wrote the second book?'",
+                flagging_mode="never"
+            )
+        # ──────────────────────────────────────────────────────────────────
+        # TAB 2: YouTube Transcript Q&A (from Version 2)
+        # ──────────────────────────────────────────────────────────────────
+        with gr.Tab("🎬 YouTube Transcript"):
+            gr.Markdown("## Fetch a YouTube Video Transcript and Ask Questions")
+            gr.Markdown("Enter a YouTube **Video ID** (e.g., `dQw4w9WgXcQ`)")
+            with gr.Row():
+                video_id_input = gr.Textbox(
+                    label="YouTube Video ID",
+                    placeholder="dQw4w9WgXcQ",
+                    scale=4
+                )
+                fetch_btn = gr.Button("📥 Fetch Transcript", variant="primary", scale=1)
+            transcript_output = gr.Textbox(label="Video Transcript", lines=8, interactive=False)
+            fetch_btn.click(fn=fetch_transcript, inputs=[video_id_input], outputs=[transcript_output])
+            gr.Markdown("### Ask Questions About the Video")
+            yt_chat = gr.ChatInterface(
+                fn=ask_ai_youtube,
+                description="Example: 'What is the main topic?' or 'Summarize in 3 bullet points'",
+                flagging_mode="never"
+            )
+        # ──────────────────────────────────────────────────────────────────
+        # TAB 3: Multi-Turn AI Chat with Memory (new in Version 3)
+        # ──────────────────────────────────────────────────────────────────
+        with gr.Tab("💬 AI Chat with Memory"):
+            gr.Markdown("## Multi-Turn AI Chat with Persistent Memory")
+            gr.Markdown(
+                "This chatbot remembers your entire conversation history across sessions. "
+                "You can also set preferences that will influence how the AI responds."
+            )
+            with gr.Row():
+                with gr.Column(scale=3):
+                    # ── Chat area ──
+                    chatbot_display = gr.Chatbot(
+                        label="Conversation",
+                        height=400,
+                        value=get_display_history(),   # Load previous history on startup
+                        type="messages"                # Use the new messages format
+                    )
+                    with gr.Row():
+                        user_input = gr.Textbox(
+                            label="Your message",
+                            placeholder="Type your message here...",
+                            scale=4,
+                            lines=1
+                        )
+                        send_btn = gr.Button("Send ▶️", variant="primary", scale=1)
+                with gr.Column(scale=1):
+                    # ── User Preferences panel ──
+                    gr.Markdown("### ⚙️ User Preferences")
+                    gr.Markdown(
+                        "Set preferences that the AI will follow in all responses. "
+                        "For example: 'Always respond formally' or 'Use bullet points'."
+                    )
+                    preferences_input = gr.Textbox(
+                        label="Your Preferences",
+                        placeholder="e.g., Always respond formally, Use bullet points, Keep answers short...",
+                        lines=6,
+                        value=saved_preferences   # Load saved preferences on startup
+                    )
+                    save_pref_btn = gr.Button("💾 Save Preferences", variant="secondary")
+                    pref_status = gr.Textbox(label="Status", interactive=False, lines=1)
+                    gr.Markdown("---")
+                    clear_btn = gr.Button("🗑️ Clear All Memory", variant="stop")
+                    clear_status = gr.Textbox(label="Clear Status", interactive=False, lines=1)
+            # ── Connect buttons to functions ──
+            def send_message(user_msg, chat_history_display, preferences):
+                """
+                Handle sending a message: get the AI response and update the display.
+
+                Args:
+                    user_msg: Text from the message box.
+                    chat_history_display: Current Chatbot value (list of
+                        role/content dicts); None is treated as an empty chat.
+                    preferences: Current text of the preferences box.
+
+                Returns:
+                    ("", updated_history) — the empty string clears the
+                    message box, the list replaces the Chatbot value.
+                """
+                # Work on a copy: never mutate the value Gradio handed in, and
+                # tolerate None instead of failing on .append().
+                shown_messages = list(chat_history_display or [])
+                if not user_msg or not user_msg.strip():
+                    return "", shown_messages
+                # Get AI response with full conversation context
+                ai_response = chat_with_memory(user_msg, shown_messages, preferences)
+                # Update the displayed chat history
+                shown_messages.append({"role": "user", "content": user_msg})
+                shown_messages.append({"role": "assistant", "content": ai_response})
+                return "", shown_messages
+            # Send button click
+            send_btn.click(
+                fn=send_message,
+                inputs=[user_input, chatbot_display, preferences_input],
+                outputs=[user_input, chatbot_display]
+            )
+            # Also send on Enter key
+            user_input.submit(
+                fn=send_message,
+                inputs=[user_input, chatbot_display, preferences_input],
+                outputs=[user_input, chatbot_display]
+            )
+            # Save preferences button
+            save_pref_btn.click(
+                fn=save_preferences_btn,
+                inputs=[preferences_input],
+                outputs=[pref_status]
+            )
+            # Clear memory button
+            clear_btn.click(
+                fn=clear_memory,
+                outputs=[chatbot_display, preferences_input, clear_status]
+            )
+# ── Launch the app ──
+if __name__ == "__main__":
+    demo.launch(inbrowser=True)

app.py CHANGED Viewed

@@ -232,14 +232,25 @@ Scraped Data:
 # ══════════════════════════════════════════════════════════════════════════════
 def fetch_transcript(video_id):
-    """Fetch the transcript of a YouTube video using its Video ID."""
     global transcript_data
     if not video_id or not video_id.strip():
         return "❗ Please enter a valid YouTube Video ID."
     video_id = video_id.strip()
     try:
-        api = YouTubeTranscriptApi()
         transcript = api.fetch(video_id)
         transcript_text = " ".join([snippet.text for snippet in transcript])
         transcript_data = transcript_text

 # ══════════════════════════════════════════════════════════════════════════════
 def fetch_transcript(video_id):
+    """Fetch the transcript of a YouTube video. Uses Bright Data proxy if available."""
     global transcript_data
     if not video_id or not video_id.strip():
         return "❗ Please enter a valid YouTube Video ID."
     video_id = video_id.strip()
     try:
+        # Use Bright Data proxy if credentials are available
+        # Needed on HF Spaces where YouTube DNS may not resolve
+        if bright_data_username and bright_data_password:
+            from youtube_transcript_api.proxies import GenericProxyConfig
+            proxy_url = f"http://{bright_data_username}:{bright_data_password}@brd.superproxy.io:33335"
+            proxy_config = GenericProxyConfig(http_url=proxy_url, https_url=proxy_url)
+            api = YouTubeTranscriptApi(proxy_config=proxy_config)
+            print(f"Fetching transcript via Bright Data proxy for video: {video_id}")
+        else:
+            api = YouTubeTranscriptApi()
+            print(f"Fetching transcript directly for video: {video_id}")
         transcript = api.fetch(video_id)
         transcript_text = " ".join([snippet.text for snippet in transcript])
         transcript_data = transcript_text