Spaces:

gmustafa413
/

UE_ChatBot

Sleeping

App Files Files Community

gmustafa413 committed on Mar 27, 2025

Commit

03bf0d5

verified ·

1 Parent(s): a29fcf8

Create app.py

Browse files

Files changed (1) hide show

app.py +230 -0

app.py ADDED Viewed

	@@ -0,0 +1,230 @@

+import os
+import gradio as gr
+from sentence_transformers import SentenceTransformer
+import faiss
+import numpy as np
+import google.generativeai as genai
+from datasets import load_dataset
+from typing import List, Dict
+from dotenv import load_dotenv
+# Load environment variables
+load_dotenv()
+# Configuration
+MODEL_NAME = "all-MiniLM-L6-v2"
+GENAI_MODEL = "gemini-pro"
+DATASET_LINK = "https://huggingface.co/datasets/midrees2806/7K_Dataset "  # Replace with your dataset link
+CHUNK_SIZE = 500
+TOP_K = 3
+# Initialize models
+embedding_model = SentenceTransformer(MODEL_NAME)
+class GroqRAGSystem:
+    def __init__(self):
+        self.index = None
+        self.chunks = []
+        self.dataset_loaded = False
+        self.gemini_api_key = os.getenv("AIzaSyASrFvE3gFPigihza0JTuALzZmBx0Kc3d0")
+        if self.gemini_api_key:
+            genai.configure(api_key=self.gemini_api_key)
+    def load_dataset_from_link(self, dataset_link: str):
+        """Load dataset from Hugging Face link"""
+        try:
+            # Extract dataset name from URL
+            dataset_name = dataset_link.split("datasets/")[-1].split("/")[0]
+            if not dataset_name:
+                raise ValueError("Invalid dataset URL format")
+            with gr.Progress() as progress:
+                progress(0.1, desc="📦 Downloading dataset...")
+                dataset = load_dataset(dataset_name, split='train')
+                progress(0.5, desc="🔨 Processing dataset...")
+                if 'context' in dataset.features:
+                    self.chunks = list(set(dataset['context']))
+                elif 'text' in dataset.features:
+                    self.chunks = dataset['text']
+                elif 'question' in dataset.features and 'answer' in dataset.features:
+                    self.chunks = [f"Q: {q}\nA: {a}" for q, a in zip(dataset['question'], dataset['answer'])]
+                else:
+                    raise ValueError("Unsupported dataset format")
+                progress(0.7, desc="🧠 Creating embeddings...")
+                embeddings = embedding_model.encode(self.chunks, show_progress_bar=False)
+                self.index = faiss.IndexFlatL2(embeddings.shape[1])
+                self.index.add(embeddings.astype('float32'))
+                self.dataset_loaded = True
+                progress(1.0, desc="✅ Dataset loaded successfully!")
+            return True
+        except Exception as e:
+            gr.Error(f"Failed to load dataset: {str(e)}")
+            return False
+    def get_relevant_context(self, query: str) -> str:
+        """Retrieve most relevant chunks with scores"""
+        query_embed = embedding_model.encode([query])
+        scores, indices = self.index.search(query_embed.astype('float32'), k=TOP_K)
+        context = []
+        for i, (score, idx) in enumerate(zip(scores[0], indices[0])):
+            if idx < len(self.chunks):
+                context.append(f"🔍 Match {i+1} (Score: {1-score:.2f}):\n{self.chunks[idx]}\n")
+        return "\n".join(context)
+    def generate_response(self, query: str) -> str:
+        """Generate response using only dataset context"""
+        if not self.dataset_loaded:
+            return "⚠️ Please load the dataset first"
+        if not self.gemini_api_key:
+            return "🔑 Please set your Gemini API key in environment variables"
+        context = self.get_relevant_context(query)
+        prompt = f"""You are an expert AI assistant that answers STRICTLY based on the provided context.
+        Follow these rules:
+        1. Answer concisely using ONLY the context below
+        2. If the answer isn't in the context, say "I couldn't find this in the dataset"
+        3. Never make up information
+        4. For ambiguous questions, ask for clarification
+        Context:
+        {context}
+        Question: {query}
+        Answer:"""
+        try:
+            model = genai.GenerativeModel(GENAI_MODEL)
+            response = model.generate_content(prompt)
+            return response.text
+        except Exception as e:
+            return f"⚠️ Error generating response: {str(e)}"
+# Initialize the RAG system
+# One module-level instance, used by the UI event handlers defined further down.
+rag_system = GroqRAGSystem()
+# Custom CSS for modern UI
+# Passed to gr.Blocks(css=...) when the interface is built.
+# NOTE(review): no component in the visible code assigns the .message-user /
+# .message-bot / .progress-bar classes; confirm they match Gradio's markup.
+css = """
+.gradio-container {
+    max-width: 900px !important;
+    margin: auto !important;
+    font-family: 'Inter', sans-serif;
+}
+.dark .gradio-container {
+    background-color: #1e1e2e;
+}
+.message-user {
+    background: #3b82f6;
+    color: white;
+    border-radius: 18px 18px 0 18px;
+    padding: 12px;
+    margin: 8px 0;
+    max-width: 80%;
+    margin-left: auto;
+}
+.message-bot {
+    background: #f3f4f6;
+    color: #111827;
+    border-radius: 18px 18px 18px 0;
+    padding: 12px;
+    margin: 8px 0;
+    max-width: 80%;
+}
+.dark .message-bot {
+    background: #2d3748;
+    color: #f7fafc;
+}
+.progress-bar {
+    height: 6px !important;
+}
+"""
+# Chat interface
+with gr.Blocks(css=css, theme=gr.themes.Default()) as app:
+    # Store chat history
+    chat_history = gr.State([])
+    gr.Markdown("UE-ChatBot")
+    gr.Markdown(f"**Dataset:** {DATASET_LINK}")
+    with gr.Row():
+        with gr.Column(scale=1):
+            gr.Markdown("## ⚙️ Configuration")
+            dataset_url = gr.Textbox(
+                label="Hugging Face Dataset URL",
+                value=DATASET_LINK,
+                interactive=True
+            )
+            load_btn = gr.Button("🚀 Load Dataset", variant="primary")
+            status = gr.Markdown("ℹ️ Please load the dataset first")
+        with gr.Column(scale=2):
+            chatbot = gr.Chatbot(
+                label="Chat History",
+                bubble_full_width=False,
+                avatar_images=(
+                    "https://avatars.githubusercontent.com/u/1561194?v=4",  # User avatar
+                    "https://huggingface.co/spaces/groq/Groq-LLM/resolve/main/groq_logo.png"  # Bot avatar
+                )
+            )
+            query = gr.Textbox(
+                label="Type your question...",
+                placeholder="Ask about the dataset content",
+                autofocus=True
+            )
+            submit_btn = gr.Button("📤 Submit", variant="primary")
+            clear_btn = gr.Button("🗑️ Clear Chat", variant="secondary")
+    # Event handlers
+    def load_dataset(dataset_url):
+        if rag_system.load_dataset_from_link(dataset_url):
+            return "✅ Dataset loaded successfully!"
+        return "❌ Failed to load dataset"
+    def respond(query, history):
+        if not query.strip():
+            return history, ""
+        # Add user message
+        history.append((query, None))
+        # Get response
+        response = rag_system.generate_response(query)
+        # Update history
+        history[-1] = (query, response)
+        return history, ""
+    # Connect components
+    load_btn.click(
+        load_dataset,
+        inputs=dataset_url,
+        outputs=status
+    )
+    submit_btn.click(
+        respond,
+        inputs=[query, chat_history],
+        outputs=[chatbot, query]
+    )
+    query.submit(
+        respond,
+        inputs=[query, chat_history],
+        outputs=[chatbot, query]
+    )
+    clear_btn.click(
+        lambda: [],
+        inputs=None,
+        outputs=chatbot
+    )
+# For Hugging Face Spaces
+# Start the Gradio server only when this file is executed as a script, so that
+# importing the module does not launch the app.
+if __name__ == "__main__":
+    app.launch(debug=True)