Spaces:

gmustafa413
/

UE_ChatBot

Sleeping

App Files Community

gmustafa413 committed on Mar 27, 2025

Commit

37eb186

verified ·

1 Parent(s): 03bf0d5

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -18

app.py CHANGED Viewed

@@ -1,11 +1,10 @@
 import os
 import gradio as gr
-from sentence_transformers import SentenceTransformer
-import faiss
 import numpy as np
 import google.generativeai as genai
 from datasets import load_dataset
-from typing import List, Dict
 from dotenv import load_dotenv
 # Load environment variables
@@ -14,19 +13,22 @@ load_dotenv()
 # Configuration
 MODEL_NAME = "all-MiniLM-L6-v2"
 GENAI_MODEL = "gemini-pro"
-DATASET_LINK = "https://huggingface.co/datasets/midrees2806/7K_Dataset "  # Replace with your dataset link
 CHUNK_SIZE = 500
 TOP_K = 3
 # Initialize models
-embedding_model = SentenceTransformer(MODEL_NAME)
-class GroqRAGSystem:
     def __init__(self):
         self.index = None
         self.chunks = []
         self.dataset_loaded = False
-        self.gemini_api_key = os.getenv("AIzaSyASrFvE3gFPigihza0JTuALzZmBx0Kc3d0")
         if self.gemini_api_key:
             genai.configure(api_key=self.gemini_api_key)
@@ -34,7 +36,7 @@ class GroqRAGSystem:
         """Load dataset from Hugging Face link"""
         try:
             # Extract dataset name from URL
-            dataset_name = dataset_link.split("datasets/")[-1].split("/")[0]
             if not dataset_name:
                 raise ValueError("Invalid dataset URL format")
@@ -61,11 +63,14 @@ class GroqRAGSystem:
                 progress(1.0, desc="✅ Dataset loaded successfully!")
             return True
         except Exception as e:
-            gr.Error(f"Failed to load dataset: {str(e)}")
             return False
     def get_relevant_context(self, query: str) -> str:
         """Retrieve most relevant chunks with scores"""
         query_embed = embedding_model.encode([query])
         scores, indices = self.index.search(query_embed.astype('float32'), k=TOP_K)
@@ -83,6 +88,8 @@ class GroqRAGSystem:
             return "🔑 Please set your Gemini API key in environment variables"
         context = self.get_relevant_context(query)
         prompt = f"""You are an expert AI assistant that answers STRICTLY based on the provided context.
         Follow these rules:
@@ -90,12 +97,9 @@ class GroqRAGSystem:
         2. If the answer isn't in the context, say "I couldn't find this in the dataset"
         3. Never make up information
         4. For ambiguous questions, ask for clarification
         Context:
         {context}
         Question: {query}
         Answer:"""
         try:
@@ -106,7 +110,7 @@ class GroqRAGSystem:
             return f"⚠️ Error generating response: {str(e)}"
 # Initialize the RAG system
-rag_system = GroqRAGSystem()
 # Custom CSS for modern UI
 css = """
@@ -149,12 +153,12 @@ with gr.Blocks(css=css, theme=gr.themes.Default()) as app:
     # Store chat history
     chat_history = gr.State([])
-    gr.Markdown("UE-ChatBot")
     gr.Markdown(f"**Dataset:** {DATASET_LINK}")
     with gr.Row():
         with gr.Column(scale=1):
-            gr.Markdown("## ⚙️ Configuration")
             dataset_url = gr.Textbox(
                 label="Hugging Face Dataset URL",
                 value=DATASET_LINK,
@@ -177,8 +181,9 @@ with gr.Blocks(css=css, theme=gr.themes.Default()) as app:
                 placeholder="Ask about the dataset content",
                 autofocus=True
             )
-            submit_btn = gr.Button("📤 Submit", variant="primary")
-            clear_btn = gr.Button("🗑️ Clear Chat", variant="secondary")
     # Event handlers
     def load_dataset(dataset_url):
@@ -225,6 +230,5 @@ with gr.Blocks(css=css, theme=gr.themes.Default()) as app:
         outputs=chatbot
     )
-# For Hugging Face Spaces
 if __name__ == "__main__":
     app.launch(debug=True)

 import os
 import gradio as gr
 import numpy as np
 import google.generativeai as genai
+from sentence_transformers import SentenceTransformer
+import faiss
 from datasets import load_dataset
 from dotenv import load_dotenv
 # Load environment variables
 # Configuration
 MODEL_NAME = "all-MiniLM-L6-v2"
 GENAI_MODEL = "gemini-pro"
+DATASET_LINK = "https://huggingface.co/datasets/midrees2806/7K_Dataset"
 CHUNK_SIZE = 500
 TOP_K = 3
 # Initialize models
+try:
+    embedding_model = SentenceTransformer(MODEL_NAME)
+except Exception as e:
+    raise RuntimeError(f"Failed to initialize embedding model: {str(e)}")
+class GeminiRAGSystem:
     def __init__(self):
         self.index = None
         self.chunks = []
         self.dataset_loaded = False
+        self.gemini_api_key = os.getenv("GEMINI_API_KEY")
         if self.gemini_api_key:
             genai.configure(api_key=self.gemini_api_key)
         """Load dataset from Hugging Face link"""
         try:
             # Extract dataset name from URL
+            dataset_name = dataset_link.split("datasets/")[-1].split("/")[0].strip()
             if not dataset_name:
                 raise ValueError("Invalid dataset URL format")
                 progress(1.0, desc="✅ Dataset loaded successfully!")
             return True
         except Exception as e:
+            gr.Warning(f"Failed to load dataset: {str(e)}")
             return False
     def get_relevant_context(self, query: str) -> str:
         """Retrieve most relevant chunks with scores"""
+        if not self.index or not self.chunks:
+            return ""
         query_embed = embedding_model.encode([query])
         scores, indices = self.index.search(query_embed.astype('float32'), k=TOP_K)
             return "🔑 Please set your Gemini API key in environment variables"
         context = self.get_relevant_context(query)
+        if not context:
+            return "No relevant context found in dataset"
         prompt = f"""You are an expert AI assistant that answers STRICTLY based on the provided context.
         Follow these rules:
         2. If the answer isn't in the context, say "I couldn't find this in the dataset"
         3. Never make up information
         4. For ambiguous questions, ask for clarification
         Context:
         {context}
         Question: {query}
         Answer:"""
         try:
             return f"⚠️ Error generating response: {str(e)}"
 # Initialize the RAG system
+rag_system = GeminiRAGSystem()
 # Custom CSS for modern UI
 css = """
     # Store chat history
     chat_history = gr.State([])
+    gr.Markdown("## UE-ChatBot")
     gr.Markdown(f"**Dataset:** {DATASET_LINK}")
     with gr.Row():
         with gr.Column(scale=1):
+            gr.Markdown("### ⚙️ Configuration")
             dataset_url = gr.Textbox(
                 label="Hugging Face Dataset URL",
                 value=DATASET_LINK,
                 placeholder="Ask about the dataset content",
                 autofocus=True
             )
+            with gr.Row():
+                submit_btn = gr.Button("📤 Submit", variant="primary")
+                clear_btn = gr.Button("🗑️ Clear Chat", variant="secondary")
     # Event handlers
     def load_dataset(dataset_url):
         outputs=chatbot
     )
 if __name__ == "__main__":
     app.launch(debug=True)