gmustafa413 committed on
Commit
37eb186
·
verified ·
1 Parent(s): 03bf0d5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -18
app.py CHANGED
@@ -1,11 +1,10 @@
1
  import os
2
  import gradio as gr
3
- from sentence_transformers import SentenceTransformer
4
- import faiss
5
  import numpy as np
6
  import google.generativeai as genai
 
 
7
  from datasets import load_dataset
8
- from typing import List, Dict
9
  from dotenv import load_dotenv
10
 
11
  # Load environment variables
@@ -14,19 +13,22 @@ load_dotenv()
14
  # Configuration
15
  MODEL_NAME = "all-MiniLM-L6-v2"
16
  GENAI_MODEL = "gemini-pro"
17
- DATASET_LINK = "https://huggingface.co/datasets/midrees2806/7K_Dataset " # Replace with your dataset link
18
  CHUNK_SIZE = 500
19
  TOP_K = 3
20
 
21
  # Initialize models
22
- embedding_model = SentenceTransformer(MODEL_NAME)
 
 
 
23
 
24
- class GroqRAGSystem:
25
  def __init__(self):
26
  self.index = None
27
  self.chunks = []
28
  self.dataset_loaded = False
29
- self.gemini_api_key = os.getenv("AIzaSyASrFvE3gFPigihza0JTuALzZmBx0Kc3d0")
30
  if self.gemini_api_key:
31
  genai.configure(api_key=self.gemini_api_key)
32
 
@@ -34,7 +36,7 @@ class GroqRAGSystem:
34
  """Load dataset from Hugging Face link"""
35
  try:
36
  # Extract dataset name from URL
37
- dataset_name = dataset_link.split("datasets/")[-1].split("/")[0]
38
  if not dataset_name:
39
  raise ValueError("Invalid dataset URL format")
40
 
@@ -61,11 +63,14 @@ class GroqRAGSystem:
61
  progress(1.0, desc="✅ Dataset loaded successfully!")
62
  return True
63
  except Exception as e:
64
- gr.Error(f"Failed to load dataset: {str(e)}")
65
  return False
66
 
67
  def get_relevant_context(self, query: str) -> str:
68
  """Retrieve most relevant chunks with scores"""
 
 
 
69
  query_embed = embedding_model.encode([query])
70
  scores, indices = self.index.search(query_embed.astype('float32'), k=TOP_K)
71
 
@@ -83,6 +88,8 @@ class GroqRAGSystem:
83
  return "🔑 Please set your Gemini API key in environment variables"
84
 
85
  context = self.get_relevant_context(query)
 
 
86
 
87
  prompt = f"""You are an expert AI assistant that answers STRICTLY based on the provided context.
88
  Follow these rules:
@@ -90,12 +97,9 @@ class GroqRAGSystem:
90
  2. If the answer isn't in the context, say "I couldn't find this in the dataset"
91
  3. Never make up information
92
  4. For ambiguous questions, ask for clarification
93
-
94
  Context:
95
  {context}
96
-
97
  Question: {query}
98
-
99
  Answer:"""
100
 
101
  try:
@@ -106,7 +110,7 @@ class GroqRAGSystem:
106
  return f"⚠️ Error generating response: {str(e)}"
107
 
108
  # Initialize the RAG system
109
- rag_system = GroqRAGSystem()
110
 
111
  # Custom CSS for modern UI
112
  css = """
@@ -149,12 +153,12 @@ with gr.Blocks(css=css, theme=gr.themes.Default()) as app:
149
  # Store chat history
150
  chat_history = gr.State([])
151
 
152
- gr.Markdown("UE-ChatBot")
153
  gr.Markdown(f"**Dataset:** {DATASET_LINK}")
154
 
155
  with gr.Row():
156
  with gr.Column(scale=1):
157
- gr.Markdown("## ⚙️ Configuration")
158
  dataset_url = gr.Textbox(
159
  label="Hugging Face Dataset URL",
160
  value=DATASET_LINK,
@@ -177,8 +181,9 @@ with gr.Blocks(css=css, theme=gr.themes.Default()) as app:
177
  placeholder="Ask about the dataset content",
178
  autofocus=True
179
  )
180
- submit_btn = gr.Button("📤 Submit", variant="primary")
181
- clear_btn = gr.Button("🗑️ Clear Chat", variant="secondary")
 
182
 
183
  # Event handlers
184
  def load_dataset(dataset_url):
@@ -225,6 +230,5 @@ with gr.Blocks(css=css, theme=gr.themes.Default()) as app:
225
  outputs=chatbot
226
  )
227
 
228
- # For Hugging Face Spaces
229
  if __name__ == "__main__":
230
  app.launch(debug=True)
 
1
  import os
2
  import gradio as gr
 
 
3
  import numpy as np
4
  import google.generativeai as genai
5
+ from sentence_transformers import SentenceTransformer
6
+ import faiss
7
  from datasets import load_dataset
 
8
  from dotenv import load_dotenv
9
 
10
  # Load environment variables
 
13
  # Configuration
14
  MODEL_NAME = "all-MiniLM-L6-v2"
15
  GENAI_MODEL = "gemini-pro"
16
+ DATASET_LINK = "https://huggingface.co/datasets/midrees2806/7K_Dataset"
17
  CHUNK_SIZE = 500
18
  TOP_K = 3
19
 
20
  # Initialize models
21
+ try:
22
+ embedding_model = SentenceTransformer(MODEL_NAME)
23
+ except Exception as e:
24
+ raise RuntimeError(f"Failed to initialize embedding model: {str(e)}")
25
 
26
+ class GeminiRAGSystem:
27
  def __init__(self):
28
  self.index = None
29
  self.chunks = []
30
  self.dataset_loaded = False
31
+ self.gemini_api_key = os.getenv("GEMINI_API_KEY")
32
  if self.gemini_api_key:
33
  genai.configure(api_key=self.gemini_api_key)
34
 
 
36
  """Load dataset from Hugging Face link"""
37
  try:
38
  # Extract dataset name from URL
39
+ dataset_name = dataset_link.split("datasets/")[-1].split("/")[0].strip()
40
  if not dataset_name:
41
  raise ValueError("Invalid dataset URL format")
42
 
 
63
  progress(1.0, desc="✅ Dataset loaded successfully!")
64
  return True
65
  except Exception as e:
66
+ gr.Warning(f"Failed to load dataset: {str(e)}")
67
  return False
68
 
69
  def get_relevant_context(self, query: str) -> str:
70
  """Retrieve most relevant chunks with scores"""
71
+ if not self.index or not self.chunks:
72
+ return ""
73
+
74
  query_embed = embedding_model.encode([query])
75
  scores, indices = self.index.search(query_embed.astype('float32'), k=TOP_K)
76
 
 
88
  return "🔑 Please set your Gemini API key in environment variables"
89
 
90
  context = self.get_relevant_context(query)
91
+ if not context:
92
+ return "No relevant context found in dataset"
93
 
94
  prompt = f"""You are an expert AI assistant that answers STRICTLY based on the provided context.
95
  Follow these rules:
 
97
  2. If the answer isn't in the context, say "I couldn't find this in the dataset"
98
  3. Never make up information
99
  4. For ambiguous questions, ask for clarification
 
100
  Context:
101
  {context}
 
102
  Question: {query}
 
103
  Answer:"""
104
 
105
  try:
 
110
  return f"⚠️ Error generating response: {str(e)}"
111
 
112
  # Initialize the RAG system
113
+ rag_system = GeminiRAGSystem()
114
 
115
  # Custom CSS for modern UI
116
  css = """
 
153
  # Store chat history
154
  chat_history = gr.State([])
155
 
156
+ gr.Markdown("## UE-ChatBot")
157
  gr.Markdown(f"**Dataset:** {DATASET_LINK}")
158
 
159
  with gr.Row():
160
  with gr.Column(scale=1):
161
+ gr.Markdown("### ⚙️ Configuration")
162
  dataset_url = gr.Textbox(
163
  label="Hugging Face Dataset URL",
164
  value=DATASET_LINK,
 
181
  placeholder="Ask about the dataset content",
182
  autofocus=True
183
  )
184
+ with gr.Row():
185
+ submit_btn = gr.Button("📤 Submit", variant="primary")
186
+ clear_btn = gr.Button("🗑️ Clear Chat", variant="secondary")
187
 
188
  # Event handlers
189
  def load_dataset(dataset_url):
 
230
  outputs=chatbot
231
  )
232
 
 
233
  if __name__ == "__main__":
234
  app.launch(debug=True)