Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,15 +1,13 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import os
|
| 3 |
import numpy as np
|
| 4 |
-
import pandas as pd
|
| 5 |
from sentence_transformers import SentenceTransformer
|
| 6 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 7 |
import PyPDF2
|
| 8 |
import docx
|
| 9 |
import requests
|
| 10 |
import json
|
| 11 |
-
import
|
| 12 |
-
from typing import List, Tuple
|
| 13 |
import logging
|
| 14 |
|
| 15 |
# Configure logging
|
|
@@ -93,9 +91,9 @@ class RAGSystem:
|
|
| 93 |
if self.documents:
|
| 94 |
# Create embeddings for all document chunks
|
| 95 |
self.embeddings = self.embedder.encode(self.documents)
|
| 96 |
-
return f"
|
| 97 |
else:
|
| 98 |
-
return "No text could be extracted from the uploaded files."
|
| 99 |
|
| 100 |
def split_text(self, text: str, chunk_size: int = 500) -> List[str]:
|
| 101 |
"""Split text into smaller chunks"""
|
|
@@ -136,7 +134,7 @@ class RAGSystem:
|
|
| 136 |
def query_groq(self, prompt: str) -> str:
|
| 137 |
"""Query Groq API with the given prompt"""
|
| 138 |
if not self.groq_api_key:
|
| 139 |
-
return "Please set your Groq API key first."
|
| 140 |
|
| 141 |
headers = {
|
| 142 |
"Authorization": f"Bearer {self.groq_api_key}",
|
|
@@ -144,7 +142,7 @@ class RAGSystem:
|
|
| 144 |
}
|
| 145 |
|
| 146 |
data = {
|
| 147 |
-
"model": "
|
| 148 |
"messages": [
|
| 149 |
{
|
| 150 |
"role": "system",
|
|
@@ -156,31 +154,35 @@ class RAGSystem:
|
|
| 156 |
}
|
| 157 |
],
|
| 158 |
"temperature": 0.7,
|
| 159 |
-
"max_tokens": 1024
|
|
|
|
| 160 |
}
|
| 161 |
|
| 162 |
try:
|
| 163 |
response = requests.post(self.groq_base_url, headers=headers, json=data)
|
| 164 |
response.raise_for_status()
|
| 165 |
result = response.json()
|
| 166 |
-
return result[
|
| 167 |
except requests.exceptions.RequestException as e:
|
| 168 |
logger.error(f"Error querying Groq API: {e}")
|
| 169 |
return f"Error querying Groq API: {str(e)}"
|
|
|
|
|
|
|
|
|
|
| 170 |
|
| 171 |
def answer_query(self, query: str) -> str:
|
| 172 |
"""Answer a query using RAG"""
|
| 173 |
if not self.documents:
|
| 174 |
-
return "No documents have been processed yet. Please upload and process documents first."
|
| 175 |
|
| 176 |
if not self.groq_api_key:
|
| 177 |
-
return "Please set your Groq API key first."
|
| 178 |
|
| 179 |
# Retrieve relevant chunks
|
| 180 |
relevant_chunks = self.retrieve_relevant_chunks(query)
|
| 181 |
|
| 182 |
if not relevant_chunks:
|
| 183 |
-
return "No relevant information found in the documents."
|
| 184 |
|
| 185 |
# Create context from relevant chunks
|
| 186 |
context = "\n\n".join(relevant_chunks)
|
|
@@ -204,16 +206,16 @@ rag_system = RAGSystem()
|
|
| 204 |
# Gradio interface functions
|
| 205 |
def set_api_key(api_key):
|
| 206 |
rag_system.set_api_key(api_key)
|
| 207 |
-
return "API key set successfully!"
|
| 208 |
|
| 209 |
def process_files(files):
|
| 210 |
if not files:
|
| 211 |
-
return "Please upload at least one file."
|
| 212 |
return rag_system.process_documents(files)
|
| 213 |
|
| 214 |
def answer_question(query):
|
| 215 |
if not query.strip():
|
| 216 |
-
return "Please enter a question."
|
| 217 |
return rag_system.answer_query(query)
|
| 218 |
|
| 219 |
# Create Gradio interface
|
|
@@ -258,7 +260,7 @@ with gr.Blocks(title="RAG Document Q&A System", theme=gr.themes.Soft()) as demo:
|
|
| 258 |
lines=2
|
| 259 |
)
|
| 260 |
with gr.Column(scale=1):
|
| 261 |
-
ask_btn = gr.Button("Ask Question", variant="primary"
|
| 262 |
|
| 263 |
answer_output = gr.Textbox(
|
| 264 |
label="Answer",
|
|
@@ -305,4 +307,4 @@ with gr.Blocks(title="RAG Document Q&A System", theme=gr.themes.Soft()) as demo:
|
|
| 305 |
)
|
| 306 |
|
| 307 |
if __name__ == "__main__":
|
| 308 |
-
demo.launch(share=True)
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import os
|
| 3 |
import numpy as np
|
|
|
|
| 4 |
from sentence_transformers import SentenceTransformer
|
| 5 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 6 |
import PyPDF2
|
| 7 |
import docx
|
| 8 |
import requests
|
| 9 |
import json
|
| 10 |
+
from typing import List
|
|
|
|
| 11 |
import logging
|
| 12 |
|
| 13 |
# Configure logging
|
|
|
|
| 91 |
if self.documents:
|
| 92 |
# Create embeddings for all document chunks
|
| 93 |
self.embeddings = self.embedder.encode(self.documents)
|
| 94 |
+
return f"✅ Processed {len(files)} files with {len(self.documents)} text chunks."
|
| 95 |
else:
|
| 96 |
+
return "⚠️ No text could be extracted from the uploaded files."
|
| 97 |
|
| 98 |
def split_text(self, text: str, chunk_size: int = 500) -> List[str]:
|
| 99 |
"""Split text into smaller chunks"""
|
|
|
|
| 134 |
def query_groq(self, prompt: str) -> str:
|
| 135 |
"""Query Groq API with the given prompt"""
|
| 136 |
if not self.groq_api_key:
|
| 137 |
+
return "⚠️ Please set your Groq API key first."
|
| 138 |
|
| 139 |
headers = {
|
| 140 |
"Authorization": f"Bearer {self.groq_api_key}",
|
|
|
|
| 142 |
}
|
| 143 |
|
| 144 |
data = {
|
| 145 |
+
"model": "llama-3.1-8b-instant", # ✅ Valid Groq model
|
| 146 |
"messages": [
|
| 147 |
{
|
| 148 |
"role": "system",
|
|
|
|
| 154 |
}
|
| 155 |
],
|
| 156 |
"temperature": 0.7,
|
| 157 |
+
"max_tokens": 1024,
|
| 158 |
+
"stream": False
|
| 159 |
}
|
| 160 |
|
| 161 |
try:
|
| 162 |
response = requests.post(self.groq_base_url, headers=headers, json=data)
|
| 163 |
response.raise_for_status()
|
| 164 |
result = response.json()
|
| 165 |
+
return result["choices"][0]["message"]["content"]
|
| 166 |
except requests.exceptions.RequestException as e:
|
| 167 |
logger.error(f"Error querying Groq API: {e}")
|
| 168 |
return f"Error querying Groq API: {str(e)}"
|
| 169 |
+
except KeyError:
|
| 170 |
+
logger.error(f"Unexpected Groq API response: {result}")
|
| 171 |
+
return f"Unexpected Groq API response: {json.dumps(result, indent=2)}"
|
| 172 |
|
| 173 |
def answer_query(self, query: str) -> str:
|
| 174 |
"""Answer a query using RAG"""
|
| 175 |
if not self.documents:
|
| 176 |
+
return "⚠️ No documents have been processed yet. Please upload and process documents first."
|
| 177 |
|
| 178 |
if not self.groq_api_key:
|
| 179 |
+
return "⚠️ Please set your Groq API key first."
|
| 180 |
|
| 181 |
# Retrieve relevant chunks
|
| 182 |
relevant_chunks = self.retrieve_relevant_chunks(query)
|
| 183 |
|
| 184 |
if not relevant_chunks:
|
| 185 |
+
return "⚠️ No relevant information found in the documents."
|
| 186 |
|
| 187 |
# Create context from relevant chunks
|
| 188 |
context = "\n\n".join(relevant_chunks)
|
|
|
|
| 206 |
# Gradio interface functions
|
| 207 |
def set_api_key(api_key):
    """Forward the entered Groq API key to the shared RAG system.

    Returns a confirmation message for display in the Gradio UI.
    """
    rag_system.set_api_key(api_key)
    return "✅ API key set successfully!"
|
| 210 |
|
| 211 |
def process_files(files):
    """Ingest the uploaded files through the shared RAG system.

    Returns a warning string when nothing was uploaded; otherwise the
    status message produced by document processing.
    """
    if files:
        return rag_system.process_documents(files)
    return "⚠️ Please upload at least one file."
|
| 215 |
|
| 216 |
def answer_question(query):
    """Answer a user question via the RAG pipeline.

    Empty or whitespace-only input yields a prompt to enter a question
    instead of invoking the retrieval/LLM pipeline.
    """
    return (
        rag_system.answer_query(query)
        if query.strip()
        else "⚠️ Please enter a question."
    )
|
| 220 |
|
| 221 |
# Create Gradio interface
|
|
|
|
| 260 |
lines=2
|
| 261 |
)
|
| 262 |
with gr.Column(scale=1):
|
| 263 |
+
ask_btn = gr.Button("Ask Question", variant="primary")
|
| 264 |
|
| 265 |
answer_output = gr.Textbox(
|
| 266 |
label="Answer",
|
|
|
|
| 307 |
)
|
| 308 |
|
| 309 |
if __name__ == "__main__":
|
| 310 |
+
demo.launch(share=True)
|