Spaces:
Build error
Build error
Upload 3 files
Browse files- app.py +8 -2
- data_processing.py +5 -7
- evaluation.py +5 -2
app.py
CHANGED
|
@@ -2,7 +2,8 @@ import streamlit as st
|
|
| 2 |
from generator import generate_response_from_document
|
| 3 |
from retrieval import retrieve_documents
|
| 4 |
from evaluation import calculate_metrics
|
| 5 |
-
from data_processing import load_data_from_faiss
|
|
|
|
| 6 |
|
| 7 |
# Page Title
|
| 8 |
st.title("RAG7 - Real World RAG System")
|
|
@@ -13,14 +14,19 @@ def load_data():
|
|
| 13 |
|
| 14 |
data_status = load_data()
|
| 15 |
|
|
|
|
|
|
|
| 16 |
# Question Section
|
| 17 |
st.subheader("Hi, What do you want to know today?")
|
| 18 |
question = st.text_area("Enter your question:", placeholder="Type your question here...", height=100)
|
| 19 |
|
| 20 |
# Submit Button
|
| 21 |
if st.button("Submit"):
|
|
|
|
| 22 |
retrieved_documents = retrieve_documents(question, 5)
|
| 23 |
response = generate_response_from_document(question, retrieved_documents)
|
|
|
|
|
|
|
| 24 |
else:
|
| 25 |
response = ""
|
| 26 |
|
|
@@ -35,7 +41,7 @@ col1, col2 = st.columns([1, 3]) # Creating two columns for button and metrics d
|
|
| 35 |
|
| 36 |
with col1:
|
| 37 |
if st.button("Calculate Metrics"):
|
| 38 |
-
metrics = calculate_metrics(question, response, retrieved_documents,
|
| 39 |
else:
|
| 40 |
metrics = ""
|
| 41 |
|
|
|
|
| 2 |
from generator import generate_response_from_document
|
| 3 |
from retrieval import retrieve_documents
|
| 4 |
from evaluation import calculate_metrics
|
| 5 |
+
from data_processing import load_data_from_faiss
|
| 6 |
+
import time
|
| 7 |
|
| 8 |
# Page Title
|
| 9 |
st.title("RAG7 - Real World RAG System")
|
|
|
|
| 14 |
|
| 15 |
data_status = load_data()
|
| 16 |
|
| 17 |
+
time_taken_for_response = 'N/A'
|
| 18 |
+
|
| 19 |
# Question Section
|
| 20 |
st.subheader("Hi, What do you want to know today?")
|
| 21 |
question = st.text_area("Enter your question:", placeholder="Type your question here...", height=100)
|
| 22 |
|
| 23 |
# Submit Button
|
| 24 |
if st.button("Submit"):
|
| 25 |
+
start_time = time.time()
|
| 26 |
retrieved_documents = retrieve_documents(question, 5)
|
| 27 |
response = generate_response_from_document(question, retrieved_documents)
|
| 28 |
+
end_time = time.time()
|
| 29 |
+
time_taken_for_response = end_time-start_time
|
| 30 |
else:
|
| 31 |
response = ""
|
| 32 |
|
|
|
|
| 41 |
|
| 42 |
with col1:
|
| 43 |
if st.button("Calculate Metrics"):
|
| 44 |
+
metrics = calculate_metrics(question, response, retrieved_documents, time_taken_for_response)
|
| 45 |
else:
|
| 46 |
metrics = ""
|
| 47 |
|
data_processing.py
CHANGED
|
@@ -15,8 +15,6 @@ embedding_model = HuggingFaceEmbeddings(
|
|
| 15 |
)
|
| 16 |
|
| 17 |
all_documents = []
|
| 18 |
-
index = None
|
| 19 |
-
actual_docs = None
|
| 20 |
ragbench = {}
|
| 21 |
|
| 22 |
|
|
@@ -39,9 +37,10 @@ def create_faiss_index_file():
|
|
| 39 |
# Convert embeddings to a NumPy array
|
| 40 |
embeddings_np = np.array(embeddings, dtype=np.float32)
|
| 41 |
|
|
|
|
| 42 |
# Store in FAISS using the NumPy array's shape
|
| 43 |
-
|
| 44 |
-
|
| 45 |
|
| 46 |
# Save FAISS index
|
| 47 |
faiss.write_index(index, f"data_local/rag7_index.faiss")
|
|
@@ -53,7 +52,6 @@ def create_faiss_index_file():
|
|
| 53 |
print(f"data is stored!")
|
| 54 |
|
| 55 |
def load_data_from_faiss():
|
| 56 |
-
load_ragbench()
|
| 57 |
load_faiss()
|
| 58 |
load_metatdata()
|
| 59 |
|
|
@@ -63,11 +61,11 @@ def load_ragbench():
|
|
| 63 |
ragbench[dataset] = load_dataset("rungalileo/ragbench", dataset)
|
| 64 |
|
| 65 |
def load_faiss():
|
| 66 |
-
|
| 67 |
faiss_index_path = f"data_local/rag7_index.faiss"
|
| 68 |
index = faiss.read_index(faiss_index_path)
|
| 69 |
|
| 70 |
def load_metatdata():
|
| 71 |
-
|
| 72 |
with open(f"data_local/rag7_docs.json", "r") as f:
|
| 73 |
actual_docs = json.load(f) # Contains all documents for this dataset
|
|
|
|
| 15 |
)
|
| 16 |
|
| 17 |
all_documents = []
|
|
|
|
|
|
|
| 18 |
ragbench = {}
|
| 19 |
|
| 20 |
|
|
|
|
| 37 |
# Convert embeddings to a NumPy array
|
| 38 |
embeddings_np = np.array(embeddings, dtype=np.float32)
|
| 39 |
|
| 40 |
+
global index_w
|
| 41 |
# Store in FAISS using the NumPy array's shape
|
| 42 |
+
index_w = faiss.IndexFlatL2(embeddings_np.shape[1])
|
| 43 |
+
index_w.add(embeddings_np)
|
| 44 |
|
| 45 |
# Save FAISS index
|
| 46 |
faiss.write_index(index, f"data_local/rag7_index.faiss")
|
|
|
|
| 52 |
print(f"data is stored!")
|
| 53 |
|
| 54 |
def load_data_from_faiss():
|
|
|
|
| 55 |
load_faiss()
|
| 56 |
load_metatdata()
|
| 57 |
|
|
|
|
| 61 |
ragbench[dataset] = load_dataset("rungalileo/ragbench", dataset)
|
| 62 |
|
| 63 |
def load_faiss():
|
| 64 |
+
global index
|
| 65 |
faiss_index_path = f"data_local/rag7_index.faiss"
|
| 66 |
index = faiss.read_index(faiss_index_path)
|
| 67 |
|
| 68 |
def load_metatdata():
|
| 69 |
+
global actual_docs
|
| 70 |
with open(f"data_local/rag7_docs.json", "r") as f:
|
| 71 |
actual_docs = json.load(f) # Contains all documents for this dataset
|
evaluation.py
CHANGED
|
@@ -4,12 +4,15 @@ from sklearn.metrics import mean_squared_error, roc_auc_score
|
|
| 4 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 5 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 6 |
|
|
|
|
|
|
|
| 7 |
ground_truth_answer = ''
|
| 8 |
ground_truth_metrics = {}
|
| 9 |
|
| 10 |
|
| 11 |
-
def calculate_metrics(question, response, docs,
|
| 12 |
-
|
|
|
|
| 13 |
# Predicted metrics
|
| 14 |
predicted_metrics = {
|
| 15 |
"context_relevance": context_relevance(question, docs),
|
|
|
|
| 4 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 5 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 6 |
|
| 7 |
+
from data_processing import load_ragbench
|
| 8 |
+
|
| 9 |
ground_truth_answer = ''
|
| 10 |
ground_truth_metrics = {}
|
| 11 |
|
| 12 |
|
| 13 |
+
def calculate_metrics(question, response, docs, time_taken):
|
| 14 |
+
data = load_ragbench()
|
| 15 |
+
retrieve_ground_truths(question, data)
|
| 16 |
# Predicted metrics
|
| 17 |
predicted_metrics = {
|
| 18 |
"context_relevance": context_relevance(question, docs),
|