Spaces:

cb1716pics
/

23RAG7

Build error

cb1716pics committed on Feb 20, 2025

Commit

a523549

verified ·

1 Parent(s): 973db40

Upload 3 files

Files changed (3) hide show

app.py CHANGED Viewed

@@ -2,7 +2,8 @@ import streamlit as st
 from generator import generate_response_from_document
 from retrieval import retrieve_documents
 from evaluation import calculate_metrics
-from data_processing import load_data_from_faiss, ragbench
 # Page Title
 st.title("RAG7 - Real World RAG System")
@@ -13,14 +14,19 @@ def load_data():
 data_status = load_data()
 # Question Section
 st.subheader("Hi, What do you want to know today?")
 question = st.text_area("Enter your question:", placeholder="Type your question here...", height=100)
 # Submit Button
 if st.button("Submit"):
     retrieved_documents = retrieve_documents(question, 5)
     response = generate_response_from_document(question, retrieved_documents)
 else:
     response = ""
@@ -35,7 +41,7 @@ col1, col2 = st.columns([1, 3])  # Creating two columns for button and metrics d
 with col1:
     if st.button("Calculate Metrics"):
-        metrics = calculate_metrics(question, response, retrieved_documents, ragbench)
     else:
         metrics = ""

 from generator import generate_response_from_document
 from retrieval import retrieve_documents
 from evaluation import calculate_metrics
+from data_processing import load_data_from_faiss
+import time
 # Page Title
 st.title("RAG7 - Real World RAG System")
 data_status = load_data()
+time_taken_for_response = 'N/A'
 # Question Section
 st.subheader("Hi, What do you want to know today?")
 question = st.text_area("Enter your question:", placeholder="Type your question here...", height=100)
 # Submit Button
 if st.button("Submit"):
+    start_time = time.time()
     retrieved_documents = retrieve_documents(question, 5)
     response = generate_response_from_document(question, retrieved_documents)
+    end_time = time.time()
+    time_taken_for_response = end_time-start_time
 else:
     response = ""
 with col1:
     if st.button("Calculate Metrics"):
+        metrics = calculate_metrics(question, response, retrieved_documents, time_taken_for_response)
     else:
         metrics = ""

data_processing.py CHANGED Viewed

@@ -15,8 +15,6 @@ embedding_model = HuggingFaceEmbeddings(
 )
 all_documents = []
-index = None
-actual_docs = None
 ragbench = {}
@@ -39,9 +37,10 @@ def create_faiss_index_file():
     # Convert embeddings to a NumPy array
     embeddings_np = np.array(embeddings, dtype=np.float32)
     # Store in FAISS using the NumPy array's shape
-    index = faiss.IndexFlatL2(embeddings_np.shape[1])
-    index.add(embeddings_np)
     # Save FAISS index
     faiss.write_index(index, f"data_local/rag7_index.faiss")
@@ -53,7 +52,6 @@ def create_faiss_index_file():
     print(f"data is stored!")
 def load_data_from_faiss():
-    load_ragbench()
     load_faiss()
     load_metatdata()
@@ -63,11 +61,11 @@ def load_ragbench():
         ragbench[dataset] = load_dataset("rungalileo/ragbench", dataset)
 def load_faiss():
-    # Load the correct FAISS index
     faiss_index_path = f"data_local/rag7_index.faiss"
     index = faiss.read_index(faiss_index_path)
 def load_metatdata():
-    # Load document metadata
     with open(f"data_local/rag7_docs.json", "r") as f:
         actual_docs = json.load(f)  # Contains all documents for this dataset

 )
 all_documents = []
 ragbench = {}
     # Convert embeddings to a NumPy array
     embeddings_np = np.array(embeddings, dtype=np.float32)
+    global index_w
     # Store in FAISS using the NumPy array's shape
+    index_w = faiss.IndexFlatL2(embeddings_np.shape[1])
+    index_w.add(embeddings_np)
     # Save FAISS index
     faiss.write_index(index, f"data_local/rag7_index.faiss")
     print(f"data is stored!")
 def load_data_from_faiss():
     load_faiss()
     load_metatdata()
         ragbench[dataset] = load_dataset("rungalileo/ragbench", dataset)
 def load_faiss():
+    global index
     faiss_index_path = f"data_local/rag7_index.faiss"
     index = faiss.read_index(faiss_index_path)
 def load_metatdata():
+    global actual_docs
     with open(f"data_local/rag7_docs.json", "r") as f:
         actual_docs = json.load(f)  # Contains all documents for this dataset

evaluation.py CHANGED Viewed

@@ -4,12 +4,15 @@ from sklearn.metrics import mean_squared_error, roc_auc_score
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.metrics.pairwise import cosine_similarity
 ground_truth_answer = ''
 ground_truth_metrics = {}
-def calculate_metrics(question, response, docs,data, time_taken):
-    retrieve_ground_truths(question,data)
     # Predicted metrics
     predicted_metrics = {
         "context_relevance": context_relevance(question, docs),

 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.metrics.pairwise import cosine_similarity
+from data_processing import load_ragbench
 ground_truth_answer = ''
 ground_truth_metrics = {}
+def calculate_metrics(question, response, docs, time_taken):
+    data =  load_ragbench()
+    retrieve_ground_truths(question, data)
     # Predicted metrics
     predicted_metrics = {
         "context_relevance": context_relevance(question, docs),