Spaces:

midrees2806
/

Chatbot

Running

midrees2806 committed on Jun 9, 2025

Commit

317bf1d

verified ·

1 Parent(s): 89ea6e8

Update rag.py

Files changed (1) hide show

rag.py CHANGED Viewed

@@ -7,6 +7,7 @@ import pandas as pd
 from datasets import load_dataset, Dataset
 from dotenv import load_dotenv
 import random
 # Load environment variables
 load_dotenv()
@@ -36,15 +37,23 @@ UNMATCHED_RESPONSES = [
     "We appreciate your question. It has been forwarded for further processing. Until it’s available here, feel free to visit the official UE website or use the contact options:\n\n📞 +92-42-99262231-33\n✉️ info@ue.edu.pk\n🌐 https://ue.edu.pk"
 ]
-# Load local dataset
 try:
-    with open('dataset.json', 'r') as f:
-        dataset = json.load(f)
-    if not all(isinstance(item, dict) and 'Question' in item and 'Answer' in item for item in dataset):
-        raise ValueError("Invalid dataset structure")
 except Exception as e:
-    print(f"Error loading dataset: {e}")
-    dataset = []
 # Precompute embeddings
 dataset_questions = [item.get("Question", "").lower().strip() for item in dataset]
@@ -131,4 +140,4 @@ DO NOT add any new or extra information. ONLY rephrase and improve the clarity a
                 return llm_response.split(marker)[-1].strip()
         return llm_response
     else:
-        return dataset_answers[best_match_idx]

 from datasets import load_dataset, Dataset
 from dotenv import load_dotenv
 import random
+import glob
 # Load environment variables
 load_dotenv()
     "We appreciate your question. It has been forwarded for further processing. Until it’s available here, feel free to visit the official UE website or use the contact options:\n\n📞 +92-42-99262231-33\n✉️ info@ue.edu.pk\n🌐 https://ue.edu.pk"
 ]
+# Load multiple JSON datasets
+dataset = []
 try:
+    json_files = glob.glob('datasets/*.json')
+    for file_path in json_files:
+        with open(file_path, 'r', encoding='utf-8') as f:
+            data = json.load(f)
+            if isinstance(data, list):
+                for item in data:
+                    if isinstance(item, dict) and 'Question' in item and 'Answer' in item:
+                        dataset.append(item)
+                    else:
+                        print(f"Invalid entry in {file_path}: {item}")
+            else:
+                print(f"File {file_path} does not contain a list.")
 except Exception as e:
+    print(f"Error loading datasets: {e}")
 # Precompute embeddings
 dataset_questions = [item.get("Question", "").lower().strip() for item in dataset]
                 return llm_response.split(marker)[-1].strip()
         return llm_response
     else:
+        return dataset_answers[best_match_idx]