Spaces:

midrees2806
/

UoeChatbot

Sleeping

midrees2806 committed 30 days ago

Commit

728639e

verified ·

1 Parent(s): 65b85bf

Update rag.py

Files changed (1) hide show

rag.py CHANGED Viewed

@@ -24,9 +24,23 @@ similarity_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
 HF_DATASET_REPO = "midrees2806/unmatched_queries"
 HF_TOKEN = os.getenv("HF_TOKEN")
-# Load dataset (automatically using the path)
-with open('dataset/', 'r') as f:
-    dataset = json.load(f)
 # Precompute embeddings
 dataset_questions = [item.get("Question", "").lower().strip() for item in dataset]

 HF_DATASET_REPO = "midrees2806/unmatched_queries"
 HF_TOKEN = os.getenv("HF_TOKEN")
+# Load multiple JSON datasets
+dataset = []
+try:
+    json_files = glob.glob('datasets/*.json')
+    for file_path in json_files:
+        with open(file_path, 'r', encoding='utf-8') as f:
+            data = json.load(f)
+            if isinstance(data, list):
+                for item in data:
+                    if isinstance(item, dict) and 'Question' in item and 'Answer' in item:
+                        dataset.append(item)
+                    else:
+                        print(f"Invalid entry in {file_path}: {item}")
+            else:
+                print(f"File {file_path} does not contain a list.")
+except Exception as e:
+    print(f"Error loading datasets: {e}")
 # Precompute embeddings
 dataset_questions = [item.get("Question", "").lower().strip() for item in dataset]