Spaces:
Sleeping
Sleeping
Update rag.py
Browse files
rag.py
CHANGED
|
@@ -7,6 +7,7 @@ import pandas as pd
|
|
| 7 |
from datasets import load_dataset, Dataset
|
| 8 |
from dotenv import load_dotenv
|
| 9 |
import random
|
|
|
|
| 10 |
|
| 11 |
# Load environment variables
|
| 12 |
load_dotenv()
|
|
@@ -36,15 +37,23 @@ UNMATCHED_RESPONSES = [
|
|
| 36 |
"We appreciate your question. It has been forwarded for further processing. Until it’s available here, feel free to visit the official UE website or use the contact options:\n\n📞 +92-42-99262231-33\n✉️ info@ue.edu.pk\n🌐 https://ue.edu.pk"
|
| 37 |
]
|
| 38 |
|
| 39 |
-
# Load
|
|
|
|
| 40 |
try:
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
except Exception as e:
|
| 46 |
-
print(f"Error loading
|
| 47 |
-
dataset = []
|
| 48 |
|
| 49 |
# Precompute embeddings
|
| 50 |
dataset_questions = [item.get("Question", "").lower().strip() for item in dataset]
|
|
@@ -131,4 +140,4 @@ DO NOT add any new or extra information. ONLY rephrase and improve the clarity a
|
|
| 131 |
return llm_response.split(marker)[-1].strip()
|
| 132 |
return llm_response
|
| 133 |
else:
|
| 134 |
-
return dataset_answers[best_match_idx]
|
|
|
|
| 7 |
from datasets import load_dataset, Dataset
|
| 8 |
from dotenv import load_dotenv
|
| 9 |
import random
|
| 10 |
+
import glob
|
| 11 |
|
| 12 |
# Load environment variables
|
| 13 |
load_dotenv()
|
|
|
|
| 37 |
"We appreciate your question. It has been forwarded for further processing. Until it’s available here, feel free to visit the official UE website or use the contact options:\n\n📞 +92-42-99262231-33\n✉️ info@ue.edu.pk\n🌐 https://ue.edu.pk"
|
| 38 |
]
|
| 39 |
|
| 40 |
+
# Load multiple JSON datasets
def load_json_datasets(pattern='datasets/*.json'):
    """Collect question/answer records from every JSON file matching *pattern*.

    Each matching file is expected to hold a JSON list of objects that carry
    both a 'Question' and an 'Answer' key. Anything else is reported on
    stdout and skipped.

    Args:
        pattern: Glob pattern selecting the JSON files to read.

    Returns:
        A list of the valid record dicts, in sorted-file order and, within a
        file, in the order they appear in that file.
    """
    records = []
    # glob returns paths in arbitrary order; sorting keeps the record order
    # (and therefore any index computed from it later) reproducible.
    for file_path in sorted(glob.glob(pattern)):
        # Handle failures per file so one unreadable or malformed file does
        # not prevent the remaining files from being loaded.
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"Error loading {file_path}: {e}")
            continue

        if not isinstance(data, list):
            print(f"File {file_path} does not contain a list.")
            continue

        for item in data:
            if isinstance(item, dict) and 'Question' in item and 'Answer' in item:
                records.append(item)
            else:
                print(f"Invalid entry in {file_path}: {item}")
    return records


dataset = load_json_datasets()
|
|
|
|
| 57 |
|
| 58 |
# Precompute embeddings
|
| 59 |
dataset_questions = [item.get("Question", "").lower().strip() for item in dataset]
|
|
|
|
| 140 |
return llm_response.split(marker)[-1].strip()
|
| 141 |
return llm_response
|
| 142 |
else:
|
| 143 |
+
return dataset_answers[best_match_idx]
|