Spaces:
Sleeping
Sleeping
Update rag.py
Browse files
rag.py
CHANGED
|
@@ -24,9 +24,23 @@ similarity_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
|
|
| 24 |
HF_DATASET_REPO = "midrees2806/unmatched_queries"
|
| 25 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 26 |
|
| 27 |
-
# Load
|
| 28 |
-
|
| 29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
# Precompute embeddings
|
| 32 |
dataset_questions = [item.get("Question", "").lower().strip() for item in dataset]
|
|
|
|
| 24 |
HF_DATASET_REPO = "midrees2806/unmatched_queries"
|
| 25 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 26 |
|
| 27 |
+
# Load multiple JSON datasets
def load_qa_datasets(pattern='datasets/*.json'):
    """Collect question/answer records from every JSON file matching *pattern*.

    Each file is expected to hold a JSON list of objects carrying both a
    'Question' and an 'Answer' key. Entries that do not fit are reported on
    stdout and skipped. A file that cannot be read or parsed is reported and
    skipped as well, so one bad file does not prevent the remaining files
    from being loaded.

    Args:
        pattern: Glob pattern selecting the JSON files to read.

    Returns:
        A list of the valid record dicts, ordered by sorted file path and
        then by position inside each file.
    """
    records = []
    # glob returns paths in arbitrary order; sorting keeps the dataset order
    # (and anything index-aligned with it) reproducible between runs.
    for file_path in sorted(glob.glob(pattern)):
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"Error loading {file_path}: {e}")
            continue

        if not isinstance(data, list):
            print(f"File {file_path} does not contain a list.")
            continue

        for item in data:
            if isinstance(item, dict) and 'Question' in item and 'Answer' in item:
                records.append(item)
            else:
                print(f"Invalid entry in {file_path}: {item}")
    return records


dataset = load_qa_datasets()
|
| 44 |
|
| 45 |
# Precompute embeddings
|
| 46 |
dataset_questions = [item.get("Question", "").lower().strip() for item in dataset]
|