midrees2806 committed on
Commit
728639e
·
verified ·
1 Parent(s): 65b85bf

Update rag.py

Browse files
Files changed (1) hide show
  1. rag.py +17 -3
rag.py CHANGED
@@ -24,9 +24,23 @@ similarity_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
24
  HF_DATASET_REPO = "midrees2806/unmatched_queries"
25
  HF_TOKEN = os.getenv("HF_TOKEN")
26
 
27
- # Load dataset (automatically using the path)
28
- with open('dataset/', 'r') as f:
29
- dataset = json.load(f)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
  # Precompute embeddings
32
  dataset_questions = [item.get("Question", "").lower().strip() for item in dataset]
 
24
  HF_DATASET_REPO = "midrees2806/unmatched_queries"
25
  HF_TOKEN = os.getenv("HF_TOKEN")
26
 
27
# Load multiple JSON datasets
#
# Imported here so this section is self-contained.
# NOTE(review): the top of the file is not visible from this hunk; if `glob`
# and `json` are already imported there, these two lines are harmless repeats.
import glob
import json


def load_datasets(pattern: str = 'datasets/*.json') -> list:
    """Collect question/answer records from every JSON file matching *pattern*.

    Each file is expected to hold a JSON list of objects. A record is kept
    only when it is a dict that has an 'Answer' key and a string 'Question'
    (the question text is lower-cased later when embeddings are precomputed,
    so a non-string value would crash there).

    Loading is best-effort: a file that cannot be read or parsed is reported
    and skipped, and the remaining files are still loaded.

    Args:
        pattern: Glob pattern selecting the JSON files to read.

    Returns:
        The valid records, in sorted file-path order and then in file order.
    """
    records = []
    # glob returns paths in arbitrary order; sorting keeps the record order
    # (and therefore the embedding index order) stable between runs.
    for file_path in sorted(glob.glob(pattern)):
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (OSError, ValueError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            # Handling the failure per file keeps one bad file from
            # discarding all the files that come after it.
            print(f"Error loading {file_path}: {e}")
            continue

        if not isinstance(data, list):
            print(f"File {file_path} does not contain a list.")
            continue

        for item in data:
            if (
                isinstance(item, dict)
                and isinstance(item.get('Question'), str)
                and 'Answer' in item
            ):
                records.append(item)
            else:
                print(f"Invalid entry in {file_path}: {item}")
    return records


dataset = load_datasets()
44
 
45
  # Precompute embeddings
46
  dataset_questions = [item.get("Question", "").lower().strip() for item in dataset]