Spaces:
Runtime error
Runtime error
| import os | |
| import pandas as pd | |
| import faiss | |
| import pickle | |
| from sentence_transformers import SentenceTransformer | |
def chunk_dataset_rows(file_path="data/medical_qa.csv"):
    """Turn a question/answer table into a list of "Q: ...\\nA: ..." text chunks.

    Args:
        file_path: Path of the CSV file to read. When no file exists at this
            path, a small built-in two-row sample table is used instead.

    Returns:
        A list of strings, one per usable row, each formatted as
        ``"Q: <question>\\nA: <answer>"``. Rows where either the question
        or the answer is not a ``str`` are left out.
    """
    if os.path.exists(file_path):
        frame = pd.read_csv(file_path)
    else:
        # Fallback: sample data used when the CSV file is missing.
        frame = pd.DataFrame(
            {
                "question": [
                    "What are the symptoms of diabetes?",
                    "How is hypertension treated?",
                ],
                "answer": [
                    "Symptoms include increased thirst and frequent urination.",
                    "Treatment includes lifestyle changes and medication.",
                ],
            }
        )

    # A column that is absent from the table falls back to the empty string.
    pairs = (
        (record.get("question", ""), record.get("answer", ""))
        for _, record in frame.iterrows()
    )

    # Only keep rows where both fields are real strings.
    return [
        f"Q: {q_text}\nA: {a_text}"
        for q_text, a_text in pairs
        if isinstance(q_text, str) and isinstance(a_text, str)
    ]
def process_medical_dataset(
    file_path="data/medical_qa.csv",
    index_dir="faiss_index",
    model_name="sentence-transformers/all-MiniLM-L6-v2",
):
    """Embed the Q/A chunks and persist a FAISS index alongside the chunks.

    The chunks produced by ``chunk_dataset_rows(file_path)`` are encoded with
    a SentenceTransformer model, added to a flat L2 FAISS index, and written
    to disk together with the pickled list of chunk strings.

    Args:
        file_path: CSV file handed to ``chunk_dataset_rows``.
        index_dir: Directory that receives ``index.faiss`` and ``index.pkl``.
            It is created if it does not exist.
        model_name: Name of the SentenceTransformer model used for encoding.

    Raises:
        ValueError: If the dataset yields no chunks, since an index cannot be
            sized or filled without at least one embedding.
    """
    chunks = chunk_dataset_rows(file_path)
    if not chunks:
        # Fail early with a clear message instead of an IndexError further
        # down, and before paying the cost of loading the model.
        raise ValueError(f"No question/answer chunks could be built from {file_path!r}")

    model = SentenceTransformer(model_name)
    # FAISS needs a 2-D ndarray; request it explicitly.
    embeddings = model.encode(chunks, convert_to_numpy=True)

    # Create FAISS index sized to the embedding width.
    dim = embeddings.shape[1]
    index = faiss.IndexFlatL2(dim)
    index.add(embeddings)

    # Save index and chunks side by side so they can be reloaded together.
    os.makedirs(index_dir, exist_ok=True)
    faiss.write_index(index, os.path.join(index_dir, "index.faiss"))
    with open(os.path.join(index_dir, "index.pkl"), "wb") as f:
        pickle.dump(chunks, f)

    # NOTE(review): the leading character looks like a mis-encoded glyph
    # (possibly an emoji) — confirm the intended text before changing it.
    print("β Medical dataset processed and indexed.")