Spaces:
Runtime error
Runtime error
| import kagglehub | |
| import pandas as pd | |
| from sentence_transformers import SentenceTransformer | |
| import faiss | |
| import numpy as np | |
| import gc | |
| # Step 1: Download and Load the Dataset | |
def load_dataset(dataset_path: str = "./questions_answers.csv") -> pd.DataFrame:
    """Load the question/answer CSV into a DataFrame.

    Args:
        dataset_path: Path of the CSV file to read. Defaults to
            ``./questions_answers.csv`` (relative to the current working
            directory), so existing ``load_dataset()`` calls are unaffected.

    Returns:
        The parsed CSV as a ``pandas.DataFrame``.

    Raises:
        FileNotFoundError: Propagated from pandas when the file is missing.
    """
    # NOTE(review): the CSV presumably originates from the Kaggle dataset
    # "nkenyor/physics-question-and-answer-brief"; the kagglehub download
    # was disabled in favour of reading a local copy -- confirm.
    return pd.read_csv(dataset_path)
| # Step 2: Create Embeddings and FAISS Index | |
def setup_faiss_index():
    """Embed every dataset question and build a FAISS index over them.

    Reads the dataset through ``load_dataset()``, encodes the ``question``
    column with the ``sentence-transformers/all-MiniLM-L6-v2`` model, adds
    the vectors to a flat L2 index and writes two files to the current
    working directory:

    * ``physics_faiss.index`` -- the FAISS index
    * ``questions.npy`` -- the question texts, in the order they were added

    Returns:
        tuple: ``(index, questions, embedder)`` -- the populated FAISS index,
        the list of question strings that were indexed, and the loaded
        SentenceTransformer model.

    Raises:
        KeyError: Propagated from pandas if there is no ``question`` column.
        ValueError: If the dataset contains no usable question text.
    """
    df = load_dataset()
    # Empty CSV cells are parsed as NaN. Drop them (and blank strings) so
    # only real question text is embedded and stored alongside the index.
    questions = [
        text
        for text in df["question"].dropna().astype(str).tolist()
        if text.strip()
    ]
    del df  # The DataFrame is no longer needed once the texts are extracted
    gc.collect()  # Release its memory before the model is loaded

    if not questions:
        # Without this guard the failure would be an IndexError on
        # ``shape[1]`` further down, which hides the real cause.
        raise ValueError("Dataset contains no non-empty 'question' entries")

    # Load embedding model
    embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

    # Convert dataset questions into embeddings
    question_embeddings = embedder.encode(questions, convert_to_numpy=True)
    # FAISS expects a C-contiguous float32 matrix; this is a no-op when the
    # encoder already returns one.
    question_embeddings = np.ascontiguousarray(
        question_embeddings, dtype=np.float32
    )

    # Create FAISS index sized to the embedding dimension
    index = faiss.IndexFlatL2(question_embeddings.shape[1])
    index.add(question_embeddings)

    # Save FAISS index and questions
    faiss.write_index(index, "physics_faiss.index")
    np.save("questions.npy", np.array(questions))
    return index, questions, embedder
| # Step 3: Load the FAISS index and retrieve the best-matching question | |
# Loaded SentenceTransformer models keyed by model name, so the model is
# read from disk once per process instead of once per query.
_EMBEDDER_CACHE = {}


def _get_query_embedder(model_name="sentence-transformers/all-MiniLM-L6-v2"):
    """Return a shared SentenceTransformer, loading it only on first use."""
    embedder = _EMBEDDER_CACHE.get(model_name)
    if embedder is None:
        embedder = SentenceTransformer(model_name)
        _EMBEDDER_CACHE[model_name] = embedder
    return embedder


def retrieve_best_match(user_query, top_k=1):
    """Return the stored question closest to ``user_query``.

    Reads ``physics_faiss.index`` and ``questions.npy`` from the current
    working directory on every call, embeds the query and searches the
    index for its nearest neighbours.

    Args:
        user_query: Free-text query to match against the indexed questions.
        top_k: Number of neighbours requested from FAISS. Only the closest
            one is returned.

    Returns:
        The best-matching question, as an element of the loaded
        ``questions.npy`` array.

    Raises:
        IndexError: If the search yields no valid neighbour (for example an
            empty index) or the index and ``questions.npy`` are out of sync.
    """
    index = faiss.read_index("physics_faiss.index")
    # questions.npy is written from a list of strings, so it loads as a plain
    # string array. Pickle support is not needed, and leaving it off avoids
    # executing code from a tampered file.
    questions = np.load("questions.npy", allow_pickle=False)

    query_embedding = _get_query_embedder().encode(
        [user_query], convert_to_numpy=True
    )
    _, retrieved_indices = index.search(query_embedding, top_k)

    best = int(retrieved_indices[0][0])
    # FAISS pads missing neighbours with -1; used as an index that would
    # silently select the *last* question instead of signalling "no match".
    if best < 0 or best >= len(questions):
        raise IndexError("No matching question found in the FAISS index")
    return questions[best]  # Return the best-matching question