Spaces:
Runtime error
Runtime error
| import pandas as pd | |
| from sentence_transformers import SentenceTransformer, util | |
| # Load Q&A pairs from a CSV file | |
def load_qa_data(file_path="train_data.csv"):
    """Load aligned question/answer lists from a CSV file.

    The CSV must contain a ``question`` column and an ``answer`` column.
    Rows where either cell is missing are skipped: pandas reads empty cells
    as float NaN, which is not valid text for an embedding model and would
    otherwise show up as the literal string "nan" in retrieved context.

    Args:
        file_path: Path to the CSV file to read.

    Returns:
        A ``(questions, answers)`` tuple of two equally long lists of ``str``
        where ``answers[i]`` is the answer to ``questions[i]``.

    Raises:
        KeyError: If the ``question`` or ``answer`` column is absent.
            Errors raised by ``pd.read_csv`` itself propagate unchanged.
    """
    df = pd.read_csv(file_path)
    # Select both columns before dropping so an incomplete row is removed
    # from both lists at once and the indices stay aligned.
    pairs = df[["question", "answer"]].dropna()
    questions = pairs["question"].astype(str).tolist()
    answers = pairs["answer"].astype(str).tolist()
    return questions, answers
| # Set up embeddings | |
# Sentence-embedding model shared by every helper in this module.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Parallel lists: answers[i] is the answer that belongs to questions[i].
questions, answers = load_qa_data()

# Encode the whole question corpus once, at import time, so each query only
# has to encode itself before being compared against these embeddings.
question_embeddings = model.encode(
    questions,
    convert_to_tensor=True,
)
| # Search top-k context | |
def retrieve_context(query, top_k=3):
    """Build a text context from the stored Q&A pairs closest to ``query``.

    The query is embedded with the module-level ``model`` and searched
    against ``question_embeddings``; each hit's ``corpus_id`` indexes into
    the module-level ``questions`` and ``answers`` lists.

    Args:
        query: The user text to search for.
        top_k: Maximum number of Q&A pairs to request from the search.

    Returns:
        A single string of ``"Q: ...\\nA: ..."`` entries separated by blank
        lines, with surrounding whitespace stripped.
    """
    encoded_query = model.encode(query, convert_to_tensor=True)
    # Only one query is passed in, so only the first result list is used.
    matches = util.semantic_search(
        encoded_query, question_embeddings, top_k=top_k
    )[0]
    entries = [
        f"Q: {questions[match['corpus_id']]}\nA: {answers[match['corpus_id']]}"
        for match in matches
    ]
    return "\n\n".join(entries).strip()
| # Treat a query as finance-related when it is similar enough to a known question | |
def is_financial_text_via_context(query, threshold=0.3):
    """Report whether ``query`` resembles any question in the loaded corpus.

    The query is embedded with the module-level ``model`` and compared by
    cosine similarity against every entry in ``question_embeddings``.

    Args:
        query: The user text to classify.
        threshold: Similarity the best match must strictly exceed.

    Returns:
        ``True`` if the highest similarity is greater than ``threshold``,
        otherwise ``False``.
    """
    encoded_query = model.encode(query, convert_to_tensor=True)
    # Row 0 holds the scores of the single query against every stored question.
    scores = util.cos_sim(encoded_query, question_embeddings)[0]
    return float(scores.max()) > threshold