Spaces:

WillyCodesInit
/

finSmart.ai

Sleeping

WillyCodesInit committed on May 7, 2025

Commit

a04820d

verified ·

1 Parent(s): 43fc51c

Update utils.py

Files changed (1) hide show

utils.py CHANGED Viewed

@@ -1,34 +1,32 @@
-import json
-# Load the question-answer pairs from the JSON file
-def load_qa_pairs(json_file_path):
-    """Load question-answer pairs from a JSON file.
-
-    This is a best-effort loader: every failure is reported on stdout and
-    turned into an empty list so the caller can keep running.
-
-    Args:
-        json_file_path: Path of the JSON file to read.
-
-    Returns:
-        The decoded JSON content, or ``[]`` when the file is missing, is not
-        valid JSON, or cannot be read for any other reason.
-    """
-    try:
-        # JSON text is UTF-8 by specification; do not rely on the locale's
-        # default encoding, which differs between platforms.
-        with open(json_file_path, 'r', encoding='utf-8') as f:
-            return json.load(f)
-    except FileNotFoundError:
-        print(f"Error: The file {json_file_path} was not found.")
-    except json.JSONDecodeError:
-        print(f"Error: The file {json_file_path} is not a valid JSON.")
-    except Exception as e:
-        # Deliberate catch-all (permissions, decoding errors, ...): report it
-        # and fall through to the empty result instead of crashing.
-        print(f"An error occurred while loading the JSON file: {e}")
-    return []
-# Function to get the financial answer from the question-answer pairs
-def get_financial_answer(user_query, qa_pairs, top_k=3):
-    """Return the answers whose question contains the user's query.
-
-    Matching is a case-insensitive substring test of the stripped query
-    against each stored question.
-
-    Args:
-        user_query: Free-text question typed by the user.
-        qa_pairs: Iterable of dicts with 'question' and 'answer' keys.
-        top_k: Maximum number of matching answers to return.
-
-    Returns:
-        The first ``top_k`` matching answers joined by newlines, or an
-        apology message when nothing matches or the query is blank.
-    """
-    not_found = "Sorry, I couldn't find an answer to your question."
-    # Normalize the user query
-    normalized_query = user_query.lower().strip()
-    # A blank query is a substring of every question and would "match"
-    # the whole dataset, so treat it as no match at all.
-    if not normalized_query:
-        return not_found
-    # Collect the answers of every question that contains the query
-    relevant_answers = [
-        pair['answer']
-        for pair in qa_pairs
-        if normalized_query in pair['question'].lower()
-    ]
-    if not relevant_answers:
-        return not_found
-    # Return the best match (or top_k if multiple are relevant)
-    return "\n".join(relevant_answers[:top_k])

+import pandas as pd
+import numpy as np
+import faiss
+from sentence_transformers import SentenceTransformer
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+# Load your CSV with 'question' and 'answer' columns
+df = pd.read_csv("financa_data.csv")
+# Rows missing either field would become NaN after concatenation and reach
+# the encoder as non-string values, so drop them. Renumber afterwards so the
+# row labels stay equal to the positions FAISS assigns below.
+df = df.dropna(subset=["question", "answer"]).reset_index(drop=True)
+# One retrievable text per row: "<question> | <answer>" (cast to str so
+# numeric cells concatenate instead of raising)
+qa_pairs = df["question"].astype(str) + " | " + df["answer"].astype(str)
+# Sentence Transformer for embeddings
+embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
+embeddings = embedding_model.encode(qa_pairs.tolist(), convert_to_numpy=True)
+# FAISS index
+# Flat L2 index over the pair embeddings; vectors are added in row order, so
+# the id returned by a search is the row position in qa_pairs.
+dimension = embeddings.shape[1]
+index = faiss.IndexFlatL2(dimension)
+index.add(embeddings)
+# FLAN-T5
+# Tokenizer and seq2seq model used to generate the final answer text
+tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
+model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base")
+def ask_finance_bot(user_query, top_k=3):
+    """Answer a finance question using retrieved Q/A pairs as context.
+
+    The query is embedded with the module-level ``embedding_model``, its
+    nearest neighbours are looked up in the module-level FAISS ``index``,
+    and the matching ``qa_pairs`` entries are placed in a prompt from which
+    the module-level FLAN-T5 ``model`` generates the answer.
+
+    Args:
+        user_query: The question text.
+        top_k: Maximum number of stored pairs to include as context.
+
+    Returns:
+        The generated answer, decoded with special tokens removed.
+    """
+    # Never ask for more neighbours than the index holds: FAISS pads the
+    # missing slots with id -1, which is not a valid row.
+    k = min(top_k, index.ntotal)
+    if k > 0:
+        query_embedding = embedding_model.encode([user_query], convert_to_numpy=True)
+        _, neighbour_ids = index.search(
+            np.asarray(query_embedding, dtype=np.float32), k
+        )
+        # Ids are insertion positions, so fetch rows by position (iloc)
+        # rather than by index label; skip any -1 padding defensively.
+        context = "\n".join(qa_pairs.iloc[i] for i in neighbour_ids[0] if i >= 0)
+    else:
+        # Nothing to retrieve (empty index or non-positive top_k)
+        context = ""
+    prompt = f"Context:\n{context}\n\nQuestion: {user_query}\nAnswer:"
+    inputs = tokenizer(prompt, return_tensors="pt")
+    outputs = model.generate(**inputs, max_new_tokens=256)
+    return tokenizer.decode(outputs[0], skip_special_tokens=True)