WillyCodesInit committed on
Commit
a04820d
·
verified ·
1 Parent(s): 43fc51c

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +28 -30
utils.py CHANGED
@@ -1,34 +1,32 @@
1
- import json
 
 
 
 
2
 
3
- # Load the question-answer pairs from the JSON file
4
- def load_qa_pairs(json_file_path):
5
- try:
6
- with open(json_file_path, 'r') as f:
7
- qa_pairs = json.load(f)
8
- return qa_pairs
9
- except FileNotFoundError:
10
- print(f"Error: The file {json_file_path} was not found.")
11
- return []
12
- except json.JSONDecodeError:
13
- print(f"Error: The file {json_file_path} is not a valid JSON.")
14
- return []
15
- except Exception as e:
16
- print(f"An error occurred while loading the JSON file: {e}")
17
- return []
18
 
19
- # Function to get the financial answer from the question-answer pairs
20
- def get_financial_answer(user_query, qa_pairs, top_k=3):
21
- # Normalize the user query
22
- normalized_query = user_query.lower().strip()
23
 
24
- # Find the most relevant answers from the qa_pairs
25
- relevant_answers = []
26
- for pair in qa_pairs:
27
- if normalized_query in pair['question'].lower():
28
- relevant_answers.append(pair['answer'])
29
 
30
- # Return the best match (or top_k if multiple are relevant)
31
- if relevant_answers:
32
- return "\n".join(relevant_answers[:top_k])
33
- else:
34
- return "Sorry, I couldn't find an answer to your question."
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import faiss
4
+ from sentence_transformers import SentenceTransformer
5
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
6
 
7
+ # Load your CSV with 'question' and 'answer' columns
8
+ df = pd.read_csv("financa_data.csv")
9
+ qa_pairs = df["question"] + " | " + df["answer"]
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
+ # Sentence Transformer for embeddings
12
+ embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
13
+ embeddings = embedding_model.encode(qa_pairs.tolist(), convert_to_numpy=True)
 
14
 
15
+ # FAISS index
16
+ dimension = embeddings.shape[1]
17
+ index = faiss.IndexFlatL2(dimension)
18
+ index.add(embeddings)
 
19
 
20
+ # FLAN-T5
21
+ tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
22
+ model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base")
23
+
24
+ def ask_finance_bot(user_query, top_k=3):
25
+ query_embedding = embedding_model.encode([user_query])
26
+ D, I = index.search(np.array(query_embedding), top_k)
27
+ context = "\n".join([qa_pairs[i] for i in I[0]])
28
+
29
+ prompt = f"Context:\n{context}\n\nQuestion: {user_query}\nAnswer:"
30
+ inputs = tokenizer(prompt, return_tensors="pt")
31
+ outputs = model.generate(**inputs, max_new_tokens=256)
32
+ return tokenizer.decode(outputs[0], skip_special_tokens=True)