# Finance Q&A chatbot: retrieval-augmented answering with Flan-T5.
import json
import numpy as np
import pandas as pd
import torch
from sentence_transformers import SentenceTransformer
from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer
# Initialize model and tokenizer.
model_name = "google/flan-t5-base"  # You can use a different model if needed
# BUG FIX: Flan-T5 is an encoder-decoder (seq2seq) architecture. Loading it
# via AutoModelForCausalLM raises a ValueError at import time because T5 is
# not in the causal-LM model mapping; AutoModelForSeq2SeqLM is the correct
# auto class and works with the existing model.generate(...) calls below.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Sentence transformer model to encode questions for similarity search.
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
# Load question-answer data from CSV
def load_qa_data_from_csv(file_path):
    """
    Load question/answer pairs from a CSV file.

    The file must contain 'question' and 'answer' columns; each row becomes
    one (question, answer) tuple in the returned list.
    """
    frame = pd.read_csv(file_path)
    return [(q, a) for q, a in zip(frame['question'], frame['answer'])]
# Load question-answer data from JSON
def load_qa_data_from_json(file_path):
    """
    Load question/answer pairs from a JSON file.

    Expects a top-level list of objects, each carrying 'question' and
    'answer' keys; returns them as a list of (question, answer) tuples.
    """
    with open(file_path, 'r') as handle:
        records = json.load(handle)
    pairs = []
    for record in records:
        pairs.append((record['question'], record['answer']))
    return pairs
# Check if the question is related to finance
def is_valid_finance_question(question):
    """Return True when the question mentions at least one finance keyword.

    Lightweight substring gate; a model-based classifier could replace this
    later, but case-insensitive keyword matching is enough for a first pass.
    """
    finance_keywords = ['finance', 'investment', 'bank', 'insurance', 'credit', 'budget', 'economy', 'inflation',
                        'debt', 'interest', 'mortgage', 'pension', 'retirement', 'savings']
    lowered = question.lower()
    for keyword in finance_keywords:
        if keyword in lowered:
            return True
    return False
# Generate the response for a valid financial question
def ask_finance_bot(user_query, qa_pairs):
    """Answer a finance question, grounding the prompt in similar stored Q/A pairs.

    Parameters:
        user_query: the user's question (str).
        qa_pairs:   list of (question, answer) tuples used as retrieval corpus;
                    may be empty, in which case no context is added.

    Returns the model's answer string, or a fixed refusal message when the
    query does not look finance-related.
    """
    # BUG FIX: gate on the *question* before spending compute on generation.
    # The original generated first and then keyword-checked the generated
    # answer, which both wasted a full generate() call and matched the wrong
    # text.
    if not is_valid_finance_question(user_query):
        return "I'm specialized in finance and can't help with that."

    # Retrieve the top-3 most similar stored Q/A pairs by cosine similarity.
    # (Previously query_embedding and retrieved_qa_pairs were computed but
    # never used; this implements the intended basic vector search.)
    retrieved_qa_pairs = []
    if qa_pairs:
        query_embedding = embedding_model.encode([user_query])[0]
        question_embeddings = embedding_model.encode([q for q, _ in qa_pairs])
        # Cosine similarity = dot product of L2-normalised vectors; the small
        # epsilon guards against division by zero on degenerate embeddings.
        q_norm = query_embedding / (np.linalg.norm(query_embedding) + 1e-12)
        m_norm = question_embeddings / (
            np.linalg.norm(question_embeddings, axis=1, keepdims=True) + 1e-12
        )
        scores = m_norm @ q_norm
        top_indices = np.argsort(scores)[::-1][:3]
        retrieved_qa_pairs = [qa_pairs[i] for i in top_indices]

    instruction = (
        "You are a highly knowledgeable AI assistant specializing strictly in finance.\n"
        "Strictly answer only financially related topics.\n"
        "Do not answer anything outside finance.\n"
        "Always provide accurate, objective, and concise answers to financial questions.\n"
    )
    # Fold the retrieved pairs into the prompt as few-shot context.
    context = "".join(f"Q: {q}\nA: {a}\n" for q, a in retrieved_qa_pairs)
    prompt = f"{instruction}\n{context}\nUser query: {user_query}\nAnswer:"

    input_ids = tokenizer(prompt, return_tensors="pt").to(model.device)
    # BUG FIX: do_sample=True is required for temperature/top_p to take
    # effect; without it generate() runs greedy decoding and silently
    # ignores both parameters.
    output_ids = model.generate(
        **input_ids,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,  # mild sampling to avoid repetitive answers
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id,
    )
    response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return response.split("Answer:")[-1].strip()
|