# Finance QA bot: FLAN-T5 generation with sentence-transformer retrieval
# over question-answer pairs loaded from CSV or JSON.
import json

import numpy as np
import pandas as pd
import torch
from sentence_transformers import SentenceTransformer
from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer

# Model configuration.
# NOTE: flan-t5 checkpoints are T5-family encoder-decoder (seq2seq) models,
# so they must be loaded with AutoModelForSeq2SeqLM — AutoModelForCausalLM
# raises a ValueError for this architecture.
model_name = "google/flan-t5-base"  # Swap for another seq2seq model if needed
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Sentence-transformer model used to embed questions for similarity search.
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
| # Load question-answer data from CSV | |
def load_qa_data_from_csv(file_path):
    """Load question-answer pairs from a CSV file.

    The file must contain 'question' and 'answer' columns.

    Returns:
        A list of (question, answer) tuples, one per row.
    """
    frame = pd.read_csv(file_path)
    return [(q, a) for q, a in zip(frame['question'], frame['answer'])]
| # Load question-answer data from JSON | |
def load_qa_data_from_json(file_path):
    """Load question-answer pairs from a JSON file.

    The file must contain a JSON array of objects, each carrying
    'question' and 'answer' keys.

    Returns:
        A list of (question, answer) tuples.
    """
    with open(file_path, 'r') as handle:
        records = json.load(handle)
    return [(record['question'], record['answer']) for record in records]
| # Check if the question is related to finance | |
# Keywords for the cheap lexical finance check; built once at import time
# instead of on every call.
FINANCE_KEYWORDS = frozenset([
    'finance', 'investment', 'bank', 'insurance', 'credit', 'budget',
    'economy', 'inflation', 'debt', 'interest', 'mortgage', 'pension',
    'retirement', 'savings',
])


def is_valid_finance_question(question):
    """Return True if *question* looks finance-related.

    This is a case-insensitive substring check against a fixed keyword
    list, so it can produce false positives (e.g. 'credit' inside
    'accredited').  A model-based classifier could replace it later.
    """
    lowered = question.lower()  # lower() once, not once per keyword
    return any(keyword in lowered for keyword in FINANCE_KEYWORDS)
| # Generate the response for a valid financial question | |
def ask_finance_bot(user_query, qa_pairs):
    """Answer a finance question using retrieved QA context and the LLM.

    Args:
        user_query: The user's question (plain text).
        qa_pairs: List of (question, answer) tuples used as retrieval
            context; may be empty.

    Returns:
        The generated answer string, or a refusal message when the query
        is not finance-related.
    """
    # Guard clause: refuse *before* spending compute on generation.
    # (Previously the finance check ran on the generated answer, which
    # wasted a generation call and mis-classified valid answers that
    # happened not to repeat a keyword.)
    if not is_valid_finance_question(user_query):
        return "I'm specialized in finance and can't help with that."

    # Retrieve the most similar stored QA pairs by embedding similarity.
    # (Previously the query embedding was computed but never used, and the
    # first three pairs were taken unconditionally and then discarded.)
    retrieved_qa_pairs = []
    if qa_pairs:
        query_embedding = embedding_model.encode([user_query])[0]
        question_embeddings = embedding_model.encode([q for q, _ in qa_pairs])
        # Cosine similarity; epsilon guards against zero-norm vectors.
        norms = (np.linalg.norm(question_embeddings, axis=1)
                 * np.linalg.norm(query_embedding) + 1e-12)
        scores = (question_embeddings @ query_embedding) / norms
        top_indices = np.argsort(scores)[::-1][:3]
        retrieved_qa_pairs = [qa_pairs[i] for i in top_indices]

    instruction = (
        "You are a highly knowledgeable AI assistant specializing strictly in finance.\n"
        "Strictly answer only financially related topics.\n"
        "Do not answer anything outside finance.\n"
        "Always provide accurate, objective, and concise answers to financial questions.\n"
    )
    context = "\n".join(f"Q: {q}\nA: {a}" for q, a in retrieved_qa_pairs)
    if context:
        context = f"\nRelevant examples:\n{context}\n"

    prompt = f"{instruction}{context}\nUser query: {user_query}\nAnswer:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output_ids = model.generate(
        **inputs,
        max_new_tokens=256,
        do_sample=True,   # required: temperature/top_p are ignored without it
        temperature=0.7,  # mild randomness to avoid verbatim repetition
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id,
    )
    response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    # Seq2seq models emit only the answer, but keep the split in case a
    # causal LM that echoes the prompt is substituted in.
    return response.split("Answer:")[-1].strip()