# Finance Q&A chatbot: retrieval-augmented answering with Flan-T5.
import json
import numpy as np
import pandas as pd
import torch
from sentence_transformers import SentenceTransformer
from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer
# Initialize model and tokenizer.
model_name = "google/flan-t5-base"  # You can use a different model if needed
# BUG FIX: Flan-T5 is an encoder-decoder (seq2seq) architecture. Loading it
# via AutoModelForCausalLM raises a ValueError at import time because T5 is
# not in the causal-LM model mapping; AutoModelForSeq2SeqLM is the correct
# auto class and works with the existing model.generate(...) calls below.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Sentence transformer model to encode questions for similarity search.
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
# Load question-answer data from CSV
def load_qa_data_from_csv(file_path):
    """
    Load question/answer pairs from a CSV file.

    The file must contain 'question' and 'answer' columns; each row becomes
    one (question, answer) tuple in the returned list.
    """
    frame = pd.read_csv(file_path)
    return [(q, a) for q, a in zip(frame['question'], frame['answer'])]
# Load question-answer data from JSON
def load_qa_data_from_json(file_path):
    """
    Load question/answer pairs from a JSON file.

    Expects a top-level list of objects, each carrying 'question' and
    'answer' keys; returns them as a list of (question, answer) tuples.
    """
    with open(file_path, 'r') as handle:
        records = json.load(handle)
    pairs = []
    for record in records:
        pairs.append((record['question'], record['answer']))
    return pairs
# Check if the question is related to finance
def is_valid_finance_question(question):
    """Return True when the question mentions at least one finance keyword.

    Lightweight substring gate; a model-based classifier could replace this
    later, but case-insensitive keyword matching is enough for a first pass.
    """
    finance_keywords = ['finance', 'investment', 'bank', 'insurance', 'credit', 'budget', 'economy', 'inflation',
                        'debt', 'interest', 'mortgage', 'pension', 'retirement', 'savings']
    lowered = question.lower()
    for keyword in finance_keywords:
        if keyword in lowered:
            return True
    return False
# Generate the response for a valid financial question
def ask_finance_bot(user_query, qa_pairs):
    """Answer a finance question, grounding the prompt in similar stored Q/A pairs.

    Parameters:
        user_query: the user's question (str).
        qa_pairs:   list of (question, answer) tuples used as retrieval corpus;
                    may be empty, in which case no context is added.

    Returns the model's answer string, or a fixed refusal message when the
    query does not look finance-related.
    """
    # BUG FIX: gate on the *question* before spending compute on generation.
    # The original generated first and then keyword-checked the generated
    # answer, which both wasted a full generate() call and matched the wrong
    # text.
    if not is_valid_finance_question(user_query):
        return "I'm specialized in finance and can't help with that."

    # Retrieve the top-3 most similar stored Q/A pairs by cosine similarity.
    # (Previously query_embedding and retrieved_qa_pairs were computed but
    # never used; this implements the intended basic vector search.)
    retrieved_qa_pairs = []
    if qa_pairs:
        query_embedding = embedding_model.encode([user_query])[0]
        question_embeddings = embedding_model.encode([q for q, _ in qa_pairs])
        # Cosine similarity = dot product of L2-normalised vectors; the small
        # epsilon guards against division by zero on degenerate embeddings.
        q_norm = query_embedding / (np.linalg.norm(query_embedding) + 1e-12)
        m_norm = question_embeddings / (
            np.linalg.norm(question_embeddings, axis=1, keepdims=True) + 1e-12
        )
        scores = m_norm @ q_norm
        top_indices = np.argsort(scores)[::-1][:3]
        retrieved_qa_pairs = [qa_pairs[i] for i in top_indices]

    instruction = (
        "You are a highly knowledgeable AI assistant specializing strictly in finance.\n"
        "Strictly answer only financially related topics.\n"
        "Do not answer anything outside finance.\n"
        "Always provide accurate, objective, and concise answers to financial questions.\n"
    )
    # Fold the retrieved pairs into the prompt as few-shot context.
    context = "".join(f"Q: {q}\nA: {a}\n" for q, a in retrieved_qa_pairs)
    prompt = f"{instruction}\n{context}\nUser query: {user_query}\nAnswer:"

    input_ids = tokenizer(prompt, return_tensors="pt").to(model.device)
    # BUG FIX: do_sample=True is required for temperature/top_p to take
    # effect; without it generate() runs greedy decoding and silently
    # ignores both parameters.
    output_ids = model.generate(
        **input_ids,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,  # mild sampling to avoid repetitive answers
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id,
    )
    response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return response.split("Answer:")[-1].strip()
|