# Finance QA bot: FLAN-T5 generation with sentence-transformer retrieval
# over question-answer pairs loaded from CSV or JSON.
import json

import numpy as np
import pandas as pd
import torch
from sentence_transformers import SentenceTransformer
from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer

# Model configuration.
# NOTE: flan-t5 checkpoints are T5-family encoder-decoder (seq2seq) models,
# so they must be loaded with AutoModelForSeq2SeqLM — AutoModelForCausalLM
# raises a ValueError for this architecture.
model_name = "google/flan-t5-base"  # Swap for another seq2seq model if needed
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Sentence-transformer model used to embed questions for similarity search.
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
| # Load question-answer data from CSV | |
def load_qa_data_from_csv(file_path):
    """Load question-answer pairs from a CSV file.

    The file must contain 'question' and 'answer' columns.

    Returns:
        A list of (question, answer) tuples, one per row.
    """
    frame = pd.read_csv(file_path)
    return [(q, a) for q, a in zip(frame['question'], frame['answer'])]
| # Load question-answer data from JSON | |
def load_qa_data_from_json(file_path):
    """Load question-answer pairs from a JSON file.

    The file must contain a JSON array of objects, each carrying
    'question' and 'answer' keys.

    Returns:
        A list of (question, answer) tuples.
    """
    with open(file_path, 'r') as handle:
        records = json.load(handle)
    return [(record['question'], record['answer']) for record in records]
| # Check if the question is related to finance | |
# Keywords for the cheap lexical finance check; built once at import time
# instead of on every call.
FINANCE_KEYWORDS = frozenset([
    'finance', 'investment', 'bank', 'insurance', 'credit', 'budget',
    'economy', 'inflation', 'debt', 'interest', 'mortgage', 'pension',
    'retirement', 'savings',
])


def is_valid_finance_question(question):
    """Return True if *question* looks finance-related.

    This is a case-insensitive substring check against a fixed keyword
    list, so it can produce false positives (e.g. 'credit' inside
    'accredited').  A model-based classifier could replace it later.
    """
    lowered = question.lower()  # lower() once, not once per keyword
    return any(keyword in lowered for keyword in FINANCE_KEYWORDS)
| # Generate the response for a valid financial question | |
def ask_finance_bot(user_query, qa_pairs):
    """Answer a finance question using retrieved QA context and the LLM.

    Args:
        user_query: The user's question (plain text).
        qa_pairs: List of (question, answer) tuples used as retrieval
            context; may be empty.

    Returns:
        The generated answer string, or a refusal message when the query
        is not finance-related.
    """
    # Guard clause: refuse *before* spending compute on generation.
    # (Previously the finance check ran on the generated answer, which
    # wasted a generation call and mis-classified valid answers that
    # happened not to repeat a keyword.)
    if not is_valid_finance_question(user_query):
        return "I'm specialized in finance and can't help with that."

    # Retrieve the most similar stored QA pairs by embedding similarity.
    # (Previously the query embedding was computed but never used, and the
    # first three pairs were taken unconditionally and then discarded.)
    retrieved_qa_pairs = []
    if qa_pairs:
        query_embedding = embedding_model.encode([user_query])[0]
        question_embeddings = embedding_model.encode([q for q, _ in qa_pairs])
        # Cosine similarity; epsilon guards against zero-norm vectors.
        norms = (np.linalg.norm(question_embeddings, axis=1)
                 * np.linalg.norm(query_embedding) + 1e-12)
        scores = (question_embeddings @ query_embedding) / norms
        top_indices = np.argsort(scores)[::-1][:3]
        retrieved_qa_pairs = [qa_pairs[i] for i in top_indices]

    instruction = (
        "You are a highly knowledgeable AI assistant specializing strictly in finance.\n"
        "Strictly answer only financially related topics.\n"
        "Do not answer anything outside finance.\n"
        "Always provide accurate, objective, and concise answers to financial questions.\n"
    )
    context = "\n".join(f"Q: {q}\nA: {a}" for q, a in retrieved_qa_pairs)
    if context:
        context = f"\nRelevant examples:\n{context}\n"

    prompt = f"{instruction}{context}\nUser query: {user_query}\nAnswer:"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output_ids = model.generate(
        **inputs,
        max_new_tokens=256,
        do_sample=True,   # required: temperature/top_p are ignored without it
        temperature=0.7,  # mild randomness to avoid verbatim repetition
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id,
    )
    response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    # Seq2seq models emit only the answer, but keep the split in case a
    # causal LM that echoes the prompt is substituted in.
    return response.split("Answer:")[-1].strip()