Spaces:
Sleeping
Sleeping
# model_utils.py
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
| # --- Load LLaMA model --- | |
def load_llama_model():
    """Load the Meta-Llama-3-8B-Instruct tokenizer and model.

    Weights are loaded in float16 when a CUDA device is available
    (float32 otherwise) and placed automatically via device_map="auto".

    Returns:
        tuple: (tokenizer, model)
    """
    model_id = "meta-llama/Meta-Llama-3-8B-Instruct"  # Ensure you have access
    dtype = torch.float16 if torch.cuda.is_available() else torch.float32
    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=dtype,
        device_map="auto",
    )
    return tokenizer, model
| # --- Helper functions --- | |
def is_finance_question(user_query, tokenizer, model):
    """Ask the model to classify whether *user_query* is about finance.

    Args:
        user_query (str): The user's question.
        tokenizer: Hugging Face tokenizer paired with *model*.
        model: Causal LM used for the yes/no classification.

    Returns:
        bool: True if the model's generated reply starts with "yes".
    """
    check_prompt = (
        f"You are a financial expert. Determine whether the following question is clearly about finance:\n\n"
        f"Question: {user_query}\n\n"
        f"Respond only with 'Yes' or 'No'."
    )
    inputs = tokenizer(check_prompt, return_tensors="pt").to(model.device)
    output_ids = model.generate(
        **inputs,
        max_new_tokens=10,
        # BUG FIX: the original passed temperature=0.0 without do_sample=True,
        # which transformers ignores (and 0.0 is invalid for sampling).
        # Greedy decoding is the correct way to get a deterministic answer.
        do_sample=False,
        pad_token_id=tokenizer.eos_token_id,
    )
    # BUG FIX: decode only the newly generated tokens. Decoding output_ids[0]
    # in full prepends the prompt text, so startswith("yes") could never match
    # the model's actual reply.
    prompt_len = inputs["input_ids"].shape[1]
    response = tokenizer.decode(
        output_ids[0][prompt_len:], skip_special_tokens=True
    ).strip()
    return response.lower().startswith("yes")
def ask_finance_bot(user_query, answers, embedding_model, index, tokenizer, model, top_k=3):
    """Answer a finance question with retrieval-augmented generation.

    Embeds the query, retrieves the *top_k* nearest stored answers from the
    FAISS-style *index* to build a context block, then prompts the LLM. If the
    same (normalized) question is asked repeatedly, sampling temperature is
    raised by 0.1 per repeat (capped at 1.0) to encourage a different answer.

    Args:
        user_query (str): The user's question.
        answers (Sequence[str]): Corpus texts aligned with the index rows.
        embedding_model: Encoder with an ``encode([str]) -> array`` method.
        index: Vector index exposing ``search(vectors, k) -> (D, I)``.
        tokenizer: HF tokenizer paired with *model*.
        model: Causal LM used for generation.
        top_k (int): Number of context passages to retrieve.

    Returns:
        str: The generated answer, or a fallback disclaimer after 4 attempts.
    """
    normalized_query = user_query.lower().strip()

    # BUG FIX: the repeat counter was a fresh local dict on every call, so
    # `count` was always 1 and the temperature escalation was dead code.
    # Persist it across calls as a function attribute (interface unchanged).
    recent = getattr(ask_finance_bot, "_recent_questions", None)
    if recent is None:
        recent = {}
        ask_finance_bot._recent_questions = recent
    count = recent.get(normalized_query, 0) + 1
    recent[normalized_query] = count

    query_embedding = embedding_model.encode([user_query])
    D, I = index.search(np.array(query_embedding), top_k)
    retrieved_answers = [answers[i] for i in I[0]]
    context = "\n".join(f"- {text}" for text in retrieved_answers)

    # +0.1 temperature per repeat of the same question, capped at 1.0.
    temperature = min(0.7 + 0.1 * (count - 1), 1.0)

    instruction = (
        "You are a highly knowledgeable AI assistant specializing strictly in finance.\n"
        "Strictly answer only financially related topics.\n"
        "Never answer questions that are not financially related.\n"
        "Always provide accurate, objective, and concise answers to financial questions.\n"
        "If a valid financial question is asked, always answer.\n"
        "If a question is unrelated to finance, respond: 'I'm specialized in finance and can't help with that. How can I assist you with a finance-related question today?'\n"
        "If a greeting like 'Hi', 'Hello', or 'Hey' is used, respond with: 'Hello! How can I help you with your finance-related question today?'\n"
    )

    for _ in range(4):
        prompt = f"""{instruction}
Background context:
{context}
User question: {user_query}
Answer:"""
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        output_ids = model.generate(
            **inputs,
            max_new_tokens=256,
            # BUG FIX: temperature/top_p only take effect when sampling is
            # enabled; the original omitted do_sample=True, so both were
            # silently ignored and every retry produced the same greedy output.
            do_sample=True,
            temperature=temperature,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
        )
        # BUG FIX: slice off the prompt tokens instead of splitting the full
        # decoded text on "Answer:" — the split breaks if the model itself
        # emits "Answer:" inside its reply.
        prompt_len = inputs["input_ids"].shape[1]
        answer_text = tokenizer.decode(
            output_ids[0][prompt_len:], skip_special_tokens=True
        ).strip()
        # Weak relevance heuristic: accept the answer if it echoes any word of
        # the query; otherwise retry (up to 4 attempts).
        if any(word in answer_text.lower() for word in normalized_query.split()):
            return answer_text

    return "I'm not confident in the response. Please consult a certified financial expert."