File size: 3,266 Bytes
9539702
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# model_utils.py
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# --- Load LLaMA model ---
def load_llama_model():
    """Load the LLaMA-3 8B Instruct tokenizer and causal-LM model.

    Uses fp16 when a CUDA device is present (fp32 otherwise) and lets
    ``device_map="auto"`` decide device placement. The repo is gated —
    the caller must already have access.

    Returns:
        tuple: ``(tokenizer, model)``
    """
    repo_id = "meta-llama/Meta-Llama-3-8B-Instruct"  # gated repo; access required
    dtype = torch.float16 if torch.cuda.is_available() else torch.float32

    tok = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True)
    lm = AutoModelForCausalLM.from_pretrained(
        repo_id,
        torch_dtype=dtype,
        device_map="auto",
    )
    return tok, lm

# --- Helper functions ---
def is_finance_question(user_query, tokenizer, model):
    """Classify whether *user_query* is clearly about finance.

    Asks the model for a Yes/No verdict with greedy decoding and checks
    whether the generated text starts with "yes".

    Args:
        user_query: The question to classify.
        tokenizer: Tokenizer paired with *model*.
        model: A causal LM supporting ``generate``.

    Returns:
        bool: True if the model's reply begins with "Yes" (case-insensitive).
    """
    check_prompt = (
        f"You are a financial expert. Determine whether the following question is clearly about finance:\n\n"
        f"Question: {user_query}\n\n"
        f"Respond only with 'Yes' or 'No'."
    )
    inputs = tokenizer(check_prompt, return_tensors="pt").to(model.device)
    output_ids = model.generate(
        **inputs,
        max_new_tokens=10,
        # temperature=0.0 is invalid for sampling; use explicit greedy decoding
        # for a deterministic Yes/No verdict instead.
        do_sample=False,
        pad_token_id=tokenizer.eos_token_id
    )
    # generate() returns prompt + continuation; decode ONLY the new tokens,
    # otherwise the text always begins with the prompt and the startswith
    # check below can never succeed.
    new_tokens = output_ids[0][inputs["input_ids"].shape[-1]:]
    response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
    return response.lower().startswith("yes")

# Per-process cache of how often each normalized question has been asked.
# It must live at module level: the original re-created it inside the
# function on every call, so the repeat counter was always 1 and the
# escalating-temperature logic below was dead code.
_recent_questions = {}


def ask_finance_bot(user_query, answers, embedding_model, index, tokenizer, model, top_k=3):
    """Answer a finance question via retrieval-augmented generation.

    Retrieves the *top_k* most similar stored answers from the FAISS
    *index*, builds a grounded prompt, and samples up to 4 responses.
    Repeated questions get a slightly higher sampling temperature
    (0.7 base, +0.1 per repeat, capped at 1.0).

    Args:
        user_query: The user's question.
        answers: Corpus of candidate answer texts, aligned with *index*.
        embedding_model: Sentence-embedding model with an ``encode`` method.
        index: FAISS index over the embeddings of *answers*.
        tokenizer: Tokenizer paired with *model*.
        model: Causal LM supporting ``generate``.
        top_k: Number of retrieved context passages (default 3).

    Returns:
        str: The model's answer, or a fallback disclaimer if no attempt
        echoes any word of the question.
    """
    normalized_query = user_query.lower().strip()

    count = _recent_questions.get(normalized_query, 0) + 1
    _recent_questions[normalized_query] = count

    query_embedding = embedding_model.encode([user_query])
    D, I = index.search(np.array(query_embedding), top_k)
    retrieved_answers = [answers[i] for i in I[0]]
    context = "\n".join(f"- {text}" for text in retrieved_answers)

    # Raise temperature for repeated questions to encourage varied answers.
    temperature = min(0.7 + 0.1 * (count - 1), 1.0)

    instruction = (
        "You are a highly knowledgeable AI assistant specializing strictly in finance.\n"
        "Strictly answer only financially related topics.\n"
        "Never answer questions that are not financially related.\n"
        "Always provide accurate, objective, and concise answers to financial questions.\n"
        "If a valid financial question is asked, always answer.\n"
        "If a question is unrelated to finance, respond: 'I'm specialized in finance and can't help with that. How can I assist you with a finance-related question today?'\n"
        "If a greeting like 'Hi', 'Hello', or 'Hey' is used, respond with: 'Hello! How can I help you with your finance-related question today?'\n"
    )

    for _ in range(4):
        prompt = f"""{instruction}

Background context:
{context}

User question: {user_query}

Answer:"""

        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        output_ids = model.generate(
            **inputs,
            max_new_tokens=256,
            # Sampling must be enabled explicitly, otherwise temperature/top_p
            # are ignored under the default greedy configuration and the
            # escalation above has no effect.
            do_sample=True,
            temperature=temperature,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id
        )

        response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
        answer_text = response.split("Answer:")[-1].strip()

        # Crude relevance heuristic: accept only answers that echo at least
        # one word of the (already lowercased) query.
        if any(word in answer_text.lower() for word in normalized_query.split()):
            return answer_text

    return "I'm not confident in the response. Please consult a certified financial expert."