Spaces:

WillyCodesInit
/

finsmart_bot

Sleeping

App Files Community

WillyCodesInit committed on May 6, 2025

Commit

644cc5a

verified ·

1 Parent(s): 0c571b0

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +46 -47

src/streamlit_app.py CHANGED Viewed

@@ -8,45 +8,50 @@ from sentence_transformers import SentenceTransformer
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from huggingface_hub import login
-# --- HuggingFace login ---
-HF_TOKEN = os.getenv("HF_TOKEN")
 if HF_TOKEN:
     login(token=HF_TOKEN)
-# Path to the data file within the 'src' folder
-data_path = os.path.join(os.path.dirname(__file__), 'train_data.csv')
-# Load data
-train = pd.read_csv(data_path)
 # --- Load data ---
-questions = train['question'].tolist()
-answers = train['answer'].tolist()
 qa_pairs = [f"Q: {q} A: {a}" for q, a in zip(questions, answers)]
-# --- Embedding model ---
-embedding_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
-answer_embeddings = embedding_model.encode(answers)
-# --- FAISS index ---
-index = faiss.IndexFlatL2(answer_embeddings.shape[1])
-index.add(np.array(answer_embeddings))
-# --- LLaMA model setup ---
-model_name = "meta-llama/Llama-3-8B-Instruct"  # Update to a valid space-available model if needed
-tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-    device_map="auto"
-)
-# --- Helper Functions ---
 def sanitize_answer(question, answer):
     return any(word.lower() in answer.lower() for word in question.lower().split())
@@ -74,7 +79,6 @@ def ask_finance_bot(user_query, top_k=3):
     count = recent_questions.get(normalized_query, 0) + 1
     recent_questions[normalized_query] = count
-    # Embed user query
     query_embedding = embedding_model.encode([user_query])
     D, I = index.search(np.array(query_embedding), top_k)
     retrieved_answers = [answers[i] for i in I[0]]
@@ -86,18 +90,13 @@ def ask_finance_bot(user_query, top_k=3):
         "You are a highly knowledgeable AI assistant specializing strictly in finance.\n"
         "Strictly answer only financially related topics.\n"
         "Never answer questions that are not financially related.\n"
-        "Do not answer anything outside finance.\n"
         "Always provide accurate, objective, and concise answers to financial questions.\n"
-        "Avoid unnecessary elaboration and focus directly on answering the user's query.\n"
-        "Use the background context only if it is accurate, clear, and relevant. If the context is unclear, incomplete, low-quality, or irrelevant, ignore it and generate your own correct, concise financial answer.\n"
-        "Do not copy or repeat the context verbatim — instead, synthesize your own response based on it.\n"
-        "Do not speculate or use personal phrases like 'I think' or 'In my opinion'.\n"
-        "If a valid financial question is asked, always answer — never refuse or say 'I can't help with that.'\n"
         "If a question is unrelated to finance, respond: 'I'm specialized in finance and can't help with that. How can I assist you with a finance-related question today?'\n"
         "If a greeting like 'Hi', 'Hello', or 'Hey' is used, respond with: 'Hello! How can I help you with your finance-related question today?'\n"
     )
-    for _ in range(6):
         prompt = f"""{instruction}
 Background context:
@@ -124,17 +123,17 @@ Answer:"""
     return "I'm not confident in the response. Please consult a certified financial expert."
-# --- Streamlit App UI ---
 st.set_page_config(page_title="DiMowkayBot - Finance Assistant", layout="centered")
-st.title("DiMowkayBot - Your Finance Q&A Assistant")
-user_query = st.text_input("Enter your finance-related question:")
 if user_query:
-    if not is_finance_question(user_query):
-        st.warning("I'm specialized in finance and can't help with that. How can I assist you with a finance-related question today?")
-    else:
-        with st.spinner("Thinking..."):
             answer = ask_finance_bot(user_query)
-        st.success("Response:")
-        st.write(answer)

 from transformers import AutoTokenizer, AutoModelForCausalLM
 from huggingface_hub import login
+# --- Hugging Face login ---
+HF_TOKEN = st.secrets.get("HF_TOKEN", os.getenv("HF_TOKEN"))
 if HF_TOKEN:
     login(token=HF_TOKEN)
+else:
+    st.error("Hugging Face token not found. Please set it in secrets.toml or environment.")
+    st.stop()
 # --- Load data ---
+@st.cache_data
+def load_data():
+    data_path = os.path.join(os.path.dirname(__file__), 'train_data.csv')
+    df = pd.read_csv(data_path)
+    return df['question'].tolist(), df['answer'].tolist()
+questions, answers = load_data()
 qa_pairs = [f"Q: {q} A: {a}" for q, a in zip(questions, answers)]
+# --- Embedding model and FAISS index ---
+@st.cache_resource
+def setup_embeddings():
+    embedder = SentenceTransformer('paraphrase-MiniLM-L6-v2')
+    answer_embeddings = embedder.encode(answers, show_progress_bar=True)
+    index = faiss.IndexFlatL2(answer_embeddings.shape[1])
+    index.add(np.array(answer_embeddings))
+    return embedder, index
+embedding_model, index = setup_embeddings()
+# --- Load LLaMA model ---
+@st.cache_resource
+def load_llama_model():
+    model_id = "meta-llama/Meta-Llama-3-8B-Instruct"  # Ensure you have access
+    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
+    model = AutoModelForCausalLM.from_pretrained(
+        model_id,
+        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+        device_map="auto"
+    )
+    return tokenizer, model
+tokenizer, model = load_llama_model()
+# --- Helper functions ---
 def sanitize_answer(question, answer):
     return any(word.lower() in answer.lower() for word in question.lower().split())
     count = recent_questions.get(normalized_query, 0) + 1
     recent_questions[normalized_query] = count
     query_embedding = embedding_model.encode([user_query])
     D, I = index.search(np.array(query_embedding), top_k)
     retrieved_answers = [answers[i] for i in I[0]]
         "You are a highly knowledgeable AI assistant specializing strictly in finance.\n"
         "Strictly answer only financially related topics.\n"
         "Never answer questions that are not financially related.\n"
         "Always provide accurate, objective, and concise answers to financial questions.\n"
+        "If a valid financial question is asked, always answer.\n"
         "If a question is unrelated to finance, respond: 'I'm specialized in finance and can't help with that. How can I assist you with a finance-related question today?'\n"
         "If a greeting like 'Hi', 'Hello', or 'Hey' is used, respond with: 'Hello! How can I help you with your finance-related question today?'\n"
     )
+    for _ in range(4):
         prompt = f"""{instruction}
 Background context:
     return "I'm not confident in the response. Please consult a certified financial expert."
+# --- Streamlit UI ---
 st.set_page_config(page_title="DiMowkayBot - Finance Assistant", layout="centered")
+st.title("🤖 DiMowkayBot - Your Finance Q&A Assistant")
+user_query = st.text_input("Ask a finance-related question:")
 if user_query:
+    with st.spinner("Thinking..."):
+        if not is_finance_question(user_query):
+            st.warning("I'm specialized in finance and can't help with that. How can I assist you with a finance-related question today?")
+        else:
             answer = ask_finance_bot(user_query)
+            st.success("Response:")
+            st.write(answer)