WillyCodesInit committed on
Commit
ad707c0
·
verified ·
1 Parent(s): 45ecf37

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -27
app.py CHANGED
@@ -1,9 +1,7 @@
1
  import streamlit as st
2
- import pandas as pd
3
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
4
  from sentence_transformers import SentenceTransformer
5
- import faiss
6
- import numpy as np
7
 
8
  st.title("Fin$mart Chatbot")
9
  st.markdown("Ask financial questions and get answers based on expert knowledge.")
@@ -18,37 +16,25 @@ def load_models():
18
 
19
  tokenizer, model, embedder = load_models()
20
 
21
- # Load data
22
- @st.cache_data
23
- def load_data():
24
- df = pd.read_csv("train_data.csv").head(100)
25
- df['content'] = "Q: " + df['question'] + " A: " + df['answer'] # Combine Q+A for better retrieval
26
- return df
27
-
28
- data = load_data()
29
-
30
- # Build vector store
31
  @st.cache_resource
32
- def build_vector_store(texts):
33
- embeddings = embedder.encode(texts, show_progress_bar=True)
34
- dim = embeddings[0].shape[0]
35
- index = faiss.IndexFlatL2(dim)
36
- index.add(np.array(embeddings))
37
- return index, embeddings
38
 
39
- texts = data['content'].tolist()
40
- index, embeddings = build_vector_store(texts)
41
 
42
  # Chat interface
43
  prompt = st.chat_input("Ask something about finance...")
44
 
45
  if prompt:
46
- # Embed and retrieve top 3 relevant pieces of text
47
  q_embed = embedder.encode([prompt])
48
- _, I = index.search(np.array(q_embed), k=3)
49
  context = " ".join([texts[i] for i in I[0]])
50
 
51
- # Prepare the prompt for flan-t5-base
52
  input_text = (
53
  f"You are a helpful financial assistant. Use the information provided below to answer the user's question.\n\n"
54
  f"Context: {context}\n\n"
@@ -60,10 +46,10 @@ if prompt:
60
  outputs = model.generate(**inputs, max_length=150)
61
  answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
62
 
63
- # Display the answer
64
  st.markdown(f"**Answer:** {answer}")
65
 
66
- # Optionally show the context used
67
  with st.expander("Context Used"):
68
  for i in I[0]:
69
- st.write(texts[i])
 
1
  import streamlit as st
 
2
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
3
  from sentence_transformers import SentenceTransformer
4
+ import pickle
 
5
 
6
  st.title("Fin$mart Chatbot")
7
  st.markdown("Ask financial questions and get answers based on expert knowledge.")
 
16
 
17
  tokenizer, model, embedder = load_models()
18
 
19
+ # Load vector store from existing pickle file
 
 
 
 
 
 
 
 
 
20
  @st.cache_resource
21
+ def load_vector_store():
22
+ with open("vectorstore.pkl", "rb") as f:
23
+ index, texts, _ = pickle.load(f) # We ignore embeddings if not needed
24
+ return index, texts
 
 
25
 
26
+ index, texts = load_vector_store()
 
27
 
28
  # Chat interface
29
  prompt = st.chat_input("Ask something about finance...")
30
 
31
  if prompt:
32
+ # Embed query and retrieve top 3 results
33
  q_embed = embedder.encode([prompt])
34
+ _, I = index.search(q_embed, k=3)
35
  context = " ".join([texts[i] for i in I[0]])
36
 
37
+ # Build input for Flan-T5
38
  input_text = (
39
  f"You are a helpful financial assistant. Use the information provided below to answer the user's question.\n\n"
40
  f"Context: {context}\n\n"
 
46
  outputs = model.generate(**inputs, max_length=150)
47
  answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
48
 
49
+ # Display response
50
  st.markdown(f"**Answer:** {answer}")
51
 
52
+ # Show retrieved context
53
  with st.expander("Context Used"):
54
  for i in I[0]:
55
+ st.write(texts[i])