WillyCodesInit committed on
Commit
6e84237
·
verified ·
1 Parent(s): 403aa45

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -10
app.py CHANGED
@@ -5,7 +5,7 @@ from sentence_transformers import SentenceTransformer
5
  import faiss
6
  import numpy as np
7
 
8
- st.title("💬 Fin$mart Chatbot")
9
  st.markdown("Ask financial questions and get answers based on expert knowledge.")
10
 
11
  # Load models
@@ -21,13 +21,13 @@ tokenizer, model, embedder = load_models()
21
  # Load data
22
  @st.cache_data
23
  def load_data():
24
- df = pd.read_csv("train_data.csv").head(100) # Limit to 100 rows for performance
25
- df['content'] = df['answer'] # Ensure 'content' column exists
26
  return df
27
 
28
  data = load_data()
29
 
30
- # Build vector store
31
  @st.cache_resource
32
  def build_vector_store(texts):
33
  embeddings = embedder.encode(texts)
@@ -43,21 +43,21 @@ index, embeddings = build_vector_store(texts)
43
  prompt = st.chat_input("Ask something about finance...")
44
 
45
  if prompt:
46
- # Embed and search
47
  q_embed = embedder.encode([prompt])
48
  _, I = index.search(np.array(q_embed), k=3)
49
  context = " ".join([texts[i] for i in I[0]])
50
 
51
- # Format prompt for T5
52
- input_text = f"Answer the question using the context below.\nContext: {context}\nQuestion: {prompt}"
53
  inputs = tokenizer(input_text, return_tensors="pt", truncation=True, max_length=512)
54
  outputs = model.generate(**inputs, max_length=100)
55
  answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
56
 
57
- # Display answer
58
  st.markdown(f"**Answer:** {answer}")
59
 
60
- # Optionally show context
61
- with st.expander("🔍 Context Used"):
62
  for i in I[0]:
63
  st.write(texts[i])
 
5
  import faiss
6
  import numpy as np
7
 
8
+ st.title("Fin$mart Chatbot")
9
  st.markdown("Ask financial questions and get answers based on expert knowledge.")
10
 
11
  # Load models
 
21
  # Load data
22
  @st.cache_data
23
  def load_data():
24
+ df = pd.read_csv("train_data.csv").head(100) # Adjust row count if needed
25
+ df['content'] = df['answer'] # Ensure 'content' is mapped correctly
26
  return df
27
 
28
  data = load_data()
29
 
30
+ # Build vector store with FAISS
31
  @st.cache_resource
32
  def build_vector_store(texts):
33
  embeddings = embedder.encode(texts)
 
43
  prompt = st.chat_input("Ask something about finance...")
44
 
45
  if prompt:
46
+ # Encode the question and search for top 3 matches
47
  q_embed = embedder.encode([prompt])
48
  _, I = index.search(np.array(q_embed), k=3)
49
  context = " ".join([texts[i] for i in I[0]])
50
 
51
+ # Format prompt for T5 with better structure
52
+ input_text = f"Based on the context below, answer the question.\n\nContext: {context}\n\nQuestion: {prompt}"
53
  inputs = tokenizer(input_text, return_tensors="pt", truncation=True, max_length=512)
54
  outputs = model.generate(**inputs, max_length=100)
55
  answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
56
 
57
+ # Display the generated answer
58
  st.markdown(f"**Answer:** {answer}")
59
 
60
+ # Show retrieved content as reference
61
+ with st.expander(" Context Used"):
62
  for i in I[0]:
63
  st.write(texts[i])