WillyCodesInit committed on
Commit
71a3adc
·
verified ·
1 Parent(s): 89da9cc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -11
app.py CHANGED
@@ -1,15 +1,56 @@
1
  import streamlit as st
2
- from utils import ask_finance_bot
 
 
 
 
3
 
4
- st.set_page_config(page_title="FinanceBot", layout="centered")
5
- st.title("💼 FinanceBot")
6
 
7
- user_input = st.text_input("Enter your finance-related question:")
 
 
 
 
 
 
8
 
9
- if st.button("Ask"):
10
- if user_input.strip():
11
- with st.spinner("Generating answer..."):
12
- answer = ask_finance_bot(user_input)
13
- st.success(answer)
14
- else:
15
- st.warning("Please enter a question.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ import pandas as pd
3
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
4
+ from sentence_transformers import SentenceTransformer
5
+ import faiss
6
+ import numpy as np
7
 
8
+ st.title("T5 QA Chatbot on CSV Content")
 
9
 
10
+ # Load models
11
@st.cache_resource
def load_models():
    """Load the T5 tokenizer, the T5 seq2seq model and the sentence embedder.

    The function is wrapped in ``st.cache_resource`` so the returned objects
    can be reused by Streamlit instead of being rebuilt on every call.

    Returns:
        A ``(tokenizer, model, embedder)`` tuple: the "t5-small" tokenizer,
        the "t5-small" seq2seq model, and the "all-MiniLM-L6-v2"
        sentence-transformers encoder.
    """
    # Tokenizer and model must come from the same checkpoint.
    t5_checkpoint = "t5-small"
    return (
        AutoTokenizer.from_pretrained(t5_checkpoint),
        AutoModelForSeq2SeqLM.from_pretrained(t5_checkpoint),
        SentenceTransformer("all-MiniLM-L6-v2"),
    )
17
 
18
+ tokenizer, model, embedder = load_models()
19
+
20
+ # Load data
21
@st.cache_data
def load_data():
    """Load the first 100 rows of ``train_data.csv`` as the retrieval corpus.

    The ``answer`` column is copied into a new ``content`` column, which the
    rest of the script embeds and searches.

    Returns:
        A DataFrame with at most 100 rows and an added string-typed
        ``content`` column. Rows whose ``answer`` is missing are dropped.

    Raises:
        KeyError: If the CSV has no ``answer`` column.
    """
    # nrows stops parsing after 100 rows instead of reading the whole file
    # and discarding everything past the head.
    df = pd.read_csv("train_data.csv", nrows=100)
    if "answer" not in df.columns:
        raise KeyError("train_data.csv must contain an 'answer' column")

    # Missing answers would reach the sentence encoder as float NaN and fail
    # there; drop them and keep the positional index contiguous.
    df = df.dropna(subset=["answer"]).reset_index(drop=True)
    # The encoder expects strings, so normalise numeric answers too.
    df["content"] = df["answer"].astype(str)
    return df
26
+
27
+ data = load_data()
28
+
29
+ # Build vector store
30
@st.cache_resource
def build_vector_store(texts):
    """Embed ``texts`` and store the vectors in an exact L2 FAISS index.

    Uses the module-level ``embedder`` to encode the passages.

    Args:
        texts: Non-empty list of strings to embed.

    Returns:
        ``(index, embeddings)`` where ``index`` is a ``faiss.IndexFlatL2``
        holding one vector per entry of ``texts`` (in the same order) and
        ``embeddings`` is the float32 matrix that was added to it.

    Raises:
        ValueError: If ``texts`` is empty.
    """
    # Fail early with a clear message instead of an IndexError on embeddings[0].
    if len(texts) == 0:
        raise ValueError("Cannot build a vector store from an empty list of texts.")

    # FAISS only accepts C-contiguous float32 matrices.
    embeddings = np.ascontiguousarray(embedder.encode(texts), dtype=np.float32)
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return index, embeddings
37
+
38
# Corpus passages, in the same order as the vectors stored in the FAISS index,
# so a FAISS row id can be used directly as a position in this list.
texts = data['content'].tolist()
index, embeddings = build_vector_store(texts)

# Chat UI
prompt = st.chat_input("Ask something about the content...")

if prompt:
    # Embed the question; FAISS expects a float32 matrix.
    query_vec = np.asarray(embedder.encode([prompt]), dtype=np.float32)

    # Retrieve up to 3 passages, never asking for more than the index holds.
    top_k = min(3, len(texts))
    _, neighbor_ids = index.search(query_vec, k=top_k)

    # FAISS pads missing results with -1; without this filter texts[-1]
    # (the last passage) would silently be used as context.
    context = " ".join(texts[i] for i in neighbor_ids[0] if i >= 0)

    # Format prompt for T5
    input_text = f"question: {prompt} context: {context}"
    inputs = tokenizer(input_text, return_tensors="pt", truncation=True)
    outputs = model.generate(**inputs, max_length=100)
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)

    st.markdown(f"**Answer:** {answer}")