DataMine committed on
Commit
2d79a6a
·
verified ·
1 Parent(s): 2157e0c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -0
app.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from langchain.document_loaders import PyPDFLoader
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ from langchain.vectorstores import Chroma
5
+ from langchain.embeddings import HuggingFaceEmbeddings
6
+ from transformers import pipeline
7
+
8
+ # Page setup
9
+ st.title("Simple Q&A Assistant")
10
+
11
+ # Load and process PDF
12
@st.cache_resource
def initialize_system():
    """Build the document index and the question-answering model.

    Loads ``ai_buddy.pdf``, splits it into overlapping text chunks, embeds
    the chunks into a Chroma vector store, and creates a Hugging Face
    ``question-answering`` pipeline.

    Returns:
        A ``(vector_db, qa_model)`` tuple: the Chroma store built from the
        PDF chunks and the QA pipeline.
    """
    # Read the PDF and cut it into overlapping chunks
    pages = PyPDFLoader("ai_buddy.pdf").load()
    chunks = RecursiveCharacterTextSplitter(
        chunk_size=750,
        chunk_overlap=150,
    ).split_documents(pages)

    # Embed the chunks and index them for similarity search
    encoder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    store = Chroma.from_documents(documents=chunks, embedding=encoder)

    # Reader model that extracts an answer span from a given context
    reader = pipeline("question-answering", model="deepset/roberta-base-squad2")

    return store, reader
31
+
32
+ # Initialize the system
33
+ if 'vector_db' not in st.session_state:
34
+ st.session_state.vector_db, st.session_state.qa_model = initialize_system()
35
+
36
+ # Function to answer questions
37
def get_answer(question):
    """Answer *question* using the indexed document chunks.

    Retrieves documents for the question from
    ``st.session_state.vector_db``, joins their text into one context
    string, and runs ``st.session_state.qa_model`` over it.

    Args:
        question: The user's question as a string.

    Returns:
        The ``'answer'`` field of the QA model's response, or a fixed
        fallback message when the question is blank, no documents are
        retrieved, or the retrieved documents contain no text.
    """
    not_found = "Sorry, I couldn't find any relevant information."

    # A blank or whitespace-only question has nothing to retrieve against;
    # answer with the fallback instead of feeding it to the models.
    if not question or not question.strip():
        return not_found

    # Get relevant documents
    docs = st.session_state.vector_db.as_retriever().get_relevant_documents(question)
    if not docs:
        return not_found

    # Combine document contents
    context = " ".join(doc.page_content for doc in docs)

    # Chunks without any text (e.g. image-only pages) leave the reader
    # nothing to extract an answer from.
    if not context.strip():
        return not_found

    # Get answer
    response = st.session_state.qa_model(question=question, context=context)
    return response['answer']
50
+
51
+ # Simple input/output interface
52
+ question = st.text_input("Ask your question:")
53
+ if question:
54
+ with st.spinner("Finding answer..."):
55
+ answer = get_answer(question)
56
+ st.write("Answer:", answer)