DataMine committed on
Commit
9998a26
·
verified ·
1 Parent(s): 2d79a6a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -20
app.py CHANGED
@@ -4,6 +4,7 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
  from langchain.vectorstores import Chroma
5
  from langchain.embeddings import HuggingFaceEmbeddings
6
  from transformers import pipeline
 
7
 
8
  # Page setup
9
  st.title("Simple Q&A Assistant")
@@ -11,18 +12,39 @@ st.title("Simple Q&A Assistant")
11
  # Load and process PDF
12
  @st.cache_resource
13
  def initialize_system():
14
- # Load PDF
15
- data = PyPDFLoader("ai_buddy.pdf").load()
16
 
17
- # Split into chunks
18
- splitter = RecursiveCharacterTextSplitter(chunk_size=750, chunk_overlap=150)
19
- splits = splitter.split_documents(data)
20
-
21
- # Create embeddings and vector store
22
  embeddings = HuggingFaceEmbeddings(
23
  model_name="sentence-transformers/all-MiniLM-L6-v2"
24
  )
25
- vector_db = Chroma.from_documents(documents=splits, embedding=embeddings)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
  # Setup QA pipeline
28
  qa_model = pipeline("question-answering", model="deepset/roberta-base-squad2")
@@ -35,21 +57,28 @@ if 'vector_db' not in st.session_state:
35
 
36
  # Function to answer questions
37
  def get_answer(question):
38
- # Get relevant documents
39
- docs = st.session_state.vector_db.as_retriever().get_relevant_documents(question)
40
-
41
- if not docs:
42
- return "Sorry, I couldn't find any relevant information."
43
-
44
- # Combine document contents
45
- context = " ".join([doc.page_content for doc in docs])
46
-
47
- # Get answer
48
- response = st.session_state.qa_model(question=question, context=context)
49
- return response['answer']
 
 
 
 
 
 
50
 
51
  # Simple input/output interface
52
  question = st.text_input("Ask your question:")
 
53
  if question:
54
  with st.spinner("Finding answer..."):
55
  answer = get_answer(question)
 
4
  from langchain.vectorstores import Chroma
5
  from langchain.embeddings import HuggingFaceEmbeddings
6
  from transformers import pipeline
7
+ import os
8
 
9
  # Page setup
10
  st.title("Simple Q&A Assistant")
 
12
  # Load and process PDF
13
  @st.cache_resource
14
  def initialize_system():
15
+ # Set up persistent directory for Chroma
16
+ persist_directory = "chroma_db"
17
 
18
+ # Create embeddings
 
 
 
 
19
  embeddings = HuggingFaceEmbeddings(
20
  model_name="sentence-transformers/all-MiniLM-L6-v2"
21
  )
22
+
23
+ # Check if database already exists
24
+ if not os.path.exists(persist_directory):
25
+ # Load PDF
26
+ data = PyPDFLoader("ai_buddy.pdf").load()
27
+
28
+ # Split into chunks
29
+ splitter = RecursiveCharacterTextSplitter(
30
+ chunk_size=750,
31
+ chunk_overlap=150
32
+ )
33
+ splits = splitter.split_documents(data)
34
+
35
+ # Create and persist vector store
36
+ vector_db = Chroma.from_documents(
37
+ documents=splits,
38
+ embedding=embeddings,
39
+ persist_directory=persist_directory
40
+ )
41
+ vector_db.persist()
42
+ else:
43
+ # Load existing database
44
+ vector_db = Chroma(
45
+ persist_directory=persist_directory,
46
+ embedding_function=embeddings
47
+ )
48
 
49
  # Setup QA pipeline
50
  qa_model = pipeline("question-answering", model="deepset/roberta-base-squad2")
 
57
 
58
  # Function to answer questions
59
  def get_answer(question):
60
+ try:
61
+ # Get relevant documents
62
+ docs = st.session_state.vector_db.as_retriever().get_relevant_documents(question)
63
+
64
+ if not docs:
65
+ return "Sorry, I couldn't find any relevant information."
66
+
67
+ # Combine document contents
68
+ context = " ".join([doc.page_content for doc in docs])
69
+
70
+ # Get answer
71
+ response = st.session_state.qa_model(
72
+ question=question,
73
+ context=context
74
+ )
75
+ return response['answer']
76
+ except Exception as e:
77
+ return f"An error occurred: {str(e)}"
78
 
79
  # Simple input/output interface
80
  question = st.text_input("Ask your question:")
81
+
82
  if question:
83
  with st.spinner("Finding answer..."):
84
  answer = get_answer(question)