meidkad committed
Commit 127c86c · verified · 1 Parent(s): 3994b08

Update app.py

Files changed (1)
  1. app.py +113 -112
app.py CHANGED
@@ -1,113 +1,114 @@
-import PyPDF2
-from langchain_community.embeddings import OllamaEmbeddings
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain_community.vectorstores import Chroma
-from langchain.chains import ConversationalRetrievalChain
-from langchain_community.chat_models import ChatOllama
-from langchain.memory import ChatMessageHistory, ConversationBufferMemory
-import chainlit as cl
-
-
-@cl.on_chat_start
-async def on_chat_start():
-    files = None  # Initialize variable to store uploaded files
-
-    # Wait for the user to upload a file
-    while files is None:
-        files = await cl.AskFileMessage(
-            content="Please upload a pdf file to begin!",
-            accept=["application/pdf"],
-            max_size_mb=100,  # Optionally limit the file size
-            timeout=180,  # Set a timeout for the user response
-        ).send()
-
-    file = files[0]  # Get the first uploaded file
-    print(file)  # Print the file object for debugging
-
-    # Sending an image with the local file path
-    elements = [
-        cl.Image(name="image", display="inline", path="pic.jpg")
-    ]
-    # Inform the user that processing has started
-    msg = cl.Message(content=f"Processing `{file.name}`...", elements=elements)
-    await msg.send()
-
-    # Read the PDF file
-    pdf = PyPDF2.PdfReader(file.path)
-    pdf_text = ""
-    for page in pdf.pages:
-        pdf_text += page.extract_text()
-
-    # Split the text into chunks
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1200, chunk_overlap=50)
-    texts = text_splitter.split_text(pdf_text)
-
-    # Create metadata for each chunk
-    metadatas = [{"source": f"{i}-pl"} for i in range(len(texts))]
-
-    # Create a Chroma vector store
-    embeddings = OllamaEmbeddings(model="nomic-embed-text")
-    docsearch = await cl.make_async(Chroma.from_texts)(
-        texts, embeddings, metadatas=metadatas
-    )
-
-    # Initialize message history for the conversation
-    message_history = ChatMessageHistory()
-
-    # Memory for conversational context
-    memory = ConversationBufferMemory(
-        memory_key="chat_history",
-        output_key="answer",
-        chat_memory=message_history,
-        return_messages=True,
-    )
-
-    # Create a chain that uses the Chroma vector store
-    chain = ConversationalRetrievalChain.from_llm(
-        ChatOllama(model="gemma:7b"),
-        chain_type="stuff",
-        retriever=docsearch.as_retriever(),
-        memory=memory,
-        return_source_documents=True,
-    )
-
-    # Let the user know that the system is ready
-    msg.content = f"Processing `{file.name}` done. You can now ask questions!"
-    await msg.update()
-    # Store the chain in the user session
-    cl.user_session.set("chain", chain)
-
-
-@cl.on_message
-async def main(message: cl.Message):
-    # Retrieve the chain from the user session
-    chain = cl.user_session.get("chain")
-    # Callbacks run asynchronously/in parallel
-    cb = cl.AsyncLangchainCallbackHandler()
-
-    # Call the chain with the user's message content
-    res = await chain.ainvoke(message.content, callbacks=[cb])
-    answer = res["answer"]
-    source_documents = res["source_documents"]
-
-    text_elements = []  # Initialize list to store text elements
-
-    # Process source documents if available
-    if source_documents:
-        for source_idx, source_doc in enumerate(source_documents):
-            source_name = f"source_{source_idx}"
-            # Create the text element referenced in the message
-            text_elements.append(
-                cl.Text(content=source_doc.page_content, name=source_name)
-            )
-        source_names = [text_el.name for text_el in text_elements]
-
-        # Add source references to the answer
-        if source_names:
-            answer += f"\nSources: {', '.join(source_names)}"
-        else:
-            answer += "\nNo sources found"
-    # Return the results
+import PyPDF2
+from langchain_community.embeddings import OllamaEmbeddings
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_community.vectorstores import Chroma
+from langchain.chains import ConversationalRetrievalChain
+from langchain_community.chat_models import ChatOllama
+from langchain_community.chat_message_histories import ChatMessageHistory
+from langchain.memory import ConversationBufferMemory
+
+import chainlit as cl
+
+
+@cl.on_chat_start
+async def on_chat_start():
+    files = None  # Initialize variable to store uploaded files
+
+    # Wait for the user to upload a file
+    while files is None:
+        files = await cl.AskFileMessage(
+            content="Please upload a pdf file to begin!",
+            accept=["application/pdf"],
+            max_size_mb=100,  # Optionally limit the file size
+            timeout=180,  # Set a timeout for the user response
+        ).send()
+
+    file = files[0]  # Get the first uploaded file
+    print(file)  # Print the file object for debugging
+
+    # Sending an image with the local file path
+    elements = [
+        cl.Image(name="image", display="inline", path="pic.jpg")
+    ]
+    # Inform the user that processing has started
+    msg = cl.Message(content=f"Processing `{file.name}`...", elements=elements)
+    await msg.send()
+
+    # Read the PDF file
+    pdf = PyPDF2.PdfReader(file.path)
+    pdf_text = ""
+    for page in pdf.pages:
+        pdf_text += page.extract_text()
+
+    # Split the text into chunks
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1200, chunk_overlap=50)
+    texts = text_splitter.split_text(pdf_text)
+
+    # Create metadata for each chunk
+    metadatas = [{"source": f"{i}-pl"} for i in range(len(texts))]
+
+    # Create a Chroma vector store
+    embeddings = OllamaEmbeddings(model="nomic-embed-text")
+    docsearch = await cl.make_async(Chroma.from_texts)(
+        texts, embeddings, metadatas=metadatas
+    )
+
+    # Initialize message history for the conversation
+    message_history = ChatMessageHistory()
+
+    # Memory for conversational context
+    memory = ConversationBufferMemory(
+        memory_key="chat_history",
+        output_key="answer",
+        chat_memory=message_history,
+        return_messages=True,
+    )
+
+    # Create a chain that uses the Chroma vector store
+    chain = ConversationalRetrievalChain.from_llm(
+        ChatOllama(model="gemma:7b"),
+        chain_type="stuff",
+        retriever=docsearch.as_retriever(),
+        memory=memory,
+        return_source_documents=True,
+    )
+
+    # Let the user know that the system is ready
+    msg.content = f"Processing `{file.name}` done. You can now ask questions!"
+    await msg.update()
+    # Store the chain in the user session
+    cl.user_session.set("chain", chain)
+
+
+@cl.on_message
+async def main(message: cl.Message):
+    # Retrieve the chain from the user session
+    chain = cl.user_session.get("chain")
+    # Callbacks run asynchronously/in parallel
+    cb = cl.AsyncLangchainCallbackHandler()
+
+    # Call the chain with the user's message content
+    res = await chain.ainvoke(message.content, callbacks=[cb])
+    answer = res["answer"]
+    source_documents = res["source_documents"]
+
+    text_elements = []  # Initialize list to store text elements
+
+    # Process source documents if available
+    if source_documents:
+        for source_idx, source_doc in enumerate(source_documents):
+            source_name = f"source_{source_idx}"
+            # Create the text element referenced in the message
+            text_elements.append(
+                cl.Text(content=source_doc.page_content, name=source_name)
+            )
+        source_names = [text_el.name for text_el in text_elements]
+
+        # Add source references to the answer
+        if source_names:
+            answer += f"\nSources: {', '.join(source_names)}"
+        else:
+            answer += "\nNo sources found"
+    # Return the results
     await cl.Message(content=answer, elements=text_elements).send()
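
For reference, ChatMessageHistory now lives in langchain_community.chat_message_histories, while ConversationBufferMemory still ships from langchain.memory, so the two imports have to stay split. Below is a minimal sanity-check sketch for the revised imports (it assumes the langchain and langchain-community packages are installed in the Space environment; the memory wiring mirrors what app.py builds):

from langchain_community.chat_message_histories import ChatMessageHistory
from langchain.memory import ConversationBufferMemory

# Smoke test: construct the memory exactly as app.py does
memory = ConversationBufferMemory(
    memory_key="chat_history",
    output_key="answer",
    chat_memory=ChatMessageHistory(),
    return_messages=True,
)
print(type(memory.chat_memory).__name__)  # prints: ChatMessageHistory

If both imports resolve and the script prints ChatMessageHistory, the environment matches what the updated app.py expects.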