Spaces:

bacancydataprophets
/

MeDocChat

Runtime error

App Files Files Community

akash015 committed on Jun 27, 2024

Commit

43aad56

verified ·

1 Parent(s): 7deaf1b

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -32

app.py CHANGED Viewed

@@ -180,7 +180,7 @@
 # v2
-import re
 import PyPDF2
 from langchain_community.embeddings import OllamaEmbeddings
 from langchain.text_splitter import RecursiveCharacterTextSplitter
@@ -195,14 +195,13 @@ import logging
 import pypandoc
 import pdfkit
 from paddleocr import PaddleOCR
-import fitz
 import asyncio
 from langchain_nomic.embeddings import NomicEmbeddings
-import os
 llm_groq = ChatGroq(
-    model_name='llama3-70b-8192'
-)
 # Initialize anonymizer
 anonymizer = PresidioReversibleAnonymizer(analyzed_fields=['PERSON', 'EMAIL_ADDRESS', 'PHONE_NUMBER', 'IBAN_CODE', 'CREDIT_CARD', 'CRYPTO', 'IP_ADDRESS', 'LOCATION', 'DATE_TIME', 'NRP', 'MEDICAL_LICENSE', 'URL'], faker_seed=18)
@@ -276,11 +275,21 @@ async def extract_text_from_mixed_pdf(file_path):
         pdf_text += text
     return pdf_text
 @cl.on_chat_start
 async def on_chat_start():
     files = None # Initialize variable to store uploaded files
     # Wait for the user to upload a file
     while files is None:
         files = await cl.AskFileMessage(
@@ -308,14 +317,7 @@ async def on_chat_start():
     )
     embeddings = NomicEmbeddings(model="nomic-embed-text-v1.5")
-    # Clear the existing Chroma vector store
-    docsearch = await cl.make_async(Chroma.from_texts)(
-        [], embeddings, metadatas=[]
-    )
-    docsearch.delete()
-    # Create a new Chroma vector store
     docsearch = await cl.make_async(Chroma.from_texts)(
         [anonymized_text], embeddings, metadatas=[{"source": "0-pl"}]
     )
@@ -345,8 +347,6 @@ async def on_chat_start():
     await msg.update()
     # Store the chain in user session
     cl.user_session.set("chain", chain)
-    cl.user_session.set("docsearch", docsearch)  # Store the docsearch in session
-    cl.user_session.set("file_path", file.path)  # Store the file path in session
 @cl.on_message
@@ -366,21 +366,3 @@ async def main(message: cl.Message):
     # Return results
     await cl.Message(content=answer, elements=text_elements).send()
-@cl.on_chat_end
-async def on_chat_end():
-    docsearch = cl.user_session.get("docsearch")
-    file_path = cl.user_session.get("file_path")
-    if docsearch:
-        # Clear the vector store
-        docsearch.delete()
-    if file_path and os.path.exists(file_path):
-        # Remove the uploaded file
-        os.remove(file_path)
-    # Clear the user session data
-    cl.user_session.clear()
-    logging.info("User session ended, data cleared.")

 # v2
+import re
 import PyPDF2
 from langchain_community.embeddings import OllamaEmbeddings
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 import pypandoc
 import pdfkit
 from paddleocr import PaddleOCR
+import fitz
 import asyncio
 from langchain_nomic.embeddings import NomicEmbeddings
 llm_groq = ChatGroq(
+            model_name='llama3-70b-8192'
+    )
 # Initialize anonymizer
 anonymizer = PresidioReversibleAnonymizer(analyzed_fields=['PERSON', 'EMAIL_ADDRESS', 'PHONE_NUMBER', 'IBAN_CODE', 'CREDIT_CARD', 'CRYPTO', 'IP_ADDRESS', 'LOCATION', 'DATE_TIME', 'NRP', 'MEDICAL_LICENSE', 'URL'], faker_seed=18)
         pdf_text += text
     return pdf_text
+# Function to clear the ChromaDB
+async def clear_chroma_db(chroma_instance):
+    await chroma_instance.delete()
 @cl.on_chat_start
 async def on_chat_start():
     files = None # Initialize variable to store uploaded files
+    # Initialize ChromaDB
+    chroma_instance = await cl.make_async(Chroma)()
+    # Clear any existing data in ChromaDB
+    await clear_chroma_db(chroma_instance)
     # Wait for the user to upload a file
     while files is None:
         files = await cl.AskFileMessage(
     )
     embeddings = NomicEmbeddings(model="nomic-embed-text-v1.5")
     docsearch = await cl.make_async(Chroma.from_texts)(
         [anonymized_text], embeddings, metadatas=[{"source": "0-pl"}]
     )
     await msg.update()
     # Store the chain in user session
     cl.user_session.set("chain", chain)
 @cl.on_message
     # Return results
     await cl.Message(content=answer, elements=text_elements).send()