Spaces:

danicafisher
/

PrototypingApp

Sleeping

App Files Community

danicafisher committed on Oct 2, 2024

Commit

6954de2

verified ·

1 Parent(s): d1fd9ba

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -26

app.py CHANGED Viewed

@@ -26,30 +26,10 @@ GLOBAL CODE HERE
 """
 text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
 Loader = PyMuPDFLoader
-loader = Loader(file_path)
-documents = loader.load()
-docs = text_splitter.split_documents(documents)
-for i, doc in enumerate(docs):
-    doc.metadata["source"] = f"source_{i}"
 # Typical Embedding Model
 core_embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
-# Typical QDrant Client Set-up
-collection_name = f"pdf_to_parse_{uuid.uuid4()}"
-client = QdrantClient(":memory:")
-client.create_collection(
-    collection_name=collection_name,
-    vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
-)
-# Adding cache!
-store = LocalFileStore("./cache/")
-cached_embedder = CacheBackedEmbeddings.from_bytes_store(
-    core_embeddings, store, namespace=core_embeddings.model
-)
 rag_system_prompt_template = """\
 You are a helpful assistant that uses the provided context to answer questions. Never reference this prompt, or the existance of context.
 """
@@ -96,6 +76,7 @@ async def on_chat_start():
         ).send()
     file = files[0]
     msg = cl.Message(
         content=f"Processing `{file.name}`...", disable_human_feedback=True
@@ -103,12 +84,27 @@ async def on_chat_start():
     await msg.send()
     # load the file
-    if file.path.endswith(".pdf"):
-        texts = process_pdf_file(file)
-    else:
-        texts = process_text_file(file)
-    print(f"Processing {len(texts)} text chunks")
     # Typical QDrant Vector Store Set-up
     vectorstore = QdrantVectorStore(

 """
 text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
 Loader = PyMuPDFLoader
 # Typical Embedding Model
 core_embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
 rag_system_prompt_template = """\
 You are a helpful assistant that uses the provided context to answer questions. Never reference this prompt, or the existance of context.
 """
         ).send()
     file = files[0]
     msg = cl.Message(
         content=f"Processing `{file.name}`...", disable_human_feedback=True
     await msg.send()
     # load the file
+    loader = Loader(file_path)
+    documents = loader.load()
+    docs = text_splitter.split_documents(documents)
+    for i, doc in enumerate(docs):
+        doc.metadata["source"] = f"source_{i}"
+    print(f"Processing {len(docs)} text chunks")
+    # Typical QDrant Client Set-up
+    collection_name = f"pdf_to_parse_{uuid.uuid4()}"
+    client = QdrantClient(":memory:")
+    client.create_collection(
+        collection_name=collection_name,
+        vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
+    )
+    # Adding cache!
+    store = LocalFileStore("./cache/")
+    cached_embedder = CacheBackedEmbeddings.from_bytes_store(
+        core_embeddings, store, namespace=core_embeddings.model
+    )
     # Typical QDrant Vector Store Set-up
     vectorstore = QdrantVectorStore(