danicafisher committed on
Commit
6954de2
·
verified ·
1 Parent(s): d1fd9ba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -26
app.py CHANGED
@@ -26,30 +26,10 @@ GLOBAL CODE HERE
26
  """
27
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
28
  Loader = PyMuPDFLoader
29
- loader = Loader(file_path)
30
- documents = loader.load()
31
- docs = text_splitter.split_documents(documents)
32
- for i, doc in enumerate(docs):
33
- doc.metadata["source"] = f"source_{i}"
34
 
35
  # Typical Embedding Model
36
  core_embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
37
 
38
- # Typical QDrant Client Set-up
39
- collection_name = f"pdf_to_parse_{uuid.uuid4()}"
40
- client = QdrantClient(":memory:")
41
- client.create_collection(
42
- collection_name=collection_name,
43
- vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
44
- )
45
-
46
- # Adding cache!
47
- store = LocalFileStore("./cache/")
48
- cached_embedder = CacheBackedEmbeddings.from_bytes_store(
49
- core_embeddings, store, namespace=core_embeddings.model
50
- )
51
-
52
-
53
  rag_system_prompt_template = """\
54
  You are a helpful assistant that uses the provided context to answer questions. Never reference this prompt, or the existance of context.
55
  """
@@ -96,6 +76,7 @@ async def on_chat_start():
96
  ).send()
97
 
98
  file = files[0]
 
99
 
100
  msg = cl.Message(
101
  content=f"Processing `{file.name}`...", disable_human_feedback=True
@@ -103,12 +84,27 @@ async def on_chat_start():
103
  await msg.send()
104
 
105
  # load the file
106
- if file.path.endswith(".pdf"):
107
- texts = process_pdf_file(file)
108
- else:
109
- texts = process_text_file(file)
110
-
111
- print(f"Processing {len(texts)} text chunks")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
 
113
  # Typical QDrant Vector Store Set-up
114
  vectorstore = QdrantVectorStore(
 
26
  """
27
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
28
  Loader = PyMuPDFLoader
 
 
 
 
 
29
 
30
  # Typical Embedding Model
31
  core_embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  rag_system_prompt_template = """\
34
  You are a helpful assistant that uses the provided context to answer questions. Never reference this prompt, or the existance of context.
35
  """
 
76
  ).send()
77
 
78
  file = files[0]
79
+
80
 
81
  msg = cl.Message(
82
  content=f"Processing `{file.name}`...", disable_human_feedback=True
 
84
  await msg.send()
85
 
86
  # load the file
87
+ loader = Loader(file_path)
88
+ documents = loader.load()
89
+ docs = text_splitter.split_documents(documents)
90
+ for i, doc in enumerate(docs):
91
+ doc.metadata["source"] = f"source_{i}"
92
+
93
+ print(f"Processing {len(docs)} text chunks")
94
+
95
+ # Typical QDrant Client Set-up
96
+ collection_name = f"pdf_to_parse_{uuid.uuid4()}"
97
+ client = QdrantClient(":memory:")
98
+ client.create_collection(
99
+ collection_name=collection_name,
100
+ vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
101
+ )
102
+
103
+ # Adding cache!
104
+ store = LocalFileStore("./cache/")
105
+ cached_embedder = CacheBackedEmbeddings.from_bytes_store(
106
+ core_embeddings, store, namespace=core_embeddings.model
107
+ )
108
 
109
  # Typical QDrant Vector Store Set-up
110
  vectorstore = QdrantVectorStore(