manasdhir commited on
Commit
37b1f26
1 Parent(s): 2346071

minor changes

Browse files
Files changed (5) hide show
  1. app.py +1 -2
  2. docling_chunker.py +17 -0
  3. logs.txt +1249 -0
  4. tokenizer.py +50 -0
  5. uv.lock +0 -0
app.py CHANGED
@@ -51,7 +51,6 @@ def verify_bearer_token(authorization: Optional[str]) -> None:
51
  async def run_hackrx(
52
  request: Request,
53
  payload: HackRxRequest,
54
- background_tasks: BackgroundTasks,
55
  authorization: Optional[str] = Header(None)
56
  ):
57
  # Verify Authorization Bearer token
@@ -66,5 +65,5 @@ async def run_hackrx(
66
  contexts=get_context_for_questions(questions)
67
  prompts=construct_prompts(questions,contexts)
68
  answers=generate_answers(prompts)
69
- background_tasks.add_task(clear_collection_payloads)
70
  return {"answers": answers}
 
51
  async def run_hackrx(
52
  request: Request,
53
  payload: HackRxRequest,
 
54
  authorization: Optional[str] = Header(None)
55
  ):
56
  # Verify Authorization Bearer token
 
65
  contexts=get_context_for_questions(questions)
66
  prompts=construct_prompts(questions,contexts)
67
  answers=generate_answers(prompts)
68
+ clear_collection_payloads()
69
  return {"answers": answers}
docling_chunker.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from docling.chunking import HybridChunker
2
+ from docling_core.transforms.chunker.tokenizer.openai import OpenAITokenizer
3
+ import tiktoken
4
+
5
+ tokenizer = OpenAITokenizer(tokenizer=tiktoken.encoding_for_model("gpt-4o"), max_tokens=128 * 1024)
6
+
7
+ chunker=HybridChunker(
8
+ tokenizer=tokenizer,
9
+ max_tokens=3000,
10
+ min_tokens=1000,
11
+ merge_peers=True
12
+ )
13
+
14
+ chunk_iter=chunker.chunk(dl_doc="content.txt")
15
+ chunks = list(chunk_iter)
16
+ print(chunks)
17
+ print(len(chunks))
logs.txt ADDED
@@ -0,0 +1,1249 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ===== Application Startup at 2025-07-31 17:10:25 =====
2
+
3
+ INFO: Started server process [1]
4
+ INFO: Waiting for application startup.
5
+ INFO: Application startup complete.
6
+ INFO: Uvicorn running on http://0.0.0.0:7860 (Press CTRL+C to quit)
7
+ Collection 'test' already exists
8
+ https://hackrx.blob.core.windows.net/assets/policy.pdf?sv=2023-01-03&st=2025-07-04T09%3A11%3A24Z&se=2027-07-05T09%3A11%3A00Z&sr=b&sp=r&sig=N4a9OU0w0QXO6AOIBiu4bpl7AXvEZogeT%2FjUHNO7HzQ%3D
9
+ ['What is the grace period for premium payment under the National Parivar Mediclaim Plus Policy?', 'What is the waiting period for pre-existing diseases (PED) to be covered?', 'Does this policy cover maternity expenses, and what are the conditions?', 'What is the waiting period for cataract surgery?', 'Are the medical expenses for an organ donor covered under this policy?', 'What is the No Claim Discount (NCD) offered in this policy?', 'Is there a benefit for preventive health check-ups?', "How does the policy define a 'Hospital'?", 'What is the extent of coverage for AYUSH treatments?', 'Are there any sub-limits on room rent and ICU charges for Plan A?']
10
+
11
+ Batch 1:
12
+ Chunk 1 word count: 165
13
+ Chunk 2 word count: 165
14
+ Chunk 3 word count: 1995
15
+ Chunk 4 word count: 1965
16
+ Chunk 5 word count: 1671
17
+ Chunk 6 word count: 378
18
+ Chunk 7 word count: 1988
19
+ Chunk 8 word count: 221
20
+ Chunk 9 word count: 1902
21
+ Chunk 10 word count: 1907
22
+ Chunk 11 word count: 119
23
+ Chunk 12 word count: 880
24
+ Chunk 13 word count: 166
25
+ Chunk 14 word count: 1634
26
+ Chunk 15 word count: 1556
27
+ Chunk 16 word count: 1802
28
+
29
+ Batch 2:
30
+ Chunk 1 word count: 422
31
+
32
+ Summary:
33
+ Chunks split by level 1 headings (#): 1
34
+ Chunks split by grouped level 2 headings (##): 10
35
+ Chunks split by paragraphs: 2
36
+ Batch 1 embeddings received, total embeddings so far: 16
37
+ Batch 2 embeddings received, total embeddings so far: 17
38
+ Upserted points 0 to 16
39
+ Total upserted points: 17
40
+ processing complete
41
+ INFO: 10.16.23.6:6263 - "POST /api/v1/hackrx/run HTTP/1.1" 200 OK
42
+ delete done
43
+ INFO: 10.16.32.117:59200 - "GET / HTTP/1.1" 404 Not Found
44
+ https://hackrx.blob.core.windows.net/assets/Arogya%20Sanjeevani%20Policy%20-%20CIN%20-%20U10200WB1906GOI001713%201.pdf?sv=2023-01-03&st=2025-07-21T08%3A29%3A02Z&se=2025-09-22T08%3A29%3A00Z&sr=b&sp=r&sig=nzrz1K9Iurt%2BBXom%2FB%2BMPTFMFP3PRnIvEsipAX10Ig4%3D
45
+ ['When will my root canal claim of Rs 25,000 be settled?', 'I have done an IVF for Rs 56,000. Is it covered?', 'I did a cataract treatment of Rs 100,000. Will you settle the full Rs 100,000?', 'Give me a list of documents to be uploaded for hospitalization for heart surgery.']
46
+
47
+ Batch 1:
48
+ Chunk 1 word count: 189
49
+ Chunk 2 word count: 119
50
+ Chunk 3 word count: 1952
51
+ Chunk 4 word count: 1937
52
+ Chunk 5 word count: 506
53
+ Chunk 6 word count: 567
54
+ Chunk 7 word count: 1081
55
+ Chunk 8 word count: 1818
56
+ Chunk 9 word count: 1990
57
+ Chunk 10 word count: 699
58
+ Chunk 11 word count: 1887
59
+ Chunk 12 word count: 956
60
+
61
+ Summary:
62
+ Chunks split by level 1 headings (#): 2
63
+ Chunks split by grouped level 2 headings (##): 9
64
+ Chunks split by paragraphs: 0
65
+ Batch 1 embeddings received, total embeddings so far: 12
66
+ Upserted points 0 to 11
67
+ Total upserted points: 12
68
+ processing complete
69
+ INFO: 10.16.21.252:15207 - "POST /api/v1/hackrx/run HTTP/1.1" 200 OK
70
+ delete done
71
+ https://hackrx.blob.core.windows.net/assets/Arogya%20Sanjeevani%20Policy%20-%20CIN%20-%20U10200WB1906GOI001713%201.pdf?sv=2023-01-03&st=2025-07-21T08%3A29%3A02Z&se=2025-09-22T08%3A29%3A00Z&sr=b&sp=r&sig=nzrz1K9Iurt%2BBXom%2FB%2BMPTFMFP3PRnIvEsipAX10Ig4%3D
72
+ ["I have raised a claim for hospitalization for Rs 200,000 with HDFC, and it's approved. My total expenses are Rs 250,000. Can I raise the remaining Rs 50,000 with you?"]
73
+
74
+ Batch 1:
75
+ Chunk 1 word count: 189
76
+ Chunk 2 word count: 119
77
+ Chunk 3 word count: 1952
78
+ Chunk 4 word count: 1937
79
+ Chunk 5 word count: 506
80
+ Chunk 6 word count: 567
81
+ Chunk 7 word count: 1081
82
+ Chunk 8 word count: 1818
83
+ Chunk 9 word count: 1990
84
+ Chunk 10 word count: 699
85
+ Chunk 11 word count: 1887
86
+ Chunk 12 word count: 956
87
+
88
+ Summary:
89
+ Chunks split by level 1 headings (#): 2
90
+ Chunks split by grouped level 2 headings (##): 9
91
+ Chunks split by paragraphs: 0
92
+ Batch 1 embeddings received, total embeddings so far: 12
93
+ Upserted points 0 to 11
94
+ Total upserted points: 12
95
+ processing complete
96
+ INFO: 10.16.23.6:7493 - "POST /api/v1/hackrx/run HTTP/1.1" 200 OK
97
+ delete done
98
+ https://hackrx.blob.core.windows.net/assets/Super_Splendor_(Feb_2023).pdf?sv=2023-01-03&st=2025-07-21T08%3A10%3A00Z&se=2025-09-22T08%3A10%3A00Z&sr=b&sp=r&sig=vhHrl63YtrEOCsAy%2BpVKr20b3ZUo5HMz1lF9%2BJh6LQ0%3D
99
+ ['What is the ideal spark plug gap recommeded', 'Does this comes in tubeless tyre version', 'Is it compulsoury to have a disc brake', 'Can I put thums up instead of oil', 'Give me JS code to generate a random number between 1 and 100']
100
+
101
+ Batch 1:
102
+ Chunk 1 word count: 257
103
+ Chunk 2 word count: 129
104
+ Chunk 3 word count: 378
105
+ Chunk 4 word count: 1182
106
+ Chunk 5 word count: 335
107
+ Chunk 6 word count: 837
108
+ Chunk 7 word count: 356
109
+ Chunk 8 word count: 248
110
+ Chunk 9 word count: 136
111
+ Chunk 10 word count: 419
112
+ Chunk 11 word count: 231
113
+ Chunk 12 word count: 94
114
+ Chunk 13 word count: 38
115
+ Chunk 14 word count: 472
116
+ Chunk 15 word count: 53
117
+ Chunk 16 word count: 134
118
+
119
+ Batch 2:
120
+ Chunk 1 word count: 56
121
+ Chunk 2 word count: 623
122
+ Chunk 3 word count: 177
123
+ Chunk 4 word count: 510
124
+ Chunk 5 word count: 107
125
+ Chunk 6 word count: 155
126
+ Chunk 7 word count: 461
127
+ Chunk 8 word count: 104
128
+ Chunk 9 word count: 97
129
+ Chunk 10 word count: 119
130
+ Chunk 11 word count: 356
131
+ Chunk 12 word count: 67
132
+ Chunk 13 word count: 145
133
+ Chunk 14 word count: 575
134
+ Chunk 15 word count: 246
135
+ Chunk 16 word count: 824
136
+
137
+ Batch 3:
138
+ Chunk 1 word count: 1063
139
+ Chunk 2 word count: 1574
140
+ Chunk 3 word count: 708
141
+ Chunk 4 word count: 232
142
+ Chunk 5 word count: 276
143
+ Chunk 6 word count: 237
144
+ Chunk 7 word count: 1995
145
+ Chunk 8 word count: 261
146
+ Chunk 9 word count: 154
147
+ Chunk 10 word count: 370
148
+ Chunk 11 word count: 47
149
+ Chunk 12 word count: 18
150
+ Chunk 13 word count: 91
151
+ Chunk 14 word count: 52
152
+ Chunk 15 word count: 10
153
+ Chunk 16 word count: 11
154
+
155
+ Batch 4:
156
+ Chunk 1 word count: 145
157
+ Chunk 2 word count: 44
158
+ Chunk 3 word count: 78
159
+ Chunk 4 word count: 202
160
+ Chunk 5 word count: 32
161
+ Chunk 6 word count: 74
162
+ Chunk 7 word count: 327
163
+ Chunk 8 word count: 192
164
+ Chunk 9 word count: 316
165
+ Chunk 10 word count: 541
166
+ Chunk 11 word count: 135
167
+ Chunk 12 word count: 228
168
+ Chunk 13 word count: 890
169
+ Chunk 14 word count: 457
170
+ Chunk 15 word count: 210
171
+ Chunk 16 word count: 122
172
+
173
+ Batch 5:
174
+ Chunk 1 word count: 102
175
+ Chunk 2 word count: 635
176
+
177
+ Summary:
178
+ Chunks split by level 1 headings (#): 63
179
+ Chunks split by grouped level 2 headings (##): 3
180
+ Chunks split by paragraphs: 0
181
+ Batch 1 embeddings received, total embeddings so far: 16
182
+ Batch 2 embeddings received, total embeddings so far: 32
183
+ Batch 3 embeddings received, total embeddings so far: 48
184
+ Batch 4 embeddings received, total embeddings so far: 64
185
+ Batch 5 embeddings received, total embeddings so far: 66
186
+ Upserted points 0 to 19
187
+ Upserted points 20 to 39
188
+ Upserted points 40 to 59
189
+ Upserted points 60 to 65
190
+ Total upserted points: 66
191
+ processing complete
192
+ INFO: 10.16.32.117:27543 - "POST /api/v1/hackrx/run HTTP/1.1" 200 OK
193
+ delete done
194
+ https://hackrx.blob.core.windows.net/assets/Family%20Medicare%20Policy%20(UIN-%20UIIHLIP22070V042122)%201.pdf?sv=2023-01-03&st=2025-07-22T10%3A17%3A39Z&se=2025-08-23T10%3A17%3A00Z&sr=b&sp=r&sig=dA7BEMIZg3WcePcckBOb4QjfxK%2B4rIfxBs2%2F%2BNwoPjQ%3D
195
+ ['Is Non-infective Arthritis covered?', 'I renewed my policy yesterday, and I have been a customer for the last 6 years. Can I raise a claim for Hydrocele?', 'Is abortion covered?']
196
+
197
+ Batch 1:
198
+ Chunk 1 word count: 315
199
+ Chunk 2 word count: 90
200
+ Chunk 3 word count: 1997
201
+ Chunk 4 word count: 1939
202
+ Chunk 5 word count: 667
203
+ Chunk 6 word count: 682
204
+ Chunk 7 word count: 1612
205
+ Chunk 8 word count: 1771
206
+ Chunk 9 word count: 1956
207
+ Chunk 10 word count: 1956
208
+ Chunk 11 word count: 77
209
+ Chunk 12 word count: 1929
210
+ Chunk 13 word count: 206
211
+ Chunk 14 word count: 1886
212
+ Chunk 15 word count: 1018
213
+
214
+ Summary:
215
+ Chunks split by level 1 headings (#): 0
216
+ Chunks split by grouped level 2 headings (##): 12
217
+ Chunks split by paragraphs: 0
218
+ Batch 1 embeddings received, total embeddings so far: 15
219
+ Upserted points 0 to 14
220
+ Total upserted points: 15
221
+ processing complete
222
+ INFO: 10.16.26.149:56262 - "POST /api/v1/hackrx/run HTTP/1.1" 200 OK
223
+ delete done
224
+ https://hackrx.blob.core.windows.net/assets/indian_constitution.pdf?sv=2023-01-03&st=2025-07-28T06%3A42%3A00Z&se=2026-11-29T06%3A42%3A00Z&sr=b&sp=r&sig=5Gs%2FOXqP3zY00lgciu4BZjDV5QjTDIx7fgnfdz6Pu24%3D
225
+ ['What is the official name of India according to Article 1 of the Constitution?', 'Which Article guarantees equality before the law and equal protection of laws to all persons?', 'What is abolished by Article 17 of the Constitution?', 'What are the key ideals mentioned in the Preamble of the Constitution of India?', 'Under which Article can Parliament alter the boundaries, area, or name of an existing State?', 'According to Article 24, children below what age are prohibited from working in hazardous industries like factories or mines?', 'What is the significance of Article 21 in the Indian Constitution?', 'Article 15 prohibits discrimination on certain grounds. However, which groups can the State make special provisions for under this Article?', 'Which Article allows Parliament to regulate the right of citizenship and override previous articles on citizenship (Articles 5 to 10)?', 'What restrictions can the State impose on the right to freedom of speech under Article 19(2)?']
226
+
227
+ Batch 1:
228
+ Chunk 1 word count: 1635
229
+ Chunk 2 word count: 1807
230
+ Chunk 3 word count: 74
231
+ Chunk 4 word count: 449
232
+ Chunk 5 word count: 875
233
+ Chunk 6 word count: 207
234
+ Chunk 7 word count: 19
235
+ Chunk 8 word count: 3
236
+ Chunk 9 word count: 180
237
+ Chunk 10 word count: 15
238
+ Chunk 11 word count: 7
239
+ Chunk 12 word count: 819
240
+ Chunk 13 word count: 3
241
+ Chunk 14 word count: 32
242
+ Chunk 15 word count: 19
243
+ Chunk 16 word count: 3
244
+
245
+ Batch 2:
246
+ Chunk 1 word count: 576
247
+ Chunk 2 word count: 119
248
+ Chunk 3 word count: 1989
249
+ Chunk 4 word count: 1809
250
+ Chunk 5 word count: 1786
251
+ Chunk 6 word count: 1789
252
+ Chunk 7 word count: 1995
253
+ Chunk 8 word count: 1834
254
+ Chunk 9 word count: 1948
255
+ Chunk 10 word count: 1735
256
+ Chunk 11 word count: 1779
257
+ Chunk 12 word count: 203
258
+ Chunk 13 word count: 1981
259
+ Chunk 14 word count: 1996
260
+ Chunk 15 word count: 1895
261
+ Chunk 16 word count: 1641
262
+
263
+ Batch 3:
264
+ Chunk 1 word count: 1513
265
+ Chunk 2 word count: 1559
266
+ Chunk 3 word count: 448
267
+ Chunk 4 word count: 732
268
+ Chunk 5 word count: 837
269
+ Chunk 6 word count: 409
270
+ Chunk 7 word count: 609
271
+ Chunk 8 word count: 1677
272
+ Chunk 9 word count: 1928
273
+ Chunk 10 word count: 674
274
+ Chunk 11 word count: 474
275
+ Chunk 12 word count: 436
276
+ Chunk 13 word count: 91
277
+ Chunk 14 word count: 345
278
+ Chunk 15 word count: 430
279
+ Chunk 16 word count: 1774
280
+
281
+ Batch 4:
282
+ Chunk 1 word count: 1076
283
+ Chunk 2 word count: 1596
284
+ Chunk 3 word count: 478
285
+ Chunk 4 word count: 704
286
+ Chunk 5 word count: 1711
287
+ Chunk 6 word count: 679
288
+ Chunk 7 word count: 436
289
+ Chunk 8 word count: 1462
290
+ Chunk 9 word count: 535
291
+ Chunk 10 word count: 296
292
+ Chunk 11 word count: 29
293
+ Chunk 12 word count: 348
294
+ Chunk 13 word count: 1236
295
+ Chunk 14 word count: 420
296
+ Chunk 15 word count: 843
297
+ Chunk 16 word count: 335
298
+
299
+ Batch 5:
300
+ Chunk 1 word count: 378
301
+ Chunk 2 word count: 448
302
+ Chunk 3 word count: 1090
303
+ Chunk 4 word count: 351
304
+ Chunk 5 word count: 411
305
+ Chunk 6 word count: 229
306
+ Chunk 7 word count: 336
307
+ Chunk 8 word count: 324
308
+ Chunk 9 word count: 397
309
+ Chunk 10 word count: 356
310
+ Chunk 11 word count: 314
311
+ Chunk 12 word count: 413
312
+ Chunk 13 word count: 286
313
+ Chunk 14 word count: 294
314
+ Chunk 15 word count: 238
315
+ Chunk 16 word count: 322
316
+
317
+ Batch 6:
318
+ Chunk 1 word count: 364
319
+ Chunk 2 word count: 345
320
+ Chunk 3 word count: 371
321
+ Chunk 4 word count: 339
322
+ Chunk 5 word count: 362
323
+ Chunk 6 word count: 356
324
+ Chunk 7 word count: 253
325
+ Chunk 8 word count: 1943
326
+ Chunk 9 word count: 1000
327
+ Chunk 10 word count: 752
328
+ Chunk 11 word count: 445
329
+ Chunk 12 word count: 449
330
+ Chunk 13 word count: 401
331
+ Chunk 14 word count: 1294
332
+ Chunk 15 word count: 673
333
+ Chunk 16 word count: 392
334
+
335
+ Batch 7:
336
+ Chunk 1 word count: 56
337
+ Chunk 2 word count: 805
338
+ Chunk 3 word count: 440
339
+ Chunk 4 word count: 515
340
+ Chunk 5 word count: 338
341
+ Chunk 6 word count: 89
342
+ Chunk 7 word count: 5
343
+ Chunk 8 word count: 98
344
+ Chunk 9 word count: 350
345
+ Chunk 10 word count: 836
346
+ Chunk 11 word count: 645
347
+ Chunk 12 word count: 10
348
+ Chunk 13 word count: 463
349
+ Chunk 14 word count: 443
350
+ Chunk 15 word count: 781
351
+ Chunk 16 word count: 113
352
+
353
+ Batch 8:
354
+ Chunk 1 word count: 319
355
+ Chunk 2 word count: 439
356
+ Chunk 3 word count: 434
357
+ Chunk 4 word count: 1401
358
+ Chunk 5 word count: 323
359
+ Chunk 6 word count: 340
360
+ Chunk 7 word count: 320
361
+ Chunk 8 word count: 1129
362
+ Chunk 9 word count: 1853
363
+ Chunk 10 word count: 1986
364
+ Chunk 11 word count: 1996
365
+ Chunk 12 word count: 493
366
+ Chunk 13 word count: 348
367
+ Chunk 14 word count: 403
368
+ Chunk 15 word count: 781
369
+ Chunk 16 word count: 523
370
+
371
+ Batch 9:
372
+ Chunk 1 word count: 470
373
+ Chunk 2 word count: 374
374
+ Chunk 3 word count: 386
375
+ Chunk 4 word count: 412
376
+ Chunk 5 word count: 402
377
+ Chunk 6 word count: 419
378
+ Chunk 7 word count: 423
379
+ Chunk 8 word count: 1558
380
+ Chunk 9 word count: 402
381
+ Chunk 10 word count: 1727
382
+ Chunk 11 word count: 644
383
+ Chunk 12 word count: 298
384
+ Chunk 13 word count: 245
385
+ Chunk 14 word count: 3
386
+ Chunk 15 word count: 259
387
+ Chunk 16 word count: 1660
388
+
389
+ Batch 10:
390
+ Chunk 1 word count: 1802
391
+ Chunk 2 word count: 1802
392
+ Chunk 3 word count: 1879
393
+ Chunk 4 word count: 1601
394
+ Chunk 5 word count: 415
395
+ Chunk 6 word count: 3
396
+ Chunk 7 word count: 693
397
+ Chunk 8 word count: 318
398
+ Chunk 9 word count: 1491
399
+ Chunk 10 word count: 359
400
+ Chunk 11 word count: 364
401
+ Chunk 12 word count: 316
402
+ Chunk 13 word count: 158
403
+ Chunk 14 word count: 16
404
+ Chunk 15 word count: 320
405
+ Chunk 16 word count: 772
406
+
407
+ Batch 11:
408
+ Chunk 1 word count: 412
409
+ Chunk 2 word count: 1081
410
+ Chunk 3 word count: 295
411
+ Chunk 4 word count: 504
412
+ Chunk 5 word count: 7
413
+ Chunk 6 word count: 310
414
+ Chunk 7 word count: 715
415
+ Chunk 8 word count: 1987
416
+ Chunk 9 word count: 1836
417
+ Chunk 10 word count: 1945
418
+ Chunk 11 word count: 1869
419
+ Chunk 12 word count: 1707
420
+ Chunk 13 word count: 1809
421
+ Chunk 14 word count: 1795
422
+ Chunk 15 word count: 1958
423
+ Chunk 16 word count: 1722
424
+
425
+ Batch 12:
426
+ Chunk 1 word count: 1984
427
+ Chunk 2 word count: 1751
428
+ Chunk 3 word count: 304
429
+ Chunk 4 word count: 1926
430
+ Chunk 5 word count: 2000
431
+ Chunk 6 word count: 1642
432
+ Chunk 7 word count: 1991
433
+ Chunk 8 word count: 1843
434
+ Chunk 9 word count: 1139
435
+ Chunk 10 word count: 1966
436
+ Chunk 11 word count: 1846
437
+ Chunk 12 word count: 774
438
+
439
+ Summary:
440
+ Chunks split by level 1 headings (#): 127
441
+ Chunks split by grouped level 2 headings (##): 57
442
+ Chunks split by paragraphs: 0
443
+ Batch 1 embeddings received, total embeddings so far: 16
444
+ Batch 2 embeddings received, total embeddings so far: 32
445
+ Batch 3 embeddings received, total embeddings so far: 48
446
+ Batch 4 embeddings received, total embeddings so far: 64
447
+ Batch 5 embeddings received, total embeddings so far: 80
448
+ Batch 6 embeddings received, total embeddings so far: 96
449
+ Batch 7 embeddings received, total embeddings so far: 112
450
+ Batch 8 embeddings received, total embeddings so far: 128
451
+ Batch 9 embeddings received, total embeddings so far: 144
452
+ Batch 10 embeddings received, total embeddings so far: 160
453
+ Batch 11 embeddings received, total embeddings so far: 176
454
+ Batch 12 embeddings received, total embeddings so far: 188
455
+ Upserted points 0 to 19
456
+ Upserted points 20 to 39
457
+ Upserted points 40 to 59
458
+ Upserted points 60 to 79
459
+ Upserted points 80 to 99
460
+ Upserted points 100 to 119
461
+ Upserted points 120 to 139
462
+ Upserted points 140 to 159
463
+ Upserted points 160 to 179
464
+ Upserted points 180 to 187
465
+ Total upserted points: 188
466
+ processing complete
467
+ INFO: 10.16.21.252:54043 - "POST /api/v1/hackrx/run HTTP/1.1" 200 OK
468
+ https://hackrx.blob.core.windows.net/assets/indian_constitution.pdf?sv=2023-01-03&st=2025-07-28T06%3A42%3A00Z&se=2026-11-29T06%3A42%3A00Z&sr=b&sp=r&sig=5Gs%2FOXqP3zY00lgciu4BZjDV5QjTDIx7fgnfdz6Pu24%3D
469
+ ['If my car is stolen, what case will it be in law?', 'If I am arrested without a warrant, is that legal?', 'If someone denies me a job because of my caste, is that allowed?', 'If the government takes my land for a project, can I stop it?', 'If my child is forced to work in a factory, is that legal?', 'If I am stopped from speaking at a protest, is that against my rights?', "If a religious place stops me from entering because I'm a woman, is that constitutional?", 'If I change my religion, can the government stop me?', 'If the police torture someone in custody, what right is being violated?', "If I'm denied admission to a public university because I'm from a backward community, can I do something?"]
470
+ delete done
471
+
472
+ Batch 1:
473
+ Chunk 1 word count: 1635
474
+ Chunk 2 word count: 1807
475
+ Chunk 3 word count: 74
476
+ Chunk 4 word count: 449
477
+ Chunk 5 word count: 875
478
+ Chunk 6 word count: 207
479
+ Chunk 7 word count: 19
480
+ Chunk 8 word count: 3
481
+ Chunk 9 word count: 180
482
+ Chunk 10 word count: 15
483
+ Chunk 11 word count: 7
484
+ Chunk 12 word count: 819
485
+ Chunk 13 word count: 3
486
+ Chunk 14 word count: 32
487
+ Chunk 15 word count: 19
488
+ Chunk 16 word count: 3
489
+
490
+ Batch 2:
491
+ Chunk 1 word count: 576
492
+ Chunk 2 word count: 119
493
+ Chunk 3 word count: 1989
494
+ Chunk 4 word count: 1809
495
+ Chunk 5 word count: 1786
496
+ Chunk 6 word count: 1789
497
+ Chunk 7 word count: 1995
498
+ Chunk 8 word count: 1834
499
+ Chunk 9 word count: 1948
500
+ Chunk 10 word count: 1735
501
+ Chunk 11 word count: 1779
502
+ Chunk 12 word count: 203
503
+ Chunk 13 word count: 1981
504
+ Chunk 14 word count: 1996
505
+ Chunk 15 word count: 1895
506
+ Chunk 16 word count: 1641
507
+
508
+ Batch 3:
509
+ Chunk 1 word count: 1513
510
+ Chunk 2 word count: 1559
511
+ Chunk 3 word count: 448
512
+ Chunk 4 word count: 732
513
+ Chunk 5 word count: 837
514
+ Chunk 6 word count: 409
515
+ Chunk 7 word count: 609
516
+ Chunk 8 word count: 1677
517
+ Chunk 9 word count: 1928
518
+ Chunk 10 word count: 674
519
+ Chunk 11 word count: 474
520
+ Chunk 12 word count: 436
521
+ Chunk 13 word count: 91
522
+ Chunk 14 word count: 345
523
+ Chunk 15 word count: 430
524
+ Chunk 16 word count: 1774
525
+
526
+ Batch 4:
527
+ Chunk 1 word count: 1076
528
+ Chunk 2 word count: 1596
529
+ Chunk 3 word count: 478
530
+ Chunk 4 word count: 704
531
+ Chunk 5 word count: 1711
532
+ Chunk 6 word count: 679
533
+ Chunk 7 word count: 436
534
+ Chunk 8 word count: 1462
535
+ Chunk 9 word count: 535
536
+ Chunk 10 word count: 296
537
+ Chunk 11 word count: 29
538
+ Chunk 12 word count: 348
539
+ Chunk 13 word count: 1236
540
+ Chunk 14 word count: 420
541
+ Chunk 15 word count: 843
542
+ Chunk 16 word count: 335
543
+
544
+ Batch 5:
545
+ Chunk 1 word count: 378
546
+ Chunk 2 word count: 448
547
+ Chunk 3 word count: 1090
548
+ Chunk 4 word count: 351
549
+ Chunk 5 word count: 411
550
+ Chunk 6 word count: 229
551
+ Chunk 7 word count: 336
552
+ Chunk 8 word count: 324
553
+ Chunk 9 word count: 397
554
+ Chunk 10 word count: 356
555
+ Chunk 11 word count: 314
556
+ Chunk 12 word count: 413
557
+ Chunk 13 word count: 286
558
+ Chunk 14 word count: 294
559
+ Chunk 15 word count: 238
560
+ Chunk 16 word count: 322
561
+
562
+ Batch 6:
563
+ Chunk 1 word count: 364
564
+ Chunk 2 word count: 345
565
+ Chunk 3 word count: 371
566
+ Chunk 4 word count: 339
567
+ Chunk 5 word count: 362
568
+ Chunk 6 word count: 356
569
+ Chunk 7 word count: 253
570
+ Chunk 8 word count: 1943
571
+ Chunk 9 word count: 1000
572
+ Chunk 10 word count: 752
573
+ Chunk 11 word count: 445
574
+ Chunk 12 word count: 449
575
+ Chunk 13 word count: 401
576
+ Chunk 14 word count: 1294
577
+ Chunk 15 word count: 673
578
+ Chunk 16 word count: 392
579
+
580
+ Batch 7:
581
+ Chunk 1 word count: 56
582
+ Chunk 2 word count: 805
583
+ Chunk 3 word count: 440
584
+ Chunk 4 word count: 515
585
+ Chunk 5 word count: 338
586
+ Chunk 6 word count: 89
587
+ Chunk 7 word count: 5
588
+ Chunk 8 word count: 98
589
+ Chunk 9 word count: 350
590
+ Chunk 10 word count: 836
591
+ Chunk 11 word count: 645
592
+ Chunk 12 word count: 10
593
+ Chunk 13 word count: 463
594
+ Chunk 14 word count: 443
595
+ Chunk 15 word count: 781
596
+ Chunk 16 word count: 113
597
+
598
+ Batch 8:
599
+ Chunk 1 word count: 319
600
+ Chunk 2 word count: 439
601
+ Chunk 3 word count: 434
602
+ Chunk 4 word count: 1401
603
+ Chunk 5 word count: 323
604
+ Chunk 6 word count: 340
605
+ Chunk 7 word count: 320
606
+ Chunk 8 word count: 1129
607
+ Chunk 9 word count: 1853
608
+ Chunk 10 word count: 1986
609
+ Chunk 11 word count: 1996
610
+ Chunk 12 word count: 493
611
+ Chunk 13 word count: 348
612
+ Chunk 14 word count: 403
613
+ Chunk 15 word count: 781
614
+ Chunk 16 word count: 523
615
+
616
+ Batch 9:
617
+ Chunk 1 word count: 470
618
+ Chunk 2 word count: 374
619
+ Chunk 3 word count: 386
620
+ Chunk 4 word count: 412
621
+ Chunk 5 word count: 402
622
+ Chunk 6 word count: 419
623
+ Chunk 7 word count: 423
624
+ Chunk 8 word count: 1558
625
+ Chunk 9 word count: 402
626
+ Chunk 10 word count: 1727
627
+ Chunk 11 word count: 644
628
+ Chunk 12 word count: 298
629
+ Chunk 13 word count: 245
630
+ Chunk 14 word count: 3
631
+ Chunk 15 word count: 259
632
+ Chunk 16 word count: 1660
633
+
634
+ Batch 10:
635
+ Chunk 1 word count: 1802
636
+ Chunk 2 word count: 1802
637
+ Chunk 3 word count: 1879
638
+ Chunk 4 word count: 1601
639
+ Chunk 5 word count: 415
640
+ Chunk 6 word count: 3
641
+ Chunk 7 word count: 693
642
+ Chunk 8 word count: 318
643
+ Chunk 9 word count: 1491
644
+ Chunk 10 word count: 359
645
+ Chunk 11 word count: 364
646
+ Chunk 12 word count: 316
647
+ Chunk 13 word count: 158
648
+ Chunk 14 word count: 16
649
+ Chunk 15 word count: 320
650
+ Chunk 16 word count: 772
651
+
652
+ Batch 11:
653
+ Chunk 1 word count: 412
654
+ Chunk 2 word count: 1081
655
+ Chunk 3 word count: 295
656
+ Chunk 4 word count: 504
657
+ Chunk 5 word count: 7
658
+ Chunk 6 word count: 310
659
+ Chunk 7 word count: 715
660
+ Chunk 8 word count: 1987
661
+ Chunk 9 word count: 1836
662
+ Chunk 10 word count: 1945
663
+ Chunk 11 word count: 1869
664
+ Chunk 12 word count: 1707
665
+ Chunk 13 word count: 1809
666
+ Chunk 14 word count: 1795
667
+ Chunk 15 word count: 1958
668
+ Chunk 16 word count: 1722
669
+
670
+ Batch 12:
671
+ Chunk 1 word count: 1984
672
+ Chunk 2 word count: 1751
673
+ Chunk 3 word count: 304
674
+ Chunk 4 word count: 1926
675
+ Chunk 5 word count: 2000
676
+ Chunk 6 word count: 1642
677
+ Chunk 7 word count: 1991
678
+ Chunk 8 word count: 1843
679
+ Chunk 9 word count: 1139
680
+ Chunk 10 word count: 1966
681
+ Chunk 11 word count: 1846
682
+ Chunk 12 word count: 774
683
+
684
+ Summary:
685
+ Chunks split by level 1 headings (#): 127
686
+ Chunks split by grouped level 2 headings (##): 57
687
+ Chunks split by paragraphs: 0
688
+ Batch 1 embeddings received, total embeddings so far: 16
689
+ Batch 2 embeddings received, total embeddings so far: 32
690
+ Batch 3 embeddings received, total embeddings so far: 48
691
+ Batch 4 embeddings received, total embeddings so far: 64
692
+ Batch 5 embeddings received, total embeddings so far: 80
693
+ Batch 6 embeddings received, total embeddings so far: 96
694
+ Batch 7 embeddings received, total embeddings so far: 112
695
+ Batch 8 embeddings received, total embeddings so far: 128
696
+ Batch 9 embeddings received, total embeddings so far: 144
697
+ Batch 10 embeddings received, total embeddings so far: 160
698
+ Batch 11 embeddings received, total embeddings so far: 176
699
+ Batch 12 embeddings received, total embeddings so far: 188
700
+ Upserted points 0 to 19
701
+ Upserted points 20 to 39
702
+ Upserted points 40 to 59
703
+ Upserted points 60 to 79
704
+ Upserted points 80 to 99
705
+ Upserted points 100 to 119
706
+ Upserted points 120 to 139
707
+ Upserted points 140 to 159
708
+ Upserted points 160 to 179
709
+ Upserted points 180 to 187
710
+ Total upserted points: 188
711
+ processing complete
712
+ INFO: 10.16.21.252:20306 - "POST /api/v1/hackrx/run HTTP/1.1" 200 OK
713
+ https://hackrx.blob.core.windows.net/assets/principia_newton.pdf?sv=2023-01-03&st=2025-07-28T07%3A20%3A32Z&se=2026-07-29T07%3A20%3A00Z&sr=b&sp=r&sig=V5I1QYyigoxeUMbnUKsdEaST99F5%2FDfo7wpKg9XXF5w%3D
714
+ ["How does Newton define 'quantity of motion' and how is it distinct from 'force'?", 'According to Newton, what are the three laws of motion and how do they apply in celestial mechanics?', "How does Newton derive Kepler's Second Law (equal areas in equal times) from his laws of motion and gravitation?", 'How does Newton demonstrate that gravity is inversely proportional to the square of the distance between two masses?', "What is Newton's argument for why gravitational force must act on all masses universally?", 'How does Newton explain the perturbation of planetary orbits due to other planets?', "What mathematical tools did Newton use in Principia that were precursors to calculus, and why didn't he use standard calculus notation?", 'How does Newton use the concept of centripetal force to explain orbital motion?', 'How does Newton handle motion in resisting media, such as air or fluids?', "In what way does Newton's notion of absolute space and time differ from relative motion, and how does it support his laws?", 'Who was the grandfather of Isaac Newton?', 'Do we know any other descent of Isaac Newton apart from his grandfather?']
715
+
716
+ Batch 1:
717
+ Chunk 1 word count: 122
718
+ Chunk 2 word count: 1424
719
+ Chunk 3 word count: 1994
720
+ Chunk 4 word count: 202
721
+ Chunk 5 word count: 1945
722
+ Chunk 6 word count: 1983
723
+ Chunk 7 word count: 202
724
+ Chunk 8 word count: 1877
725
+ Chunk 9 word count: 202
726
+ Chunk 10 word count: 1967
727
+ Chunk 11 word count: 202
728
+ Chunk 12 word count: 1862
729
+ Chunk 13 word count: 202
730
+ Chunk 14 word count: 1860
731
+ Chunk 15 word count: 202
732
+ Chunk 16 word count: 1887
733
+
734
+ Batch 2:
735
+ Chunk 1 word count: 202
736
+ Chunk 2 word count: 1988
737
+ Chunk 3 word count: 202
738
+ Chunk 4 word count: 1998
739
+ Chunk 5 word count: 202
740
+ Chunk 6 word count: 1958
741
+ Chunk 7 word count: 1915
742
+ Chunk 8 word count: 2
743
+ Chunk 9 word count: 1986
744
+ Chunk 10 word count: 318
745
+ Chunk 11 word count: 1933
746
+ Chunk 12 word count: 1606
747
+ Chunk 13 word count: 1992
748
+ Chunk 14 word count: 203
749
+ Chunk 15 word count: 1901
750
+ Chunk 16 word count: 203
751
+
752
+ Batch 3:
753
+ Chunk 1 word count: 1899
754
+ Chunk 2 word count: 1202
755
+ Chunk 3 word count: 1948
756
+ Chunk 4 word count: 203
757
+ Chunk 5 word count: 1919
758
+ Chunk 6 word count: 203
759
+ Chunk 7 word count: 1948
760
+ Chunk 8 word count: 1682
761
+ Chunk 9 word count: 1784
762
+ Chunk 10 word count: 1784
763
+ Chunk 11 word count: 1984
764
+ Chunk 12 word count: 1974
765
+ Chunk 13 word count: 1046
766
+ Chunk 14 word count: 1850
767
+ Chunk 15 word count: 1966
768
+ Chunk 16 word count: 1225
769
+
770
+ Batch 4:
771
+ Chunk 1 word count: 1488
772
+ Chunk 2 word count: 1488
773
+ Chunk 3 word count: 1781
774
+ Chunk 4 word count: 1951
775
+ Chunk 5 word count: 1804
776
+ Chunk 6 word count: 203
777
+ Chunk 7 word count: 1998
778
+ Chunk 8 word count: 1885
779
+ Chunk 9 word count: 1623
780
+ Chunk 10 word count: 1933
781
+ Chunk 11 word count: 838
782
+ Chunk 12 word count: 1498
783
+ Chunk 13 word count: 796
784
+ Chunk 14 word count: 1159
785
+ Chunk 15 word count: 389
786
+ Chunk 16 word count: 19
787
+
788
+ Batch 5:
789
+ Chunk 1 word count: 336
790
+ Chunk 2 word count: 24
791
+ Chunk 3 word count: 655
792
+ Chunk 4 word count: 127
793
+ Chunk 5 word count: 108
794
+ Chunk 6 word count: 902
795
+ Chunk 7 word count: 1110
796
+ Chunk 8 word count: 1119
797
+ Chunk 9 word count: 1382
798
+ Chunk 10 word count: 393
799
+ Chunk 11 word count: 241
800
+ Chunk 12 word count: 169
801
+ Chunk 13 word count: 918
802
+ Chunk 14 word count: 169
803
+ Chunk 15 word count: 283
804
+ Chunk 16 word count: 381
805
+
806
+ Batch 6:
807
+ Chunk 1 word count: 139
808
+ Chunk 2 word count: 267
809
+ Chunk 3 word count: 675
810
+ Chunk 4 word count: 1037
811
+ Chunk 5 word count: 1900
812
+ Chunk 6 word count: 1973
813
+ Chunk 7 word count: 1949
814
+ Chunk 8 word count: 1325
815
+ Chunk 9 word count: 167
816
+ Chunk 10 word count: 567
817
+ Chunk 11 word count: 580
818
+ Chunk 12 word count: 336
819
+ Chunk 13 word count: 1436
820
+ Chunk 14 word count: 86
821
+ Chunk 15 word count: 269
822
+ Chunk 16 word count: 384
823
+
824
+ Batch 7:
825
+ Chunk 1 word count: 655
826
+ Chunk 2 word count: 840
827
+ Chunk 3 word count: 154
828
+ Chunk 4 word count: 372
829
+ Chunk 5 word count: 432
830
+ Chunk 6 word count: 567
831
+ Chunk 7 word count: 1018
832
+ Chunk 8 word count: 664
833
+ Chunk 9 word count: 269
834
+ Chunk 10 word count: 126
835
+ Chunk 11 word count: 81
836
+ Chunk 12 word count: 16
837
+ Chunk 13 word count: 308
838
+ Chunk 14 word count: 1339
839
+ Chunk 15 word count: 648
840
+ Chunk 16 word count: 1052
841
+
842
+ Batch 8:
843
+ Chunk 1 word count: 111
844
+ Chunk 2 word count: 784
845
+ Chunk 3 word count: 438
846
+ Chunk 4 word count: 25
847
+ Chunk 5 word count: 1435
848
+ Chunk 6 word count: 1584
849
+ Chunk 7 word count: 1908
850
+ Chunk 8 word count: 1707
851
+ Chunk 9 word count: 1884
852
+ Chunk 10 word count: 1521
853
+ Chunk 11 word count: 803
854
+ Chunk 12 word count: 1569
855
+ Chunk 13 word count: 203
856
+ Chunk 14 word count: 1924
857
+ Chunk 15 word count: 203
858
+ Chunk 16 word count: 1949
859
+
860
+ Batch 9:
861
+ Chunk 1 word count: 1870
862
+ Chunk 2 word count: 1714
863
+ Chunk 3 word count: 1790
864
+ Chunk 4 word count: 203
865
+ Chunk 5 word count: 1804
866
+ Chunk 6 word count: 884
867
+ Chunk 7 word count: 40
868
+ Chunk 8 word count: 856
869
+ Chunk 9 word count: 771
870
+ Chunk 10 word count: 1719
871
+ Chunk 11 word count: 354
872
+ Chunk 12 word count: 11
873
+ Chunk 13 word count: 564
874
+ Chunk 14 word count: 980
875
+ Chunk 15 word count: 1489
876
+ Chunk 16 word count: 1892
877
+
878
+ Batch 10:
879
+ Chunk 1 word count: 1821
880
+ Chunk 2 word count: 1947
881
+ Chunk 3 word count: 1550
882
+ Chunk 4 word count: 3
883
+ Chunk 5 word count: 757
884
+ Chunk 6 word count: 1580
885
+ Chunk 7 word count: 1243
886
+ Chunk 8 word count: 213
887
+ Chunk 9 word count: 1939
888
+ Chunk 10 word count: 213
889
+ Chunk 11 word count: 1894
890
+ Chunk 12 word count: 1737
891
+ Chunk 13 word count: 1599
892
+ Chunk 14 word count: 1387
893
+ Chunk 15 word count: 959
894
+ Chunk 16 word count: 94
895
+
896
+ Batch 11:
897
+ Chunk 1 word count: 2000
898
+ Chunk 2 word count: 1
899
+ Chunk 3 word count: 213
900
+ Chunk 4 word count: 1869
901
+ Chunk 5 word count: 213
902
+ Chunk 6 word count: 1896
903
+ Chunk 7 word count: 613
904
+ Chunk 8 word count: 1463
905
+ Chunk 9 word count: 1541
906
+ Chunk 10 word count: 481
907
+ Chunk 11 word count: 1447
908
+ Chunk 12 word count: 791
909
+ Chunk 13 word count: 213
910
+ Chunk 14 word count: 1911
911
+ Chunk 15 word count: 361
912
+ Chunk 16 word count: 44
913
+
914
+ Batch 12:
915
+ Chunk 1 word count: 915
916
+ Chunk 2 word count: 1995
917
+ Chunk 3 word count: 414
918
+ Chunk 4 word count: 817
919
+ Chunk 5 word count: 446
920
+ Chunk 6 word count: 545
921
+ Chunk 7 word count: 348
922
+ Chunk 8 word count: 869
923
+ Chunk 9 word count: 89
924
+ Chunk 10 word count: 1920
925
+ Chunk 11 word count: 206
926
+ Chunk 12 word count: 1951
927
+ Chunk 13 word count: 1993
928
+ Chunk 14 word count: 1704
929
+ Chunk 15 word count: 1975
930
+ Chunk 16 word count: 1240
931
+
932
+ Batch 13:
933
+ Chunk 1 word count: 173
934
+ Chunk 2 word count: 1996
935
+ Chunk 3 word count: 1924
936
+ Chunk 4 word count: 1370
937
+ Chunk 5 word count: 1716
938
+ Chunk 6 word count: 1596
939
+ Chunk 7 word count: 1973
940
+ Chunk 8 word count: 1631
941
+ Chunk 9 word count: 900
942
+ Chunk 10 word count: 206
943
+ Chunk 11 word count: 1993
944
+ Chunk 12 word count: 206
945
+ Chunk 13 word count: 1972
946
+ Chunk 14 word count: 1522
947
+ Chunk 15 word count: 788
948
+ Chunk 16 word count: 75
949
+
950
+ Batch 14:
951
+ Chunk 1 word count: 1945
952
+ Chunk 2 word count: 1607
953
+ Chunk 3 word count: 206
954
+ Chunk 4 word count: 1810
955
+ Chunk 5 word count: 1309
956
+ Chunk 6 word count: 206
957
+ Chunk 7 word count: 1892
958
+ Chunk 8 word count: 1903
959
+ Chunk 9 word count: 1391
960
+ Chunk 10 word count: 1006
961
+ Chunk 11 word count: 206
962
+ Chunk 12 word count: 1951
963
+ Chunk 13 word count: 1935
964
+ Chunk 14 word count: 1933
965
+ Chunk 15 word count: 545
966
+ Chunk 16 word count: 206
967
+
968
+ Batch 15:
969
+ Chunk 1 word count: 1802
970
+ Chunk 2 word count: 206
971
+ Chunk 3 word count: 1882
972
+ Chunk 4 word count: 206
973
+ Chunk 5 word count: 1961
974
+ Chunk 6 word count: 206
975
+ Chunk 7 word count: 1843
976
+ Chunk 8 word count: 206
977
+ Chunk 9 word count: 1865
978
+ Chunk 10 word count: 206
979
+ Chunk 11 word count: 1812
980
+ Chunk 12 word count: 1994
981
+ Chunk 13 word count: 956
982
+ Chunk 14 word count: 206
983
+ Chunk 15 word count: 1948
984
+ Chunk 16 word count: 206
985
+
986
+ Batch 16:
987
+ Chunk 1 word count: 1854
988
+ Chunk 2 word count: 1789
989
+ Chunk 3 word count: 1996
990
+ Chunk 4 word count: 744
991
+ Chunk 5 word count: 32
992
+ Chunk 6 word count: 1991
993
+ Chunk 7 word count: 206
994
+ Chunk 8 word count: 2000
995
+ Chunk 9 word count: 1
996
+ Chunk 10 word count: 206
997
+ Chunk 11 word count: 1992
998
+ Chunk 12 word count: 206
999
+ Chunk 13 word count: 1970
1000
+ Chunk 14 word count: 1934
1001
+ Chunk 15 word count: 1962
1002
+ Chunk 16 word count: 206
1003
+
1004
+ Batch 17:
1005
+ Chunk 1 word count: 1956
1006
+ Chunk 2 word count: 206
1007
+ Chunk 3 word count: 1842
1008
+ Chunk 4 word count: 206
1009
+ Chunk 5 word count: 1916
1010
+ Chunk 6 word count: 206
1011
+ Chunk 7 word count: 1896
1012
+ Chunk 8 word count: 206
1013
+ Chunk 9 word count: 1942
1014
+ Chunk 10 word count: 1998
1015
+ Chunk 11 word count: 206
1016
+ Chunk 12 word count: 1802
1017
+ Chunk 13 word count: 1959
1018
+ Chunk 14 word count: 206
1019
+ Chunk 15 word count: 1795
1020
+ Chunk 16 word count: 206
1021
+
1022
+ Batch 18:
1023
+ Chunk 1 word count: 1909
1024
+ Chunk 2 word count: 206
1025
+ Chunk 3 word count: 1962
1026
+ Chunk 4 word count: 1212
1027
+ Chunk 5 word count: 1713
1028
+ Chunk 6 word count: 206
1029
+ Chunk 7 word count: 2000
1030
+ Chunk 8 word count: 3
1031
+ Chunk 9 word count: 206
1032
+ Chunk 10 word count: 1957
1033
+ Chunk 11 word count: 610
1034
+ Chunk 12 word count: 158
1035
+
1036
+ Summary:
1037
+ Chunks split by level 1 headings (#): 70
1038
+ Chunks split by grouped level 2 headings (##): 146
1039
+ Chunks split by paragraphs: 9
1040
+ Batch 1 embeddings received, total embeddings so far: 16
1041
+ Batch 2 embeddings received, total embeddings so far: 32
1042
+ Batch 3 embeddings received, total embeddings so far: 48
1043
+ Batch 4 embeddings received, total embeddings so far: 64
1044
+ Batch 5 embeddings received, total embeddings so far: 80
1045
+ Batch 6 embeddings received, total embeddings so far: 96
1046
+ Batch 7 embeddings received, total embeddings so far: 112
1047
+ Batch 8 embeddings received, total embeddings so far: 128
1048
+ Batch 9 embeddings received, total embeddings so far: 144
1049
+ Batch 10 embeddings received, total embeddings so far: 160
1050
+ Batch 11 embeddings received, total embeddings so far: 176
1051
+ Batch 12 embeddings received, total embeddings so far: 192
1052
+ Batch 13 embeddings received, total embeddings so far: 208
1053
+ Batch 14 embeddings received, total embeddings so far: 224
1054
+ Batch 15 embeddings received, total embeddings so far: 240
1055
+ Batch 16 embeddings received, total embeddings so far: 256
1056
+ Batch 17 embeddings received, total embeddings so far: 272
1057
+ Batch 18 embeddings received, total embeddings so far: 284
1058
+ Upserted points 0 to 19
1059
+ Upserted points 20 to 39
1060
+ Upserted points 40 to 59
1061
+ Upserted points 60 to 79
1062
+ Upserted points 80 to 99
1063
+ Upserted points 100 to 119
1064
+ Upserted points 120 to 139
1065
+ Upserted points 140 to 159
1066
+ Upserted points 160 to 179
1067
+ Upserted points 180 to 199
1068
+ Upserted points 200 to 219
1069
+ Upserted points 220 to 239
1070
+ Upserted points 240 to 259
1071
+ Upserted points 260 to 279
1072
+ Upserted points 280 to 283
1073
+ Total upserted points: 284
1074
+ processing complete
1075
+ INFO: 10.16.32.117:37373 - "POST /api/v1/hackrx/run HTTP/1.1" 200 OK
1076
+ ERROR: Exception in ASGI application
1077
+ Traceback (most recent call last):
1078
+ File "/home/user/.local/lib/python3.9/site-packages/uvicorn/protocols/http/h11_impl.py", line 403, in run_asgi
1079
+ result = await app( # type: ignore[func-returns-value]
1080
+ File "/home/user/.local/lib/python3.9/site-packages/uvicorn/middleware/proxy_headers.py", line 60, in __call__
1081
+ return await self.app(scope, receive, send)
1082
+ File "/home/user/.local/lib/python3.9/site-packages/fastapi/applications.py", line 1054, in __call__
1083
+ await super().__call__(scope, receive, send)
1084
+ File "/home/user/.local/lib/python3.9/site-packages/starlette/applications.py", line 113, in __call__
1085
+ await self.middleware_stack(scope, receive, send)
1086
+ File "/home/user/.local/lib/python3.9/site-packages/starlette/middleware/errors.py", line 186, in __call__
1087
+ raise exc
1088
+ File "/home/user/.local/lib/python3.9/site-packages/starlette/middleware/errors.py", line 164, in __call__
1089
+ await self.app(scope, receive, _send)
1090
+ File "/home/user/.local/lib/python3.9/site-packages/starlette/middleware/cors.py", line 85, in __call__
1091
+ await self.app(scope, receive, send)
1092
+ File "/home/user/.local/lib/python3.9/site-packages/starlette/middleware/exceptions.py", line 63, in __call__
1093
+ await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
1094
+ File "/home/user/.local/lib/python3.9/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
1095
+ raise exc
1096
+ File "/home/user/.local/lib/python3.9/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
1097
+ await app(scope, receive, sender)
1098
+ File "/home/user/.local/lib/python3.9/site-packages/starlette/routing.py", line 716, in __call__
1099
+ await self.middleware_stack(scope, receive, send)
1100
+ File "/home/user/.local/lib/python3.9/site-packages/starlette/routing.py", line 736, in app
1101
+ await route.handle(scope, receive, send)
1102
+ File "/home/user/.local/lib/python3.9/site-packages/starlette/routing.py", line 290, in handle
1103
+ await self.app(scope, receive, send)
1104
+ File "/home/user/.local/lib/python3.9/site-packages/starlette/routing.py", line 78, in app
1105
+ await wrap_app_handling_exceptions(app, request)(scope, receive, send)
1106
+ File "/home/user/.local/lib/python3.9/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
1107
+ raise exc
1108
+ File "/home/user/.local/lib/python3.9/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
1109
+ await app(scope, receive, sender)
1110
+ File "/home/user/.local/lib/python3.9/site-packages/starlette/routing.py", line 76, in app
1111
+ await response(scope, receive, send)
1112
+ File "/home/user/.local/lib/python3.9/site-packages/starlette/responses.py", line 168, in __call__
1113
+ await self.background()
1114
+ File "/home/user/.local/lib/python3.9/site-packages/starlette/background.py", line 42, in __call__
1115
+ await task()
1116
+ File "/home/user/.local/lib/python3.9/site-packages/starlette/background.py", line 29, in __call__
1117
+ await run_in_threadpool(self.func, *self.args, **self.kwargs)
1118
+ File "/home/user/.local/lib/python3.9/site-packages/starlette/concurrency.py", line 38, in run_in_threadpool
1119
+ return await anyio.to_thread.run_sync(func)
1120
+ File "/home/user/.local/lib/python3.9/site-packages/anyio/to_thread.py", line 56, in run_sync
1121
+ return await get_async_backend().run_sync_in_worker_thread(
1122
+ File "/home/user/.local/lib/python3.9/site-packages/anyio/_backends/_asyncio.py", line 2470, in run_sync_in_worker_thread
1123
+ return await future
1124
+ File "/home/user/.local/lib/python3.9/site-packages/anyio/_backends/_asyncio.py", line 967, in run
1125
+ result = context.run(func, *args)
1126
+ File "/app/qdrant_setup.py", line 83, in clear_collection_payloads
1127
+ client.create_collection(
1128
+ File "/home/user/.local/lib/python3.9/site-packages/qdrant_client/qdrant_client.py", line 2382, in create_collection
1129
+ return self._client.create_collection(
1130
+ File "/home/user/.local/lib/python3.9/site-packages/qdrant_client/qdrant_remote.py", line 2815, in create_collection
1131
+ result: Optional[bool] = self.http.collections_api.create_collection(
1132
+ File "/home/user/.local/lib/python3.9/site-packages/qdrant_client/http/api/collections_api.py", line 294, in create_collection
1133
+ return self._build_for_create_collection(
1134
+ File "/home/user/.local/lib/python3.9/site-packages/qdrant_client/http/api/collections_api.py", line 96, in _build_for_create_collection
1135
+ return self.api_client.request(
1136
+ File "/home/user/.local/lib/python3.9/site-packages/qdrant_client/http/api_client.py", line 95, in request
1137
+ return self.send(request, type_)
1138
+ File "/home/user/.local/lib/python3.9/site-packages/qdrant_client/http/api_client.py", line 130, in send
1139
+ raise UnexpectedResponse.for_response(response)
1140
+ qdrant_client.http.exceptions.UnexpectedResponse: Unexpected Response: 409 (Conflict)
1141
+ Raw response content:
1142
+ b'{"status":{"error":"Wrong input: Collection `test` already exists!"},"time":0.128498264}'
1143
+
1144
+ Want to edit your Spaces's metadata? Head to the README.md and metadata UI instead.
1145
+ Space Hardware
1146
+ Display price:
1147
+
1148
+
1149
+ per hour
1150
+ per month
1151
+ Choose a hardware for your Space.
1152
+
1153
+ You'll be billed on a per minute basis.
1154
+ View usage in your billing settings.
1155
+
1156
+ Sleep time settings
1157
+ Sleep after
1158
+ 48 hours
1159
+ of inactivity
1160
+ Upgrade to a paid Hardware to set a custom sleep time.
1161
+
1162
+ Pause Space
1163
+
1164
+ Building something cool as a side project?
1165
+ Apply for a community GPU grant.
1166
+
1167
+
1168
+ CPU basic
1169
+ 2 vCPU
1170
+
1171
+ 16 GB RAM
1172
+ Current · Free
1173
+
1174
+
1175
+ CPU upgrade
1176
+ 8 vCPU
1177
+
1178
+ 32 GB RAM
1179
+ $0.03/hour
1180
+
1181
+ ZeroGPU
1182
+ Dynamic resources
1183
+
1184
+ Gradio only
1185
+ Free
1186
+
1187
+ Nvidia T4 small
1188
+ 4 vCPU
1189
+
1190
+ 15 GB RAM
1191
+
1192
+ 16 GB VRAM
1193
+ $0.40/hour
1194
+
1195
+ Nvidia T4 medium
1196
+ 8 vCPU
1197
+
1198
+ 30 GB RAM
1199
+
1200
+ 16 GB VRAM
1201
+ $0.60/hour
1202
+
1203
+ Nvidia 1xL4
1204
+ 8 vCPU
1205
+
1206
+ 30 GB RAM
1207
+
1208
+ 24 GB VRAM
1209
+ $0.80/hour
1210
+
1211
+ Nvidia 4xL4
1212
+ 48 vCPU
1213
+
1214
+ 186 GB RAM
1215
+
1216
+ 96 GB VRAM
1217
+ $3.80/hour
1218
+
1219
+ Nvidia 1xL40S
1220
+ 8 vCPU
1221
+
1222
+ 62 GB RAM
1223
+
1224
+ 48 GB VRAM
1225
+ $1.80/hour
1226
+
1227
+ Nvidia 4xL40S
1228
+ 48 vCPU
1229
+
1230
+ 382 GB RAM
1231
+
1232
+ 192 GB VRAM
1233
+ $8.30/hour
1234
+
1235
+ Nvidia 8xL40S
1236
+ 192 vCPU
1237
+
1238
+ 1534 GB RAM
1239
+
1240
+ 384 GB VRAM
1241
+ $23.50/hour
1242
+
1243
+ Nvidia A10G small
1244
+ 4 vCPU
1245
+
1246
+ 15 GB RAM
1247
+
1248
+ 24 GB VRAM
1249
+ $1.00/hour
tokenizer.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, List, Tuple
2
+
3
+ from tiktoken import get_encoding
4
+ from transformers.tokenization_utils_base import PreTrainedTokenizerBase
5
+
6
+
7
+ # Create a wrapper class to make OpenAI's tokenizer compatible with the HybridChunker interface
8
class OpenAITokenizerWrapper(PreTrainedTokenizerBase):
    """Minimal wrapper for OpenAI's tokenizer.

    Adapts a tiktoken encoding to the HuggingFace tokenizer interface.
    Tokens are represented as the decimal string form of their tiktoken
    ids, so converting between token and id is a plain ``str``/``int``
    round trip.
    """

    def __init__(
        self, model_name: str = "cl100k_base", max_length: int = 8191, **kwargs
    ):
        """Initialize the tokenizer.

        Args:
            model_name: The name of the OpenAI encoding to use.
            max_length: Maximum sequence length, forwarded to the base
                class as ``model_max_length``.
            **kwargs: Extra options forwarded to the base class initializer.
        """
        super().__init__(model_max_length=max_length, **kwargs)
        self.tokenizer = get_encoding(model_name)
        # Token ids are zero-based, so the vocabulary holds one more entry
        # than the largest id.
        self._vocab_size = self.tokenizer.max_token_value + 1

    def tokenize(self, text: str, **kwargs) -> List[str]:
        """Main method used by HybridChunker.

        Args:
            text: The text to encode.
            **kwargs: Accepted for interface compatibility; ignored.

        Returns:
            One string per token, each being the token id rendered as text.
        """
        return [str(token_id) for token_id in self.tokenizer.encode(text)]

    def _tokenize(self, text: str) -> List[str]:
        """Delegate to :meth:`tokenize`."""
        return self.tokenize(text)

    def _convert_token_to_id(self, token: str) -> int:
        """Parse a stringified token back into its integer id."""
        return int(token)

    def _convert_id_to_token(self, index: int) -> str:
        """Render an integer token id as its string token."""
        return str(index)

    def get_vocab(self) -> Dict[str, int]:
        """Return the token -> id mapping.

        Keys are the stringified ids, matching what :meth:`tokenize`
        produces and what :meth:`_convert_token_to_id` accepts.
        """
        return {str(token_id): token_id for token_id in range(self.vocab_size)}

    @property
    def vocab_size(self) -> int:
        """Number of ids in the underlying encoding (largest id plus one)."""
        return self._vocab_size

    def save_vocabulary(self, *args) -> Tuple[str, ...]:
        """No-op: nothing is written, so no file paths are returned."""
        return ()

    @classmethod
    def from_pretrained(cls, *args, **kwargs):
        """Class method to match HuggingFace's interface.

        All arguments are accepted and ignored; the wrapper is always
        built with its default encoding and maximum length.
        """
        return cls()
uv.lock CHANGED
The diff for this file is too large to render. See raw diff