anakib1 committed on
Commit
bed851f
·
1 Parent(s): 4970198

Fixed book

Browse files
Files changed (2) hide show
  1. src/clients.py +2 -2
  2. src/complex.ipynb +16 -17
src/clients.py CHANGED
@@ -12,8 +12,8 @@ def embed_pdf(folder: str = 'data', name: str = 'book.pdf'):
12
  path = pathlib.Path(folder).joinpath(name)
13
  if not path.exists():
14
  print('Downloading book PDF.')
15
- gdown.download('https://drive.google.com/file/d/1CwhFM4gInp9xV4G4sdnYE_rN0StmqQ2z/view?usp=sharing',
16
- str(path))
17
  loader = PyPDFLoader(str(path))
18
  documents = loader.load()
19
  splitter = RecursiveCharacterTextSplitter(
 
12
  path = pathlib.Path(folder).joinpath(name)
13
  if not path.exists():
14
  print('Downloading book PDF.')
15
+ gdown.download('https://drive.google.com/file/d/1CwhFM4gInp9xV4G4sdnYE_rN0StmqQ2z/view?usp=drive_link',
16
+ str(path), fuzzy=True)
17
  loader = PyPDFLoader(str(path))
18
  documents = loader.load()
19
  splitter = RecursiveCharacterTextSplitter(
src/complex.ipynb CHANGED
@@ -223,7 +223,7 @@
223
  },
224
  {
225
  "cell_type": "code",
226
- "execution_count": 20,
227
  "outputs": [
228
  {
229
  "name": "stdout",
@@ -236,33 +236,32 @@
236
  "name": "stderr",
237
  "output_type": "stream",
238
  "text": [
239
- "C:\\Users\\bsvja\\anaconda3\\envs\\pdf-rag\\Lib\\site-packages\\gdown\\parse_url.py:48: UserWarning: You specified a Google Drive link that is not the correct link to download a file. You might want to try `--fuzzy` option or the following url: https://drive.google.com/uc?id=1CwhFM4gInp9xV4G4sdnYE_rN0StmqQ2z\n",
240
- " warnings.warn(\n",
241
  "Downloading...\n",
242
- "From: https://drive.google.com/file/d/1CwhFM4gInp9xV4G4sdnYE_rN0StmqQ2z/view?usp=sharing\n",
243
  "To: C:\\Users\\bsvja\\PycharmProjects\\pdf-rag\\data\\book.pdf\n",
244
- "87.1kB [00:00, 9.67MB/s]\n"
245
  ]
246
  }
247
  ],
248
  "source": [
249
- "import gdown\n",
250
- "pathlib.Path(r'C:\\Users\\bsvja\\PycharmProjects\\pdf-rag\\data').mkdir(exist_ok=True)\n",
251
- "\n",
252
- "path = pathlib.Path(r'C:\\Users\\bsvja\\PycharmProjects\\pdf-rag\\data').joinpath('book.pdf')\n",
253
- "if not path.exists():\n",
254
- " print('Downloading book PDF.')\n",
255
- " gdown.download('https://drive.google.com/file/d/1CwhFM4gInp9xV4G4sdnYE_rN0StmqQ2z/view?usp=sharing',\n",
256
- " str(path))"
 
257
  ],
258
  "metadata": {
259
  "collapsed": false,
260
  "ExecuteTime": {
261
- "end_time": "2024-04-09T15:30:51.810214700Z",
262
- "start_time": "2024-04-09T15:30:50.516603500Z"
263
  }
264
  },
265
- "id": "f1006423bcc8b35b"
266
  },
267
  {
268
  "cell_type": "code",
@@ -272,7 +271,7 @@
272
  "metadata": {
273
  "collapsed": false
274
  },
275
- "id": "b0a54b5e476b46e0"
276
  }
277
  ],
278
  "metadata": {
 
223
  },
224
  {
225
  "cell_type": "code",
226
+ "execution_count": 25,
227
  "outputs": [
228
  {
229
  "name": "stdout",
 
236
  "name": "stderr",
237
  "output_type": "stream",
238
  "text": [
 
 
239
  "Downloading...\n",
240
+ "From: https://drive.google.com/uc?id=1CwhFM4gInp9xV4G4sdnYE_rN0StmqQ2z\n",
241
  "To: C:\\Users\\bsvja\\PycharmProjects\\pdf-rag\\data\\book.pdf\n",
242
+ "100%|██████████| 2.37M/2.37M [00:00<00:00, 4.19MB/s]\n"
243
  ]
244
  }
245
  ],
246
  "source": [
247
+ "def embed_pdf(folder: str = 'data', name: str = 'book.pdf'):\n",
248
+ " pathlib.Path(folder).mkdir(exist_ok=True)\n",
249
+ " path = pathlib.Path(folder).joinpath(name)\n",
250
+ " if not path.exists():\n",
251
+ " print('Downloading book PDF.')\n",
252
+ " gdown.download('https://drive.google.com/file/d/1CwhFM4gInp9xV4G4sdnYE_rN0StmqQ2z/view?usp=drive_link',\n",
253
+ " str(path), fuzzy=True)\n",
254
+ " \n",
255
+ "embed_pdf('../data')"
256
  ],
257
  "metadata": {
258
  "collapsed": false,
259
  "ExecuteTime": {
260
+ "end_time": "2024-04-09T18:00:13.190953100Z",
261
+ "start_time": "2024-04-09T18:00:03.632167700Z"
262
  }
263
  },
264
+ "id": "b0a54b5e476b46e0"
265
  },
266
  {
267
  "cell_type": "code",
 
271
  "metadata": {
272
  "collapsed": false
273
  },
274
+ "id": "5c977fcc519c1a6e"
275
  }
276
  ],
277
  "metadata": {