Fixed book
Browse files
- src/clients.py +2 -2
- src/complex.ipynb +16 -17
src/clients.py
CHANGED
|
@@ -12,8 +12,8 @@ def embed_pdf(folder: str = 'data', name: str = 'book.pdf'):
|
|
| 12 |
path = pathlib.Path(folder).joinpath(name)
|
| 13 |
if not path.exists():
|
| 14 |
print('Downloading book PDF.')
|
| 15 |
-
gdown.download('https://drive.google.com/file/d/1CwhFM4gInp9xV4G4sdnYE_rN0StmqQ2z/view?usp=
|
| 16 |
-
str(path))
|
| 17 |
loader = PyPDFLoader(str(path))
|
| 18 |
documents = loader.load()
|
| 19 |
splitter = RecursiveCharacterTextSplitter(
|
|
|
|
| 12 |
path = pathlib.Path(folder).joinpath(name)
|
| 13 |
if not path.exists():
|
| 14 |
print('Downloading book PDF.')
|
| 15 |
+
gdown.download('https://drive.google.com/file/d/1CwhFM4gInp9xV4G4sdnYE_rN0StmqQ2z/view?usp=drive_link',
|
| 16 |
+
str(path), fuzzy=True)
|
| 17 |
loader = PyPDFLoader(str(path))
|
| 18 |
documents = loader.load()
|
| 19 |
splitter = RecursiveCharacterTextSplitter(
|
src/complex.ipynb
CHANGED
|
@@ -223,7 +223,7 @@
|
|
| 223 |
},
|
| 224 |
{
|
| 225 |
"cell_type": "code",
|
| 226 |
-
"execution_count":
|
| 227 |
"outputs": [
|
| 228 |
{
|
| 229 |
"name": "stdout",
|
|
@@ -236,33 +236,32 @@
|
|
| 236 |
"name": "stderr",
|
| 237 |
"output_type": "stream",
|
| 238 |
"text": [
|
| 239 |
-
"C:\\Users\\bsvja\\anaconda3\\envs\\pdf-rag\\Lib\\site-packages\\gdown\\parse_url.py:48: UserWarning: You specified a Google Drive link that is not the correct link to download a file. You might want to try `--fuzzy` option or the following url: https://drive.google.com/uc?id=1CwhFM4gInp9xV4G4sdnYE_rN0StmqQ2z\n",
|
| 240 |
-
" warnings.warn(\n",
|
| 241 |
"Downloading...\n",
|
| 242 |
-
"From: https://drive.google.com/
|
| 243 |
"To: C:\\Users\\bsvja\\PycharmProjects\\pdf-rag\\data\\book.pdf\n",
|
| 244 |
-
"
|
| 245 |
]
|
| 246 |
}
|
| 247 |
],
|
| 248 |
"source": [
|
| 249 |
-
"
|
| 250 |
-
"pathlib.Path(
|
| 251 |
-
"\n",
|
| 252 |
-
"
|
| 253 |
-
"
|
| 254 |
-
"
|
| 255 |
-
"
|
| 256 |
-
"
|
|
|
|
| 257 |
],
|
| 258 |
"metadata": {
|
| 259 |
"collapsed": false,
|
| 260 |
"ExecuteTime": {
|
| 261 |
-
"end_time": "2024-04-
|
| 262 |
-
"start_time": "2024-04-
|
| 263 |
}
|
| 264 |
},
|
| 265 |
-
"id": "
|
| 266 |
},
|
| 267 |
{
|
| 268 |
"cell_type": "code",
|
|
@@ -272,7 +271,7 @@
|
|
| 272 |
"metadata": {
|
| 273 |
"collapsed": false
|
| 274 |
},
|
| 275 |
-
"id": "
|
| 276 |
}
|
| 277 |
],
|
| 278 |
"metadata": {
|
|
|
|
| 223 |
},
|
| 224 |
{
|
| 225 |
"cell_type": "code",
|
| 226 |
+
"execution_count": 25,
|
| 227 |
"outputs": [
|
| 228 |
{
|
| 229 |
"name": "stdout",
|
|
|
|
| 236 |
"name": "stderr",
|
| 237 |
"output_type": "stream",
|
| 238 |
"text": [
|
|
|
|
|
|
|
| 239 |
"Downloading...\n",
|
| 240 |
+
"From: https://drive.google.com/uc?id=1CwhFM4gInp9xV4G4sdnYE_rN0StmqQ2z\n",
|
| 241 |
"To: C:\\Users\\bsvja\\PycharmProjects\\pdf-rag\\data\\book.pdf\n",
|
| 242 |
+
"100%|██████████| 2.37M/2.37M [00:00<00:00, 4.19MB/s]\n"
|
| 243 |
]
|
| 244 |
}
|
| 245 |
],
|
| 246 |
"source": [
|
| 247 |
+
"def embed_pdf(folder: str = 'data', name: str = 'book.pdf'):\n",
|
| 248 |
+
" pathlib.Path(folder).mkdir(exist_ok=True)\n",
|
| 249 |
+
" path = pathlib.Path(folder).joinpath(name)\n",
|
| 250 |
+
" if not path.exists():\n",
|
| 251 |
+
" print('Downloading book PDF.')\n",
|
| 252 |
+
" gdown.download('https://drive.google.com/file/d/1CwhFM4gInp9xV4G4sdnYE_rN0StmqQ2z/view?usp=drive_link',\n",
|
| 253 |
+
" str(path), fuzzy=True)\n",
|
| 254 |
+
" \n",
|
| 255 |
+
"embed_pdf('../data')"
|
| 256 |
],
|
| 257 |
"metadata": {
|
| 258 |
"collapsed": false,
|
| 259 |
"ExecuteTime": {
|
| 260 |
+
"end_time": "2024-04-09T18:00:13.190953100Z",
|
| 261 |
+
"start_time": "2024-04-09T18:00:03.632167700Z"
|
| 262 |
}
|
| 263 |
},
|
| 264 |
+
"id": "b0a54b5e476b46e0"
|
| 265 |
},
|
| 266 |
{
|
| 267 |
"cell_type": "code",
|
|
|
|
| 271 |
"metadata": {
|
| 272 |
"collapsed": false
|
| 273 |
},
|
| 274 |
+
"id": "5c977fcc519c1a6e"
|
| 275 |
}
|
| 276 |
],
|
| 277 |
"metadata": {
|