LiamKhoaLe committed on
Commit
92f1cfa
·
1 Parent(s): 6d6636b

Update textbook fetcher

Browse files
Files changed (2) hide show
  1. app/db.py +12 -1
  2. app/routers/import_doc.py +13 -2
app/db.py CHANGED
@@ -30,4 +30,15 @@ async def save_to_textbook_fs(doc_id: str, file_path: str):
30
  # Log
31
  logger.info(f"📦 PDF also stored to textbook bucket at: {TEXTBOOK_URI}")
32
  except Exception as e:
33
- logger.warning(f"⚠️ Failed to save to textbook storage: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
30
  # Log
31
  logger.info(f"📦 PDF also stored to textbook bucket at: {TEXTBOOK_URI}")
32
  except Exception as e:
33
+ logger.warning(f"⚠️ Failed to save to textbook storage: {e}")
34
+
35
+ # == PDF FETCHER ==
36
# Shared GridFS bucket for textbook downloads, created on first use so every
# fetch reuses one client (and its connection pool) instead of opening and
# abandoning a new client on each request.
_textbook_fetch_bucket = None


def _get_textbook_fetch_bucket():
    """Return the shared textbook GridFS bucket, creating it on first call."""
    global _textbook_fetch_bucket
    if _textbook_fetch_bucket is None:
        textbook_client = AsyncIOMotorClient(TEXTBOOK_URI)
        textbook_db = textbook_client.get_default_database()
        _textbook_fetch_bucket = AsyncIOMotorGridFSBucket(textbook_db)
    return _textbook_fetch_bucket


async def fetch_textbook_pdf(doc_id: str):
    """Open a GridFS download stream for the stored file ``{doc_id}.pdf``.

    Args:
        doc_id: Document identifier; the file is looked up by the name
            ``f"{doc_id}.pdf"`` in the database selected by ``TEXTBOOK_URI``.

    Returns:
        The stream returned by ``open_download_stream_by_name``.

    Raises:
        Exception: Any failure while building the bucket or opening the
            stream is logged as a warning and re-raised unchanged.
    """
    try:
        # Bucket creation stays inside the try so configuration errors are
        # logged and propagated exactly like lookup errors.
        textbook_fs = _get_textbook_fetch_bucket()
        return await textbook_fs.open_download_stream_by_name(f"{doc_id}.pdf")
    except Exception as e:
        logger.warning(f"⚠️ Failed to fetch textbook PDF for {doc_id}: {e}")
        raise
app/routers/import_doc.py CHANGED
@@ -1,7 +1,8 @@
1
- # app/routers/import.py
2
  from fastapi import APIRouter, HTTPException
 
3
  from pydantic import BaseModel
4
- from app.db import get_db, get_gridfs, save_to_textbook_fs
5
  from app.services import google_books, open_library, internet_archive, project_gutenberg
6
  from app.services.ingest import parse_and_index
7
  import aiofiles, uuid, os
@@ -19,6 +20,7 @@ class ImportRequest(BaseModel):
19
  source: str
20
  ref: dict
21
 
 
22
  @router.post("")
23
  async def import_book(req: ImportRequest):
24
  logger.info(f"📥 Received import request: {req.dict()}")
@@ -84,3 +86,12 @@ async def import_book(req: ImportRequest):
84
  logger.info(f"📚 Document {req.candidate_id} queued for indexing")
85
  return {"document_id": req.candidate_id, "status": "queued"}
86
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/routers/import_doc.py
2
  from fastapi import APIRouter, HTTPException
3
+ from fastapi.responses import StreamingResponse
4
  from pydantic import BaseModel
5
+ from app.db import get_db, get_gridfs, save_to_textbook_fs, fetch_textbook_pdf
6
  from app.services import google_books, open_library, internet_archive, project_gutenberg
7
  from app.services.ingest import parse_and_index
8
  import aiofiles, uuid, os
 
20
  source: str
21
  ref: dict
22
 
23
+ # Embedding, query and PDF saver to buckets
24
  @router.post("")
25
  async def import_book(req: ImportRequest):
26
  logger.info(f"📥 Received import request: {req.dict()}")
 
86
  logger.info(f"📚 Document {req.candidate_id} queued for indexing")
87
  return {"document_id": req.candidate_id, "status": "queued"}
88
 
89
+ # Fetch a textbook PDF by its document id
90
@router.get("/textbook/{doc_id}")
async def get_textbook(doc_id: str):
    """Serve the textbook PDF for ``doc_id`` as a streamed response.

    Args:
        doc_id: Identifier taken from the URL path and passed to
            ``fetch_textbook_pdf``.

    Returns:
        A ``StreamingResponse`` with media type ``application/pdf`` wrapping
        the stream returned by ``fetch_textbook_pdf``.

    Raises:
        HTTPException: 404 "Textbook not found" for any exception raised
            while fetching the stream or building the response; the
            underlying error is logged first.
    """
    try:
        pdf_stream = await fetch_textbook_pdf(doc_id)
        response = StreamingResponse(pdf_stream, media_type="application/pdf")
        return response
    except Exception as exc:
        # Every failure is reported to the client as "not found".
        logger.error(f"❌ Failed to serve textbook {doc_id}: {exc}")
        raise HTTPException(status_code=404, detail="Textbook not found")