LiamKhoaLe committed on
Commit
eb2e1af
·
1 Parent(s): 7626f3a

Update debug logs

Browse files
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
app/.DS_Store CHANGED
Binary files a/app/.DS_Store and b/app/.DS_Store differ
 
app/main.py CHANGED
@@ -18,6 +18,18 @@
18
  from fastapi import FastAPI, WebSocket
19
  from app.routers import search, import_doc
20
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  app = FastAPI()
22
 
23
  app.include_router(search.router, prefix="/search")
 
18
  from fastapi import FastAPI, WebSocket
19
  from app.routers import search, import_doc
20
 
21
+ # Logging configuration (root logger at DEBUG; force=True replaces any existing handlers)
22
+ import logging
23
+ logging.basicConfig(
24
+ level=logging.DEBUG,
25
+ format="%(asctime)s — %(name)s — %(levelname)s — %(message)s",
26
+ force=True
27
+ )
28
+ logger = logging.getLogger("book-query")
29
+ logger.setLevel(logging.DEBUG)
30
+ logger.info("🚀 Starting Tutor Book Querier...")
31
+
32
+
33
  app = FastAPI()
34
 
35
  app.include_router(search.router, prefix="/search")
app/routers/import_doc.py CHANGED
@@ -8,6 +8,9 @@ import aiofiles, uuid, os
8
  import asyncio
9
  import httpx
10
 
 
 
 
11
  router = APIRouter()
12
 
13
  class ImportRequest(BaseModel):
@@ -18,45 +21,62 @@ class ImportRequest(BaseModel):
18
 
19
  @router.post("")
20
  async def import_book(req: ImportRequest):
 
21
  source_lookup = {
22
  "google": google_books.fetch,
23
  "openlibrary": open_library.fetch,
24
  "ia": internet_archive.fetch
25
  }
26
  if req.source not in source_lookup:
 
27
  raise HTTPException(400, "Invalid source")
28
 
29
  result = await source_lookup[req.source](req.ref)
30
- print(f"[DEBUG] Import source result: {result}") # We need to debug out the result from Google API
31
 
32
  # Debugs
33
  if not result:
34
- print("[INFO] No download result returned from fetch().")
35
  raise HTTPException(403, "Download not permitted")
36
  if not result.get("download_url"):
37
- print(f"[INFO] No download URL. Viewability: {result.get('viewability', 'N/A')}")
38
  raise HTTPException(403, "Download not permitted")
39
 
40
  # Write temp file and save as Pdf from downloadable link
41
  download_url = result["download_url"]
42
  file_path = f"/tmp/{req.candidate_id}.pdf"
 
43
 
44
- async with aiofiles.open(file_path, mode='wb') as f:
45
- async with httpx.AsyncClient() as client:
46
- r = await client.get(download_url)
47
- await f.write(r.content)
48
-
49
- with open(file_path, "rb") as f:
50
- await grid_fs_bucket.upload_from_stream(f"{req.candidate_id}.pdf", f)
51
-
52
- os.remove(file_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  doc = {
54
  "_id": req.candidate_id,
55
  "title": req.title,
56
  "status": "queued",
57
  "metadata": result
58
  }
 
59
  asyncio.create_task(parse_and_index(req.candidate_id))
60
-
61
  return {"document_id": req.candidate_id, "status": "queued"}
62
 
 
8
  import asyncio
9
  import httpx
10
 
11
+ import logging
12
+ logger = logging.getLogger("book-query")
13
+
14
  router = APIRouter()
15
 
16
  class ImportRequest(BaseModel):
 
21
 
22
  @router.post("")
23
  async def import_book(req: ImportRequest):
24
+ logger.info(f"📥 Received import request: {req.dict()}")
25
  source_lookup = {
26
  "google": google_books.fetch,
27
  "openlibrary": open_library.fetch,
28
  "ia": internet_archive.fetch
29
  }
30
  if req.source not in source_lookup:
31
+ logger.warning(f"❌ Invalid source: {req.source}")
32
  raise HTTPException(400, "Invalid source")
33
 
34
  result = await source_lookup[req.source](req.ref)
35
+ logger.debug(f"🔎 Fetch result for ref {req.ref}: {result}")
36
 
37
  # Debugs
38
  if not result:
39
+ logger.warning(f"⛔️ No fetch result for {req.source} with ref {req.ref}")
40
  raise HTTPException(403, "Download not permitted")
41
  if not result.get("download_url"):
42
+ logger.warning(f"📄 No download URL from {req.source}. Viewability: {result.get('viewability', 'unknown')}")
43
  raise HTTPException(403, "Download not permitted")
44
 
45
  # Write temp file and save as Pdf from downloadable link
46
  download_url = result["download_url"]
47
  file_path = f"/tmp/{req.candidate_id}.pdf"
48
+ logger.info(f"⬇️ Downloading from: {download_url}")
49
 
50
+ # Read and write file
51
+ try:
52
+ async with aiofiles.open(file_path, mode='wb') as f:
53
+ async with httpx.AsyncClient() as client:
54
+ r = await client.get(download_url)
55
+ r.raise_for_status()
56
+ await f.write(r.content)
57
+ logger.info(f"✅ PDF saved to {file_path}")
58
+ except Exception as e:
59
+ logger.error(f"🚨 Failed to download or write PDF: {e}")
60
+ raise HTTPException(500, "Failed to download PDF")
61
+
62
+ # Save to bucket
63
+ try:
64
+ with open(file_path, "rb") as f:
65
+ await grid_fs_bucket.upload_from_stream(f"{req.candidate_id}.pdf", f)
66
+ os.remove(file_path)
67
+ except Exception as e:
68
+ logger.error(f"💥 Failed to upload to GridFS: {e}")
69
+ raise HTTPException(500, "Storage failed")
70
+
71
+ # Doc tags
72
  doc = {
73
  "_id": req.candidate_id,
74
  "title": req.title,
75
  "status": "queued",
76
  "metadata": result
77
  }
78
+ await db.documents.insert_one(doc)
79
  asyncio.create_task(parse_and_index(req.candidate_id))
80
+ logger.info(f"📚 Document {req.candidate_id} queued for indexing")
81
  return {"document_id": req.candidate_id, "status": "queued"}
82
 
app/services/google_books.py CHANGED
@@ -1,6 +1,8 @@
1
  # app/services/google_books.py
2
  import httpx, os
3
  from tenacity import retry, stop_after_attempt, wait_fixed
 
 
4
 
5
  @retry(stop=stop_after_attempt(3), wait=wait_fixed(1))
6
  async def search(q):
@@ -25,4 +27,7 @@ async def search(q):
25
  ]
26
 
27
  async def fetch(ref):
28
- return None # Google doesn't permit download
 
 
 
 
1
  # app/services/google_books.py
2
  import httpx, os
3
  from tenacity import retry, stop_after_attempt, wait_fixed
4
+ import logging
5
+ logger = logging.getLogger("book-query")
6
 
7
  @retry(stop=stop_after_attempt(3), wait=wait_fixed(1))
8
  async def search(q):
 
27
  ]
28
 
29
  async def fetch(ref):
30
+ logger = logging.getLogger("book-query")
31
+ logger.debug(f"[Google] fetch() called with ref={ref}")
32
+ return None # Always returns None, so import will 403
33
+