LiamKhoaLe committed on
Commit
bb294dd
·
1 Parent(s): 43b78de

Update pg url handler

Browse files
Files changed (1) hide show
  1. app/services/project_gutenberg.py +24 -11
app/services/project_gutenberg.py CHANGED
@@ -56,19 +56,32 @@ async def search(q: str):
56
 
57
  # Fetch items
58
  async def fetch(ref: dict):
59
- """For import: just return the same direct PDF link."""
60
  gid = ref.get("id")
61
  if not gid:
62
  return None
63
- url = f"https://gutendex.com/books/{gid}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  async with httpx.AsyncClient(timeout=10) as client:
65
- r = await client.get(url)
66
- if r.status_code != 200:
67
- return None
68
- data = r.json()
69
- pdf_link = next(
70
- (v for k, v in data["formats"].items() if k.endswith("pdf")), None
71
- )
72
- if pdf_link:
73
- return {"download_available": True, "download_url": pdf_link}
74
  return None
 
56
 
57
  # Fetch items
58
  async def fetch(ref: dict):
59
+ """For import: return direct PDF link if available via Gutendex or fallback to Gutenberg."""
60
  gid = ref.get("id")
61
  if not gid:
62
  return None
63
+ # Trailing to preview page for PDF confirmation
64
+ gutendex_url = f"https://gutendex.com/books/{gid}/" # ensure trailing slash
65
+ try:
66
+ async with httpx.AsyncClient(timeout=10, follow_redirects=True) as client:
67
+ r = await client.get(gutendex_url)
68
+ if r.status_code == 200:
69
+ data = r.json()
70
+ pdf_link = next(
71
+ (v for k, v in data["formats"].items() if k.lower().endswith("pdf")),
72
+ None
73
+ )
74
+ if pdf_link:
75
+ return {"download_available": True, "download_url": pdf_link}
76
+ except Exception as e:
77
+ logger.warning(f"[GUT] Gutendex metadata failed for {gid}: {e}")
78
+ # Fallback to static Gutenberg URL
79
+ fallback_url = f"https://www.gutenberg.org/files/{gid}/{gid}-pdf.pdf"
80
  async with httpx.AsyncClient(timeout=10) as client:
81
+ head = await client.head(fallback_url)
82
+ if head.status_code == 200:
83
+ logger.info(f"[GUT] Using fallback PDF: {fallback_url}")
84
+ return {"download_available": True, "download_url": fallback_url}
85
+ # Log
86
+ logger.warning(f"[GUT] No PDF for book {gid}")
 
 
 
87
  return None