Spaces:
Sleeping
Sleeping
Commit
·
bb294dd
1
Parent(s):
43b78de
Update pg url handler
Browse files
app/services/project_gutenberg.py
CHANGED
|
@@ -56,19 +56,32 @@ async def search(q: str):
|
|
| 56 |
|
| 57 |
# Fetch items
|
| 58 |
async def fetch(ref: dict):
|
| 59 |
-
"""For import:
|
| 60 |
gid = ref.get("id")
|
| 61 |
if not gid:
|
| 62 |
return None
|
| 63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
async with httpx.AsyncClient(timeout=10) as client:
|
| 65 |
-
|
| 66 |
-
if
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
)
|
| 72 |
-
if pdf_link:
|
| 73 |
-
return {"download_available": True, "download_url": pdf_link}
|
| 74 |
return None
|
|
|
|
| 56 |
|
| 57 |
# Fetch items
|
| 58 |
async def fetch(ref: dict):
    """Return a direct PDF download link for a Project Gutenberg book, if any.

    The Gutendex metadata endpoint for the book is queried first; the first
    entry in its ``formats`` mapping whose key ends in "pdf" is used. If that
    yields nothing (or the lookup fails), the static gutenberg.org PDF URL is
    probed with a HEAD request instead.

    Args:
        ref: Reference dict; only its ``"id"`` key (the book id) is read.

    Returns:
        ``{"download_available": True, "download_url": <url>}`` when a PDF
        link is found, otherwise ``None`` (including when ``ref`` has no id).
    """
    gid = ref.get("id")
    if not gid:
        return None

    # Gutendex book endpoint; keep the trailing slash to avoid a redirect.
    gutendex_url = f"https://gutendex.com/books/{gid}/"
    fallback_url = f"https://www.gutenberg.org/files/{gid}/{gid}-pdf.pdf"

    # One client serves both requests. httpx does not follow redirects by
    # default, so enable it: otherwise a 3xx answer on the fallback HEAD would
    # be mistaken for "no PDF".
    async with httpx.AsyncClient(timeout=10, follow_redirects=True) as client:
        try:
            r = await client.get(gutendex_url)
            if r.status_code == 200:
                # Tolerate a payload without "formats" instead of relying on
                # the except clause below.
                formats = r.json().get("formats") or {}
                pdf_link = next(
                    (url for fmt, url in formats.items() if fmt.lower().endswith("pdf")),
                    None,
                )
                if pdf_link:
                    return {"download_available": True, "download_url": pdf_link}
        except Exception as e:
            # Best effort: any metadata failure just means we try the fallback.
            logger.warning(f"[GUT] Gutendex metadata failed for {gid}: {e}")

        # Fallback to static Gutenberg URL
        try:
            head = await client.head(fallback_url)
        except httpx.HTTPError as e:
            # A network error on the probe is treated like a missing PDF
            # rather than crashing the import.
            logger.warning(f"[GUT] Fallback PDF check failed for {gid}: {e}")
        else:
            if head.status_code == 200:
                logger.info(f"[GUT] Using fallback PDF: {fallback_url}")
                return {"download_available": True, "download_url": fallback_url}

    logger.warning(f"[GUT] No PDF for book {gid}")
    return None
|