LiamKhoaLe committed on
Commit
43b78de
·
1 Parent(s): 438b6fe

Update endpoint URL from Internet Archive (IA) source

Browse files
Files changed (1) hide show
  1. app/services/internet_archive.py +12 -5
app/services/internet_archive.py CHANGED
@@ -26,14 +26,21 @@ async def fetch(ref):
26
  identifier = ref.get("id")
27
  if not identifier:
28
  return None
 
29
  url = f"https://archive.org/metadata/{identifier}"
30
  async with httpx.AsyncClient(timeout=5) as client:
31
  res = await client.get(url)
32
  metadata = res.json()
33
  rights = metadata.get("metadata", {}).get("rights", "")
34
- if "public" in rights.lower():
35
- return {
36
- "download_available": True,
37
- "download_url": f"https://archive.org/download/{identifier}/{identifier}.pdf"
38
- }
 
 
 
 
 
39
  return {"download_available": False}
 
 
26
  identifier = ref.get("id")
27
  if not identifier:
28
  return None
29
+
30
  url = f"https://archive.org/metadata/{identifier}"
31
  async with httpx.AsyncClient(timeout=5) as client:
32
  res = await client.get(url)
33
  metadata = res.json()
34
  rights = metadata.get("metadata", {}).get("rights", "")
35
+ files = metadata.get("files", [])
36
+
37
+ # Prefer the actual PDF file name
38
+ for f in files:
39
+ if f.get("format", "").lower() == "pdf" and f.get("name", "").endswith(".pdf"):
40
+ return {
41
+ "download_available": "public" in rights.lower(),
42
+ "download_url": f"https://archive.org/download/{identifier}/{f['name']}"
43
+ }
44
+
45
  return {"download_available": False}
46
+