Book_app / data_sources.py
Sairii's picture
Upload 4 files
f929f41 verified
# Import necessary libraries
import requests
from typing import Dict, Any, List, Optional
# API endpoint URLs (Google Books volume search, Open Library search)
GOOGLE_URL = "https://www.googleapis.com/books/v1/volumes"
OPEN_LIB_SEARCH = "https://openlibrary.org/search.json"
def _google_item_to_book(item: Dict[str, Any]) -> Dict[str, Any]:
    """Flatten one Google Books ``items`` entry into the app's book dictionary."""
    # ``or {}`` / ``or []`` guard against keys that are present but null.
    info = item.get("volumeInfo", {}) or {}
    return {
        "id": item.get("id"),
        "title": info.get("title"),
        "authors": "; ".join(info.get("authors", []) or []),
        "description": info.get("description", "") or "",
        "categories": "; ".join(info.get("categories", []) or []),
        # Keep only the first four characters of publishedDate (the year).
        "year": (info.get("publishedDate", "") or "")[:4],
        "cover_url": (info.get("imageLinks", {}) or {}).get("thumbnail"),
        "preview_url": info.get("previewLink"),
        "source": "google",
        # Left empty here; filled in later from Open Library when available.
        "subjects": "",
        "raw": item,
    }


def google_books(query: str, max_results: int = 15) -> List[Dict]:
    """Search Google Books and return one small dictionary per volume.

    Args:
        query: Search text sent as the ``q`` parameter. A blank query
            (empty, ``None`` or whitespace only) returns ``[]`` without
            contacting the API, which would otherwise reject the request.
        max_results: Desired number of results. The value is clamped to the
            0..40 range accepted by the API's ``maxResults`` parameter.

    Returns:
        A list of dictionaries with the keys ``id``, ``title``, ``authors``,
        ``description``, ``categories``, ``year``, ``cover_url``,
        ``preview_url``, ``source``, ``subjects`` and ``raw`` (the untouched
        API item). ``authors`` and ``categories`` are "; "-joined strings.

    Raises:
        requests.HTTPError: If the API answers with an error status
            (raised by ``raise_for_status``). Other ``requests`` errors such
            as timeouts propagate unchanged.
    """
    if not query or not query.strip():
        return []

    # Out-of-range values make the API answer 400, so clamp instead of failing.
    limit = max(0, min(int(max_results), 40))

    response = requests.get(
        GOOGLE_URL,
        params={"q": query, "maxResults": limit},
        timeout=30,
    )
    response.raise_for_status()

    # "items" is absent when nothing matched.
    items = response.json().get("items", []) or []
    return [_google_item_to_book(item) for item in items]
def openlibrary_enrich(title: str) -> Dict[str, Optional[str]]:
    """Look up a title on Open Library to get subject tags and a cover image.

    Only the first search hit is used. This is a best-effort lookup: if the
    request fails, the response cannot be parsed, or anything else goes wrong,
    the "not found" result is returned instead of raising.

    Args:
        title: Book title sent as the ``title`` search parameter.

    Returns:
        A dictionary with two keys:
        ``subjects`` - up to 20 subjects joined with "; ", or ``None``;
        ``ol_cover_url`` - medium-size cover URL built from ``cover_i``,
        or ``None``.
    """
    not_found = {"subjects": None, "ol_cover_url": None}
    try:
        response = requests.get(
            OPEN_LIB_SEARCH,
            params={"title": title, "limit": 1},
            timeout=30,
        )
        response.raise_for_status()

        matches = response.json().get("docs", [])
        if not matches:
            return not_found
        best = matches[0]

        # Cap the tag list at 20 entries to keep the joined string short.
        subject_list = best.get("subject")
        if subject_list:
            subjects = "; ".join(subject_list[:20])
        else:
            subjects = None

        # A cover exists only when the document carries a cover id.
        cover_id = best.get("cover_i")
        if cover_id:
            cover = f"https://covers.openlibrary.org/b/id/{cover_id}-M.jpg"
        else:
            cover = None

        return {"subjects": subjects, "ol_cover_url": cover}
    except Exception:
        # Deliberate catch-all: enrichment is optional and must never break the caller.
        return not_found
def normalize(book: Dict[str, Any], enrich: Dict[str, Any]) -> Dict[str, Any]:
    """Merge a Google Books record with its Open Library enrichment.

    Args:
        book: One dictionary as produced by ``google_books()``.
        enrich: The dictionary returned by ``openlibrary_enrich()``.

    Returns:
        A shallow copy of ``book`` in which ``subjects`` and ``cover_url``
        are overwritten by the Open Library values whenever those values are
        non-empty. ``book`` itself is not modified.
    """
    merged = dict(book)
    # (key in enrich, key to overwrite in the merged record)
    overrides = (
        ("subjects", "subjects"),
        ("ol_cover_url", "cover_url"),
    )
    for enrich_key, book_key in overrides:
        value = enrich.get(enrich_key)
        if value:
            merged[book_key] = value
    return merged