Spaces:

Sairii
/

Book_app

Sleeping

App Files Files Community

Book_app / data_sources.py

Sairii

Upload 4 files

f929f41 verified 5 months ago

raw

history blame contribute delete

3.04 kB

	# Import necessary libraries
	import requests
	from typing import Dict, Any, List, Optional

	# Base endpoints of the Google Books and Open Library search APIs
	GOOGLE_URL = "https://www.googleapis.com/books/v1/volumes"
	OPEN_LIB_SEARCH = "https://openlibrary.org/search.json"

	def google_books(query: str, max_results: int = 15) -> List[Dict]:
	    """Search Google Books and return one small, flat dictionary per volume.

	    Args:
	        query: Free-text search string, sent as the ``q`` parameter.
	        max_results: Upper bound on the number of volumes to request.
	            Values above 40 (the documented API maximum for
	            ``maxResults``) are reduced to 40.

	    Returns:
	        A list of dictionaries with the keys ``id``, ``title``, ``authors``,
	        ``description``, ``categories``, ``year``, ``cover_url``,
	        ``preview_url``, ``source``, ``subjects`` and ``raw``. The list is
	        empty when the query is blank, when ``max_results`` is not positive,
	        or when the response contains no items.

	    Raises:
	        requests.HTTPError: If the API answers with an error status.
	        requests.RequestException: On connection problems or timeout.
	    """
	    api_max_results = 40

	    # A blank query or a non-positive limit cannot yield any book, so skip
	    # the round-trip instead of letting the API reject the request.
	    if not query or not query.strip() or max_results <= 0:
	        return []

	    params = {"q": query, "maxResults": min(max_results, api_max_results)}
	    r = requests.get(GOOGLE_URL, params=params, timeout=30)
	    r.raise_for_status()

	    out = []
	    # "items" may be missing or null; "or []" covers both cases.
	    for item in r.json().get("items", []) or []:
	        vi = item.get("volumeInfo", {}) or {}
	        out.append({
	            "id": item.get("id"),
	            "title": vi.get("title"),
	            "authors": "; ".join(vi.get("authors", []) or []),
	            "description": vi.get("description", "") or "",
	            "categories": "; ".join(vi.get("categories", []) or []),
	            # Keep only the first four characters of publishedDate (the year).
	            "year": (vi.get("publishedDate", "") or "")[:4],
	            "cover_url": (vi.get("imageLinks", {}) or {}).get("thumbnail"),
	            "preview_url": vi.get("previewLink"),
	            "source": "google",
	            # Left empty here; normalize() may fill it from Open Library data.
	            "subjects": "",
	            "raw": item,
	        })
	    return out


	def openlibrary_enrich(title: str) -> Dict[str, Optional[str]]:
	    """Look up subjects/tags and a cover image for a title on Open Library.

	    This is a best-effort lookup: if the title is blank, nothing is found,
	    or any error occurs during the process, it returns the "not found"
	    result instead of raising.

	    Args:
	        title: Book title to search for.

	    Returns:
	        A dictionary with two keys:
	        ``subjects``: up to 20 subjects of the first search hit joined with
	        ``"; "``, or ``None`` when unavailable.
	        ``ol_cover_url``: medium-size cover URL built from the hit's
	        ``cover_i`` value, or ``None`` when unavailable.
	    """
	    not_found: Dict[str, Optional[str]] = {"subjects": None, "ol_cover_url": None}
	    try:
	        # Without a usable title the search would be unconstrained (a None
	        # value is dropped from the query string), so skip the request.
	        if not title or not title.strip():
	            return not_found

	        r = requests.get(OPEN_LIB_SEARCH, params={"title": title, "limit": 1}, timeout=30)
	        r.raise_for_status()

	        docs = r.json().get("docs", [])
	        if not docs:
	            return not_found

	        d = docs[0]
	        subject_list = d.get("subject") or []
	        # Cap at 20 subjects to keep the joined string short.
	        subjects = "; ".join(subject_list[:20]) if subject_list else None
	        cover_id = d.get("cover_i")
	        cover = f"https://covers.openlibrary.org/b/id/{cover_id}-M.jpg" if cover_id else None
	        return {"subjects": subjects, "ol_cover_url": cover}
	    except Exception:
	        # Deliberately broad: enrichment is optional, so any failure
	        # (network, HTTP status, malformed JSON) degrades to "not found".
	        return not_found


	def normalize(book: Dict[str, Any], enrich: Dict[str, Any]) -> Dict[str, Any]:
	    """Merge a Google Books record with its Open Library enrichment.

	    Inputs:
	        book: dictionary produced by google_books()
	        enrich: dictionary produced by openlibrary_enrich()
	    Outputs:
	        a new dictionary (the input ``book`` is not modified) in which
	        "subjects" and "cover_url" are replaced by the Open Library values
	        whenever those values are non-empty
	    """
	    # Maps each enrichment key to the book field it overrides.
	    overrides = (
	        ("subjects", "subjects"),
	        ("ol_cover_url", "cover_url"),
	    )
	    merged = dict(book)
	    for enrich_key, book_key in overrides:
	        value = enrich.get(enrich_key)
	        # Empty or missing enrichment values leave the Google data untouched.
	        if value:
	            merged[book_key] = value
	    return merged