Spaces:

Ransted
/

shelf-scanner

Sleeping

App Files Files Community

shelf-scanner / app.py

Ransted

trial-7

5ff3913 7 months ago

raw

history blame contribute delete

6.07 kB

	import os
	import uuid
	import cv2
	import numpy as np
	import requests
	import easyocr
	import gradio as gr
	from ultralytics import YOLO
	import re

	# ---------------- Load Models ----------------
	# Both objects are created once at import time and reused by every call to analyze_gradio.
	# Detector: YOLO weights read from "best.pt" (per the author, a trained YOLOv8 model).
	model = YOLO("best.pt")
	# OCR engine: EasyOCR restricted to English, with GPU use disabled.
	reader = easyocr.Reader(['en'], gpu=False)

	# ---------------- Regex ----------------
	# Compact ISBN shape: optional 978/979 prefix, nine digits, then a digit or X.
	ISBN_REGEX = r'\b(?:97[89])?\d{9}[\dX]\b'

	# Same shape as ISBN_REGEX, but tolerating the hyphens/spaces printed between
	# ISBN groups so it can be searched for directly inside raw OCR text.
	_ISBN_IN_TEXT = re.compile(r'\b(?:97[89][- ]*)?(?:\d[- ]*){9}[\dX]\b')

	def extract_isbn(text):
	    """Extract the first ISBN-10/13 found in *text*, if any.

	    Args:
	        text: Free-form text (typically OCR output). May be empty or None.

	    Returns:
	        The ISBN with hyphens and spaces removed, or None when no
	        ISBN-shaped number is present.
	    """
	    if not text:
	        return None
	    # Search the raw text and normalise only the match. Stripping separators
	    # from the whole text first glues the number to neighbouring words
	    # ("ISBN 978-..." -> "ISBN978..."), which defeats the \b anchors.
	    match = _ISBN_IN_TEXT.search(text)
	    if match is None:
	        return None
	    return match.group(0).replace("-", "").replace(" ", "")

	# ---------------- Preprocess Crop ----------------
	def preprocess_crop(crop):
	    """Binarize a cropped BGR image so its text is easier to OCR.

	    The crop is converted to grayscale, smoothed with a 3x3 Gaussian blur,
	    and then passed through mean adaptive thresholding (block size 15,
	    constant 8). The thresholded single-channel image is returned.
	    """
	    smoothed = cv2.GaussianBlur(
	        cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY),
	        (3, 3),
	        0,
	    )
	    return cv2.adaptiveThreshold(
	        smoothed,
	        255,
	        cv2.ADAPTIVE_THRESH_MEAN_C,
	        cv2.THRESH_BINARY,
	        15,
	        8,
	    )

	# ---------------- Buy Links ----------------
	def build_buy_links(isbn, title):
	    """Build store/search URLs for a book.

	    Args:
	        isbn: ISBN string, or None/empty when unknown. When present it is
	            used as the Amazon and Flipkart search term.
	        title: Book title, used as the search term when there is no ISBN
	            and always used for the Google Books link. May be None.

	    Returns:
	        A dict with the keys "Amazon", "Flipkart" and "Google Books", each
	        mapping to a search URL.
	    """
	    # Local import keeps this function self-contained; the module's import
	    # header does not bring urllib.parse into scope.
	    from urllib.parse import quote

	    title = title or ""
	    # quote(..., safe="") escapes every reserved character. requote_uri is
	    # meant for whole URIs and leaves "&", "#", "=" and "+" untouched, which
	    # truncates or corrupts the query for titles such as "Crime & Punishment".
	    term = quote(str(isbn) if isbn else title, safe="")
	    google_query = quote(f"{title} site:books.google.com".strip(), safe="")
	    return {
	        "Amazon": f"https://www.amazon.in/s?k={term}",
	        "Flipkart": f"https://www.flipkart.com/search?q={term}",
	        "Google Books": f"https://www.google.com/search?q={google_query}",
	    }

	# ---------------- API Queries ----------------
	def search_google_books(query):
	    """Look up *query* in the Google Books volumes API.

	    Args:
	        query: Search text or ISBN. Empty/None short-circuits to None.

	    Returns:
	        A dict describing the first hit (keys: title, authors, publisher,
	        description, isbn, preview_link, cover_image, buy_links), or None
	        when the query is empty, the request fails, or nothing is found.
	    """
	    if not query:
	        return None
	    try:
	        # Passing the query via params lets requests escape it fully
	        # (including "&" and "#", which requote_uri leaves as-is).
	        r = requests.get(
	            "https://www.googleapis.com/books/v1/volumes",
	            params={"q": query},
	            timeout=10,
	        )
	        r.raise_for_status()
	        data = r.json()
	    except (requests.RequestException, ValueError):
	        # Network failure, HTTP error status, or a body that is not JSON.
	        return None

	    # A missing or empty "items" list both mean "no result".
	    items = data.get("items") if isinstance(data, dict) else None
	    if not items:
	        return None

	    info = items[0].get("volumeInfo", {})
	    # First identifier that is an ISBN of either length, if any.
	    isbn = next(
	        (
	            ident.get("identifier")
	            for ident in info.get("industryIdentifiers", [])
	            if ident.get("type") in ("ISBN_10", "ISBN_13")
	        ),
	        None,
	    )
	    authors = info.get("authors")
	    return {
	        "title": info.get("title", "Unknown"),
	        "authors": ", ".join(authors) if authors else "Unknown",
	        "publisher": info.get("publisher", "Unknown"),
	        "description": info.get("description", "No description"),
	        "isbn": isbn,
	        "preview_link": info.get("previewLink", "#"),
	        "cover_image": info.get("imageLinks", {}).get("thumbnail"),
	        "buy_links": build_buy_links(isbn, info.get("title", "")),
	    }

	def search_openlibrary(query):
	    """Look up *query* in the Open Library search API.

	    Args:
	        query: Search text or ISBN. Empty/None short-circuits to None.

	    Returns:
	        A dict describing the first hit (keys: title, authors, publisher,
	        description, isbn, preview_link, cover_image, buy_links), or None
	        when the query is empty, the request fails, or nothing is found.
	    """
	    if not query:
	        return None
	    try:
	        # Passing the query via params lets requests escape it fully
	        # (including "&" and "#", which requote_uri leaves as-is).
	        r = requests.get(
	            "https://openlibrary.org/search.json",
	            params={"q": query},
	            timeout=10,
	        )
	        r.raise_for_status()
	        data = r.json()
	    except (requests.RequestException, ValueError):
	        # Network failure, HTTP error status, or a body that is not JSON.
	        return None

	    docs = data.get("docs") if isinstance(data, dict) else None
	    if not docs:
	        return None
	    d = docs[0]

	    # Guard against an empty "isbn" list, which would raise IndexError.
	    isbns = d.get("isbn")
	    isbn = isbns[0] if isinstance(isbns, list) and isbns else None
	    cover = f"https://covers.openlibrary.org/b/isbn/{isbn}-L.jpg" if isbn else None

	    # "first_sentence" is handled both as a list of strings and as a plain
	    # string; indexing a string would return only its first character.
	    first_sentence = d.get("first_sentence")
	    if isinstance(first_sentence, (list, tuple)) and first_sentence:
	        description = first_sentence[0]
	    elif isinstance(first_sentence, str) and first_sentence:
	        description = first_sentence
	    else:
	        description = "No description"

	    authors = d.get("author_name")
	    publishers = d.get("publisher")
	    key = d.get("key")
	    return {
	        "title": d.get("title", "Unknown"),
	        "authors": ", ".join(authors) if authors else "Unknown",
	        "publisher": ", ".join(publishers) if publishers else "Unknown",
	        "description": description,
	        "isbn": isbn,
	        "preview_link": f"https://openlibrary.org{key}" if key else "#",
	        "cover_image": cover,
	        "buy_links": build_buy_links(isbn, d.get("title", "")),
	    }

	def merge_results(g, o):
	    """Combine a Google Books result with an Open Library result.

	    When only one side is present it is returned as-is; when neither is,
	    None is returned. Otherwise a copy of *g* is returned in which each of
	    authors, publisher, description, isbn and cover_image is taken from *o*
	    whenever *g*'s value is missing, falsy, or a placeholder.
	    """
	    if not g:
	        return o if o else None
	    if not o:
	        return g

	    placeholders = ("Unknown", "No description", None)
	    combined = dict(g)
	    for field in ("authors", "publisher", "description", "isbn", "cover_image"):
	        current = combined.get(field)
	        if current and current not in placeholders:
	            # Google already supplied a real value for this field.
	            continue
	        combined[field] = o.get(field, current)
	    return combined

	# ---------------- Core Function ----------------
	def _remove_temp_file(path):
	    """Delete a temporary file, ignoring the case where it cannot be removed."""
	    try:
	        os.remove(path)
	    except OSError:
	        pass

	def analyze_gradio(image):
	    """Detect books in an image, OCR each detection, and fetch book metadata.

	    Args:
	        image: The uploaded image (converted with ``np.array`` and treated
	            as RGB), or None when nothing was uploaded.

	    Returns:
	        ``{"books": [...]}`` where each entry is the merged Google Books /
	        Open Library result for one detected box. The list is empty when no
	        image is given, detection fails, or nothing could be identified.
	    """
	    if image is None:
	        return {"books": []}

	    cv_img = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
	    tmp_path = f"/tmp/{uuid.uuid4()}.jpg"
	    cv2.imwrite(tmp_path, cv_img)

	    try:
	        results = model.predict(source=tmp_path, conf=0.4, verbose=False)
	    except Exception as e:
	        print("YOLO Error:", e)
	        return {"books": []}
	    finally:
	        # The temp image is only needed for the prediction call; without
	        # this, every request leaves a file behind in /tmp.
	        _remove_temp_file(tmp_path)

	    if not results:
	        return {"books": []}

	    books = []
	    for box in results[0].boxes:
	        crop_path = None
	        try:
	            x1, y1, x2, y2 = map(int, box.xyxy[0])
	            # Clamp to the image origin: a negative index would make the
	            # slice wrap around instead of starting at the edge.
	            crop = cv_img[max(y1, 0):y2, max(x1, 0):x2]
	            if crop.size == 0:
	                continue
	            proc = preprocess_crop(crop)
	            crop_path = f"/tmp/crop_{uuid.uuid4()}.jpg"
	            cv2.imwrite(crop_path, proc)

	            # OCR
	            ocr_result = reader.readtext(crop_path, detail=0, paragraph=True)
	            ocr_text = " ".join(ocr_result).strip()
	            if not ocr_text:
	                continue

	            # ISBN check
	            found_isbn = extract_isbn(ocr_text)
	            if found_isbn:
	                g = search_google_books(found_isbn)
	                o = search_openlibrary(found_isbn)
	            else:
	                # Use the first 5 words of the OCR text as the fallback query
	                q = " ".join(ocr_text.split()[:5])
	                g = search_google_books(q)
	                o = search_openlibrary(q)

	            details = merge_results(g, o)
	            if details:
	                books.append(details)

	        except Exception as e:
	            # Best effort: one bad detection must not abort the whole scan.
	            print("Error box:", e)
	            continue
	        finally:
	            # Remove the per-detection crop on every path (success, skip, error).
	            if crop_path is not None:
	                _remove_temp_file(crop_path)

	    return {"books": books}

	# ---------------- Gradio App ----------------
	# One image input, JSON output; every submission is handled by analyze_gradio.
	iface = gr.Interface(
	    title="Shelf Scanner",
	    description="Upload a bookshelf image → detect books → OCR text → fetch details from Google Books & OpenLibrary.",
	    fn=analyze_gradio,
	    inputs=gr.Image(type="pil", label="Upload Bookshelf Image"),
	    outputs="json",
	)

	if __name__ == "__main__":
	    # Listen on all interfaces; the port comes from the PORT environment
	    # variable and falls back to 7860 when it is not set.
	    listen_port = int(os.environ.get("PORT", 7860))
	    iface.launch(server_name="0.0.0.0", server_port=listen_port)