# shelf-scanner / app.py
# Ransted's picture
# trial-7
# 5ff3913
import os
import re
import uuid
from urllib.parse import quote_plus

import cv2
import easyocr
import gradio as gr
import numpy as np
import requests
from ultralytics import YOLO
# ---------------- Load Models ----------------
# Both models are constructed once, at import time, and are reused by every
# call to analyze_gradio below.
# The detector weights are read from "best.pt" (a relative path).
model = YOLO("best.pt") # your trained YOLOv8 model
# English-only EasyOCR reader with GPU use disabled.
reader = easyocr.Reader(['en'], gpu=False)
# ---------------- Regex ----------------
# Candidate ISBN-10 / ISBN-13 as it appears in raw text:
#   * optional 978/979 prefix,
#   * nine digits, each optionally followed by ONE hyphen or space,
#   * a final check character (digit or X/x).
# The look-arounds stop the match from starting inside a longer digit run or
# from swallowing the first letter of a following word as the "X" check digit.
ISBN_REGEX = r'(?<!\d)(?:97[89][- ]?)?(?:\d[- ]?){9}[\dXx](?![0-9A-Za-z])'

# Translation table that deletes the separators allowed inside an ISBN.
_ISBN_SEPARATORS = str.maketrans("", "", "- ")


def extract_isbn(text):
    """Extract the first ISBN-10/13 found in *text*.

    The pattern is applied to the text as written, so separators are only
    tolerated inside the number itself. Stripping every space and hyphen
    first would glue the number to neighbouring words ("ISBN 978..." becomes
    "ISBN978...") and merge unrelated digit groups.

    Args:
        text: Arbitrary text, typically OCR output. May be empty or None.

    Returns:
        The ISBN with separators removed and the check character upper-cased,
        or None when no candidate is present. The checksum is not validated.
    """
    if not text:
        return None
    match = re.search(ISBN_REGEX, text)
    if match is None:
        return None
    return match.group(0).translate(_ISBN_SEPARATORS).upper()
# ---------------- Preprocess Crop ----------------
def preprocess_crop(crop):
    """Turn a BGR crop into a binarised image for the OCR step.

    The crop is converted to grayscale, smoothed with a 3x3 Gaussian blur,
    and then passed through a mean adaptive threshold (binary output,
    max value 255, block size 15, constant 8).
    """
    smoothed = cv2.GaussianBlur(
        cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY),
        (3, 3),
        0,
    )
    return cv2.adaptiveThreshold(
        smoothed,
        255,
        cv2.ADAPTIVE_THRESH_MEAN_C,
        cv2.THRESH_BINARY,
        15,
        8,
    )
# ---------------- Buy Links ----------------
def build_buy_links(isbn, title):
    """Build store/search URLs for a book.

    Amazon and Flipkart are searched by ISBN when one is known and by title
    otherwise; the Google Books link is always a title search restricted to
    books.google.com.

    Query values are encoded with ``quote_plus`` so that characters which are
    meaningful inside a URL ("&", "#", "=", "+", "?") cannot cut the search
    term short -- e.g. "Pride & Prejudice" or "C# in Depth".

    Args:
        isbn: ISBN string, or a falsy value when unknown.
        title: Book title; None is treated as an empty string.

    Returns:
        dict mapping "Amazon", "Flipkart" and "Google Books" to URLs.
    """
    title = title or ""
    store_query = quote_plus(str(isbn)) if isbn else quote_plus(title)
    google_query = quote_plus(f"{title} site:books.google.com".strip())
    return {
        "Amazon": f"https://www.amazon.in/s?k={store_query}",
        "Flipkart": f"https://www.flipkart.com/search?q={store_query}",
        "Google Books": f"https://www.google.com/search?q={google_query}",
    }
# ---------------- API Queries ----------------
def search_google_books(query):
    """Look up *query* on the Google Books volumes API.

    Only the first returned volume is used.

    Args:
        query: Free-text search string or ISBN. Falsy values short-circuit.

    Returns:
        A dict with the keys "title", "authors", "publisher", "description",
        "isbn", "preview_link", "cover_image" and "buy_links", or None when
        the query is empty, the request fails, the response is not JSON, or
        no volume was returned.
    """
    if not query:
        return None
    try:
        # Passing the query via ``params`` lets requests percent-encode it,
        # so "&", "#" or "+" in OCR text cannot break the query string.
        r = requests.get(
            "https://www.googleapis.com/books/v1/volumes",
            params={"q": query},
            timeout=10,
        )
        data = r.json()
    except (requests.RequestException, ValueError):
        # Network failure / timeout, or a body that is not valid JSON.
        return None

    # Guard against both a missing and an empty "items" list.
    items = data.get("items") if isinstance(data, dict) else None
    if not items:
        return None

    info = items[0].get("volumeInfo", {})
    # First identifier that is an ISBN (10 or 13), if any.
    isbn = next(
        (
            ident.get("identifier")
            for ident in info.get("industryIdentifiers", [])
            if ident.get("type") in ("ISBN_10", "ISBN_13")
        ),
        None,
    )
    authors = info.get("authors")
    return {
        "title": info.get("title", "Unknown"),
        "authors": ", ".join(authors) if authors else "Unknown",
        "publisher": info.get("publisher", "Unknown"),
        "description": info.get("description", "No description"),
        "isbn": isbn,
        "preview_link": info.get("previewLink", "#"),
        "cover_image": info.get("imageLinks", {}).get("thumbnail"),
        "buy_links": build_buy_links(isbn, info.get("title", "")),
    }
def search_openlibrary(query):
    """Look up *query* on the Open Library search API.

    Only the first returned document is used.

    Args:
        query: Free-text search string or ISBN. Falsy values short-circuit.

    Returns:
        A dict with the keys "title", "authors", "publisher", "description",
        "isbn", "preview_link", "cover_image" and "buy_links", or None when
        the query is empty, the request fails, the response is not JSON, or
        no document was returned.
    """
    if not query:
        return None
    try:
        # Passing the query via ``params`` lets requests percent-encode it,
        # so "&", "#" or "+" in OCR text cannot break the query string.
        r = requests.get(
            "https://openlibrary.org/search.json",
            params={"q": query},
            timeout=10,
        )
        data = r.json()
    except (requests.RequestException, ValueError):
        # Network failure / timeout, or a body that is not valid JSON.
        return None

    docs = data.get("docs") if isinstance(data, dict) else None
    if not docs:
        return None
    d = docs[0]

    # "isbn" may be absent or an empty list; only index it when non-empty.
    isbns = d.get("isbn")
    isbn = isbns[0] if isinstance(isbns, list) and isbns else None
    cover = f"https://covers.openlibrary.org/b/isbn/{isbn}-L.jpg" if isbn else None

    # Accept either a list of sentences or a plain string; indexing a string
    # with [0] would otherwise yield just its first character.
    first_sentence = d.get("first_sentence")
    if isinstance(first_sentence, (list, tuple)):
        first_sentence = first_sentence[0] if first_sentence else None
    description = first_sentence if isinstance(first_sentence, str) and first_sentence else "No description"

    author_names = d.get("author_name")
    publishers = d.get("publisher")
    return {
        "title": d.get("title", "Unknown"),
        "authors": ", ".join(author_names) if author_names else "Unknown",
        "publisher": ", ".join(publishers) if publishers else "Unknown",
        "description": description,
        "isbn": isbn,
        "preview_link": f"https://openlibrary.org{d.get('key')}" if d.get("key") else "#",
        "cover_image": cover,
        "buy_links": build_buy_links(isbn, d.get("title", "")),
    }
def merge_results(g, o):
    """Combine a Google Books result with an Open Library result.

    When only one side is present it is returned as-is; when neither is,
    None is returned. Otherwise a copy of *g* is made and, for a fixed set of
    fields, any value that is empty or a placeholder ("Unknown",
    "No description") is replaced by the corresponding value from *o*.
    """
    if not (g or o):
        return None
    if not (g and o):
        return g or o

    placeholders = ("Unknown", "No description")
    combined = dict(g)
    for field in ("authors", "publisher", "description", "isbn", "cover_image"):
        current = combined.get(field)
        if current and current not in placeholders:
            continue
        combined[field] = o.get(field, current)
    return combined
# ---------------- Core Function ----------------
def _clip_box(box, width, height):
    """Return a detection's (x1, y1, x2, y2) as ints clipped to the image."""
    x1, y1, x2, y2 = (int(v) for v in box.xyxy[0])
    # A negative start index would silently wrap around when slicing.
    return (
        max(0, min(x1, width)),
        max(0, min(y1, height)),
        max(0, min(x2, width)),
        max(0, min(y2, height)),
    )


def _lookup_book(ocr_text):
    """Query both catalogues for *ocr_text* and merge their answers."""
    found_isbn = extract_isbn(ocr_text)
    if found_isbn:
        details = merge_results(
            search_google_books(found_isbn),
            search_openlibrary(found_isbn),
        )
        if details:
            return details
        # A misread ISBN finds nothing; fall through to the text query
        # instead of dropping the book.
    # Use the first five OCR words as the fallback query.
    q = " ".join(ocr_text.split()[:5])
    return merge_results(search_google_books(q), search_openlibrary(q))


def analyze_gradio(image):
    """Detect books in a shelf photo and fetch metadata for each one.

    Pipeline: run the YOLO detector, crop each box, binarise the crop, OCR it,
    then look the text up on Google Books and Open Library.

    Images are handed to the detector and to the OCR reader as in-memory
    arrays, so no temporary files are written (and none are left behind).

    Args:
        image: RGB image (the Gradio input is configured with type="pil"),
            or None when nothing was uploaded.

    Returns:
        ``{"books": [...]}`` with one merged metadata dict per detection that
        produced a lookup hit. Detection failure yields an empty list; a
        failure on a single box is logged and that box is skipped.
    """
    if image is None:
        return {"books": []}
    cv_img = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    try:
        results = model.predict(source=cv_img, conf=0.4, verbose=False)
    except Exception as e:
        print("YOLO Error:", e)
        return {"books": []}
    if not results:
        return {"books": []}

    height, width = cv_img.shape[:2]
    books = []
    for box in results[0].boxes:
        # Best-effort per box: one bad crop must not abort the whole scan.
        try:
            x1, y1, x2, y2 = _clip_box(box, width, height)
            crop = cv_img[y1:y2, x1:x2]
            if crop.size == 0:
                continue
            proc = preprocess_crop(crop)
            # OCR
            ocr_result = reader.readtext(proc, detail=0, paragraph=True)
            ocr_text = " ".join(ocr_result).strip()
            if not ocr_text:
                continue
            details = _lookup_book(ocr_text)
            if details:
                books.append(details)
        except Exception as e:
            print("Error box:", e)
            continue
    return {"books": books}
# ---------------- Gradio App ----------------
# Single-function UI: one image in, the JSON returned by analyze_gradio out.
iface = gr.Interface(
    title="Shelf Scanner",
    description="Upload a bookshelf image → detect books → OCR text → fetch details from Google Books & OpenLibrary.",
    fn=analyze_gradio,
    inputs=gr.Image(label="Upload Bookshelf Image", type="pil"),
    outputs="json",
)

if __name__ == "__main__":
    # Listen on all interfaces; the port comes from $PORT, defaulting to 7860.
    port = int(os.environ.get("PORT", 7860))
    iface.launch(server_name="0.0.0.0", server_port=port)