Spaces:

Ransted
/

shelf-scanner

Sleeping

App Files Community

Ransted committed on Sep 27, 2025

Commit

75fcf75

1 Parent(s): 6db91bc

third commit

Browse files

Files changed (3) hide show

app.py +175 -80
best.pt +2 -2
requirements.txt +4 -9

app.py CHANGED Viewed

@@ -1,106 +1,201 @@
 import os
 import cv2
 import requests
 import easyocr
-from flask import Flask, request, jsonify
 from ultralytics import YOLO
-app = Flask(__name__)
-# Load YOLO model
-model = YOLO("best.pt")   # make sure best.pt is in the same folder
-# Load EasyOCR reader
 reader = easyocr.Reader(['en'], gpu=False)
-def search_google_books(query):
-    """Search Google Books API using extracted text"""
-    url = f"https://www.googleapis.com/books/v1/volumes?q={query}"
-    r = requests.get(url).json()
-    if "items" not in r:
-        return None
-    book = r["items"][0]["volumeInfo"]
-    # Extract ISBN if available
-    isbn = None
-    if "industryIdentifiers" in book:
-        for identifier in book["industryIdentifiers"]:
-            if identifier["type"] in ["ISBN_10", "ISBN_13"]:
-                isbn = identifier["identifier"]
-                break
-    # Extract cover image
-    cover_img = None
-    if "imageLinks" in book:
-        cover_img = book["imageLinks"].get("thumbnail")
-    details = {
-        "title": book.get("title", "Unknown"),
-        "authors": ", ".join(book.get("authors", [])),
-        "publisher": book.get("publisher", "Unknown"),
-        "description": book.get("description", "No description available"),
-        "isbn": isbn,
-        "preview_link": book.get("previewLink", "#"),
-        "cover_image": cover_img,
-        "buy_links": build_buy_links(isbn, book.get("title", ""))
-    }
-    return details
 def build_buy_links(isbn, title):
-    """Return buying links from different sites"""
     links = {}
     if isbn:
         links["Amazon"] = f"https://www.amazon.in/s?k={isbn}"
         links["Flipkart"] = f"https://www.flipkart.com/search?q={isbn}"
     else:
-        # fallback to title search
-        links["Amazon"] = f"https://www.amazon.in/s?k={title.replace(' ', '+')}"
-        links["Flipkart"] = f"https://www.flipkart.com/search?q={title.replace(' ', '+')}"
-    # Google Books direct link
-    links["Google Books"] = f"https://www.google.com/search?q={title.replace(' ', '+')}+site:books.google.com"
     return links
-@app.route("/analyze", methods=["POST"])
-def analyze():
-    """API endpoint to analyze uploaded bookshelf image"""
-    if "image" not in request.files:
-        return jsonify({"error": "No image uploaded"}), 400
-    file = request.files["image"]
-    filepath = os.path.join("/tmp", file.filename)
-    file.save(filepath)
-    img = cv2.imread(filepath)
-    results = model.predict(source=filepath, conf=0.5)
     books = []
-    for i, box in enumerate(results[0].boxes):
-        x1, y1, x2, y2 = map(int, box.xyxy[0])
-        crop = img[y1:y2, x1:x2]
-        crop_path = f"/tmp/crop_{i}.jpg"
-        cv2.imwrite(crop_path, crop)
-        # OCR on cropped image
-        ocr_result = reader.readtext(crop_path)
-        text = " ".join([t[1] for t in ocr_result]).strip()
-        if not text:
             continue
-        # Query Google Books
-        details = search_google_books(text)
-        if details:
-            books.append(details)
-    return jsonify({"books": books})
 if __name__ == "__main__":
-    port = int(os.environ.get("PORT", 7860))  # Hugging Face uses 7860
-    app.run(host="0.0.0.0", port=port)

+# app.py
 import os
+import uuid
 import cv2
+import numpy as np
 import requests
 import easyocr
+import gradio as gr
 from ultralytics import YOLO
# Load the YOLO detection weights once at import time. best.pt is expected to
# sit in the same folder as this file, so the path is resolved relative to the
# file itself rather than to the process working directory.
_MODEL_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "best.pt")
model = YOLO(_MODEL_PATH)
# Build the EasyOCR reader once (English, CPU only) and reuse it for every
# request instead of re-creating it per call.
reader = easyocr.Reader(['en'], gpu=False)
+# ---- Helpers: external APIs and merging ----
 def build_buy_links(isbn, title):
     links = {}
     if isbn:
         links["Amazon"] = f"https://www.amazon.in/s?k={isbn}"
         links["Flipkart"] = f"https://www.flipkart.com/search?q={isbn}"
     else:
+        q = requests.utils.requote_uri(title or "")
+        links["Amazon"] = f"https://www.amazon.in/s?k={q}"
+        links["Flipkart"] = f"https://www.flipkart.com/search?q={q}"
+    links["Google Books"] = f"https://www.google.com/search?q={requests.utils.requote_uri((title or '') + ' site:books.google.com')}"
     return links
def search_google_books(query, genre_hint=None):
    """Look up the best Google Books match for free-text ``query``.

    Args:
        query: Search text (e.g. OCR output from a book spine).
        genre_hint: Optional genre; when non-blank it is appended to the
            search expression as a ``subject:`` term.

    Returns:
        dict with the keys title, authors, publisher, description, isbn,
        preview_link, cover_image and buy_links describing the first result,
        or ``None`` when the query is empty, the request fails, or there are
        no results.
    """
    if not query:
        return None

    search_expr = query
    genre = (genre_hint or "").strip()
    if genre:
        search_expr = f"{query} subject:{genre}"

    try:
        # Passing the expression via ``params`` lets requests percent-encode
        # it, so characters like '&' or '#' in OCR text cannot truncate the
        # query or inject extra parameters.
        resp = requests.get(
            "https://www.googleapis.com/books/v1/volumes",
            params={"q": search_expr},
            timeout=10,
        )
        resp.raise_for_status()
        data = resp.json()
    except (requests.RequestException, ValueError):
        # Network/HTTP failure or a body that is not valid JSON: no result.
        return None

    items = data.get("items") if isinstance(data, dict) else None
    if not items:
        return None

    info = items[0].get("volumeInfo", {})
    # First ISBN-10/ISBN-13 identifier listed for the volume, if any.
    isbn = next(
        (
            ident.get("identifier")
            for ident in info.get("industryIdentifiers", [])
            if ident.get("type") in ("ISBN_10", "ISBN_13")
        ),
        None,
    )
    authors = info.get("authors")
    return {
        "title": info.get("title", "Unknown"),
        "authors": ", ".join(authors) if authors else "Unknown",
        "publisher": info.get("publisher", "Unknown"),
        "description": info.get("description", "No description available"),
        "isbn": isbn,
        "preview_link": info.get("previewLink", "#"),
        "cover_image": info.get("imageLinks", {}).get("thumbnail"),
        "buy_links": build_buy_links(isbn, info.get("title", "")),
    }
def search_openlibrary(query):
    """Look up the best Open Library match for free-text ``query``.

    Args:
        query: Search text (e.g. OCR output from a book spine).

    Returns:
        dict with the keys title, authors, publisher, description, isbn,
        preview_link, cover_image and buy_links describing the first search
        document, or ``None`` when the query is empty, the request fails, or
        there are no results.
    """
    if not query:
        return None

    try:
        # ``params`` makes requests percent-encode the query, so reserved
        # characters in OCR text ('&', '#', ...) cannot break the URL.
        resp = requests.get(
            "https://openlibrary.org/search.json",
            params={"q": query},
            timeout=10,
        )
        resp.raise_for_status()
        data = resp.json()
    except (requests.RequestException, ValueError):
        # Network/HTTP failure or a body that is not valid JSON: no result.
        return None

    docs = data.get("docs") if isinstance(data, dict) else None
    if not docs:
        return None
    doc = docs[0]

    isbns = doc.get("isbn")
    isbn = isbns[0] if isinstance(isbns, list) and isbns else None

    # first_sentence is read as a list of strings; a bare string is accepted
    # as-is so it is not reduced to its first character.
    first_sentence = doc.get("first_sentence")
    if isinstance(first_sentence, (list, tuple)):
        first_sentence = first_sentence[0] if first_sentence else None

    key = doc.get("key")
    return {
        "title": doc.get("title", "Unknown"),
        "authors": _join_names(doc.get("author_name")),
        "publisher": _join_names(doc.get("publisher")),
        "description": first_sentence or "No description available",
        "isbn": isbn,
        "preview_link": f"https://openlibrary.org{key}" if key else "#",
        "cover_image": f"https://covers.openlibrary.org/b/isbn/{isbn}-L.jpg" if isbn else None,
        "buy_links": build_buy_links(isbn, doc.get("title", "")),
    }


def _join_names(value):
    """Render a string or list of strings as comma-separated text ("Unknown" if empty)."""
    if not value:
        return "Unknown"
    if isinstance(value, str):
        return value
    return ", ".join(str(v) for v in value)
def merge_results(google_result, ol_result):
    """Merge two book-detail dicts, preferring Google and filling gaps from Open Library.

    A field counts as missing when it is falsy or holds one of the placeholder
    strings the search helpers emit ("Unknown", "No description available",
    "#"). A missing Google field is replaced only when Open Library has a real
    value for it, so one placeholder never overwrites another value.

    Args:
        google_result: Detail dict from Google Books, or a falsy value.
        ol_result: Detail dict from Open Library, or a falsy value.

    Returns:
        ``None`` if both inputs are falsy, the other input unchanged if only
        one is present, otherwise a new dict (the inputs are not mutated).
    """
    if not google_result:
        return ol_result or None
    if not ol_result:
        return google_result

    placeholders = ("Unknown", "No description available", "#")

    def _is_missing(value):
        return not value or value in placeholders

    merged = dict(google_result)  # shallow copy; caller's dict stays intact
    fillable = (
        "title",
        "authors",
        "publisher",
        "description",
        "isbn",
        "cover_image",
        "preview_link",
    )
    for key in fillable:
        current = merged.get(key)
        if _is_missing(current):
            candidate = ol_result.get(key)
            merged[key] = current if _is_missing(candidate) else candidate

    # Keep Google's buy links when it has any; otherwise fall back to OL's.
    if not merged.get("buy_links"):
        merged["buy_links"] = ol_result.get("buy_links", {})
    return merged
+# ---- Core analyze function used by Gradio ----
def analyze_gradio(image, genre=None):
    """Detect books in an image, OCR each detection and look up book details.

    Args:
        image: PIL image supplied by Gradio, or ``None`` when nothing was
            uploaded.
        genre: Optional genre string forwarded to the Google Books search.

    Returns:
        ``{"books": [...]}`` where each entry is a dict with the keys title,
        authors, publisher, description, isbn, preview_link, cover_image and
        buy_links. The list is empty when there is no image, detection fails,
        or nothing could be matched.
    """
    if image is None:
        return {"books": []}

    # Normalise to 3-channel RGB first: RGBA / palette / grayscale uploads
    # would otherwise break the RGB->BGR conversion below.
    try:
        rgb = np.array(image.convert("RGB"))
    except Exception as e:
        print("Image conversion error:", e)
        return {"books": []}
    cv_img = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
    h, w = cv_img.shape[:2]

    # Run detection directly on the in-memory BGR array; no temp file is
    # written, so nothing is left behind in /tmp.
    try:
        results = model.predict(source=cv_img, conf=0.4, verbose=False)
        boxes = results[0].boxes
    except Exception as e:
        print("YOLO predict error:", e)
        return {"books": []}
    if boxes is None:
        boxes = []

    books = []
    # small padding to expand bbox for OCR (in pixels)
    PAD_PX = 8
    for box in boxes:
        try:
            x1, y1, x2, y2 = map(int, box.xyxy[0].tolist())
            # Expand the box slightly and clamp to the image. Slice ends are
            # exclusive, so the upper bounds are w / h (not w - 1 / h - 1).
            x1 = max(0, x1 - PAD_PX)
            y1 = max(0, y1 - PAD_PX)
            x2 = min(w, x2 + PAD_PX)
            y2 = min(h, y2 + PAD_PX)
            if x2 <= x1 or y2 <= y1:
                continue
            crop = np.ascontiguousarray(cv_img[y1:y2, x1:x2])

            # OCR the crop in memory; paragraph mode merges nearby lines.
            ocr_result = reader.readtext(crop, detail=1, paragraph=True)
            # Each item is a sequence whose second element is the text; a
            # bare string is also accepted defensively.
            texts = []
            for item in ocr_result:
                if isinstance(item, (list, tuple)) and len(item) >= 2:
                    texts.append(item[1])
                elif isinstance(item, str):
                    texts.append(item)
            text = " ".join(texts).strip()
            if not text:
                continue

            # Query both APIs and merge, preferring Google's fields.
            google_data = search_google_books(text, genre_hint=genre)
            ol_data = search_openlibrary(text)
            details = merge_results(google_data, ol_data)
            if details:
                books.append(details)
        except Exception as e:
            # Best effort per detection: log and move on to the next box.
            print("Error processing box:", e)
            continue
    return {"books": books}
+# ---- Gradio interface (API-only) ----
# Gradio wrapper around analyze_gradio: an image plus an optional genre text
# box in, a JSON document out.
iface = gr.Interface(
    fn=analyze_gradio,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Textbox(label="Genre (optional)"),
    ],
    outputs="json",
    allow_flagging="never",
    description="Upload bookshelf image + optional genre. Detects book spines, OCRs text, queries Google Books + OpenLibrary and returns merged results.",
)

if __name__ == "__main__":
    # Listen on all interfaces; the port comes from $PORT, defaulting to 7860.
    port = int(os.environ.get("PORT", 7860))
    # NOTE: launch() does not accept an `api` keyword (passing it raises
    # TypeError), so it is not passed here.
    iface.launch(server_name="0.0.0.0", server_port=port)

best.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:06022f89242bbc8cb3981932b115cbf4d77a4e9abc7b3b750d8861884353ebcf
-size 6250339

 version https://git-lfs.github.com/spec/v1
+oid sha256:90108f607ef72d407612faab0715cd916afe9f7e2291a35fdf632af057609325
+size 6246250

requirements.txt CHANGED Viewed

@@ -1,14 +1,9 @@
-flask
-ultralytics
-easyocr
-opencv-python-headless
-requests
 ultralytics>=8.0.0
 torch>=2.0.0
 torchvision>=0.15.0
-torchaudio>=2.0.0
-opencv-python>=4.7.0
 numpy>=1.24.0
 pillow>=9.0.0
-matplotlib>=3.7.0
-gradio>=3.39.0

+gradio>=3.39.0
 ultralytics>=8.0.0
 torch>=2.0.0
 torchvision>=0.15.0
+opencv-python-headless>=4.7.0
 numpy>=1.24.0
 pillow>=9.0.0
+easyocr>=1.6
+requests>=2.31.0