Ransted committed on
Commit
75fcf75
·
1 Parent(s): 6db91bc

third commit

Browse files
Files changed (3) hide show
  1. app.py +175 -80
  2. best.pt +2 -2
  3. requirements.txt +4 -9
app.py CHANGED
@@ -1,106 +1,201 @@
 
1
  import os
 
2
  import cv2
 
3
  import requests
4
  import easyocr
5
- from flask import Flask, request, jsonify
6
  from ultralytics import YOLO
7
 
8
- app = Flask(__name__)
 
9
 
10
- # Load YOLO model
11
- model = YOLO("best.pt") # make sure best.pt is in the same folder
12
- # Load EasyOCR reader
13
  reader = easyocr.Reader(['en'], gpu=False)
14
 
15
-
16
- def search_google_books(query):
17
- """Search Google Books API using extracted text"""
18
- url = f"https://www.googleapis.com/books/v1/volumes?q={query}"
19
- r = requests.get(url).json()
20
-
21
- if "items" not in r:
22
- return None
23
-
24
- book = r["items"][0]["volumeInfo"]
25
-
26
- # Extract ISBN if available
27
- isbn = None
28
- if "industryIdentifiers" in book:
29
- for identifier in book["industryIdentifiers"]:
30
- if identifier["type"] in ["ISBN_10", "ISBN_13"]:
31
- isbn = identifier["identifier"]
32
- break
33
-
34
- # Extract cover image
35
- cover_img = None
36
- if "imageLinks" in book:
37
- cover_img = book["imageLinks"].get("thumbnail")
38
-
39
- details = {
40
- "title": book.get("title", "Unknown"),
41
- "authors": ", ".join(book.get("authors", [])),
42
- "publisher": book.get("publisher", "Unknown"),
43
- "description": book.get("description", "No description available"),
44
- "isbn": isbn,
45
- "preview_link": book.get("previewLink", "#"),
46
- "cover_image": cover_img,
47
- "buy_links": build_buy_links(isbn, book.get("title", ""))
48
- }
49
- return details
50
-
51
-
52
  def build_buy_links(isbn, title):
53
- """Return buying links from different sites"""
54
  links = {}
55
  if isbn:
56
  links["Amazon"] = f"https://www.amazon.in/s?k={isbn}"
57
  links["Flipkart"] = f"https://www.flipkart.com/search?q={isbn}"
58
  else:
59
- # fallback to title search
60
- links["Amazon"] = f"https://www.amazon.in/s?k={title.replace(' ', '+')}"
61
- links["Flipkart"] = f"https://www.flipkart.com/search?q={title.replace(' ', '+')}"
62
-
63
- # Google Books direct link
64
- links["Google Books"] = f"https://www.google.com/search?q={title.replace(' ', '+')}+site:books.google.com"
65
-
66
  return links
67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
- @app.route("/analyze", methods=["POST"])
70
- def analyze():
71
- """API endpoint to analyze uploaded bookshelf image"""
72
- if "image" not in request.files:
73
- return jsonify({"error": "No image uploaded"}), 400
74
-
75
- file = request.files["image"]
76
- filepath = os.path.join("/tmp", file.filename)
77
- file.save(filepath)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
- img = cv2.imread(filepath)
80
- results = model.predict(source=filepath, conf=0.5)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
  books = []
83
- for i, box in enumerate(results[0].boxes):
84
- x1, y1, x2, y2 = map(int, box.xyxy[0])
85
- crop = img[y1:y2, x1:x2]
86
- crop_path = f"/tmp/crop_{i}.jpg"
87
- cv2.imwrite(crop_path, crop)
88
-
89
- # OCR on cropped image
90
- ocr_result = reader.readtext(crop_path)
91
- text = " ".join([t[1] for t in ocr_result]).strip()
92
-
93
- if not text:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  continue
95
 
96
- # Query Google Books
97
- details = search_google_books(text)
98
- if details:
99
- books.append(details)
100
-
101
- return jsonify({"books": books})
102
 
 
 
 
 
 
 
 
 
 
 
 
103
 
104
  if __name__ == "__main__":
105
- port = int(os.environ.get("PORT", 7860)) # Hugging Face uses 7860
106
- app.run(host="0.0.0.0", port=port)
 
1
+ # app.py
2
  import os
3
+ import uuid
4
  import cv2
5
+ import numpy as np
6
  import requests
7
  import easyocr
8
+ import gradio as gr
9
  from ultralytics import YOLO
10
 
11
+ # Load YOLO model (put best.pt in same folder)
12
+ model = YOLO("best.pt")
13
 
14
+ # Load EasyOCR once
 
 
15
  reader = easyocr.Reader(['en'], gpu=False)
16
 
17
+ # ---- Helpers: external APIs and merging ----
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
def build_buy_links(isbn, title):
    """Build storefront search URLs for a book.

    Args:
        isbn: ISBN string used as the store search term, or a falsy value
            when no ISBN is known.
        title: Book title; used as the store search term when ``isbn`` is
            falsy and always used for the Google Books link. ``None`` is
            treated as an empty string.

    Returns:
        dict mapping "Amazon", "Flipkart" and "Google Books" to search URLs.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote_plus

    # quote_plus escapes reserved characters ("&", "#", "+", "=") that would
    # otherwise cut the query-string value short or change its meaning.
    title_term = quote_plus(title or "")
    store_term = quote_plus(str(isbn)) if isbn else title_term
    google_term = quote_plus(f"{title or ''} site:books.google.com")
    return {
        "Amazon": f"https://www.amazon.in/s?k={store_term}",
        "Flipkart": f"https://www.flipkart.com/search?q={store_term}",
        "Google Books": f"https://www.google.com/search?q={google_term}",
    }
29
 
30
def search_google_books(query, genre_hint=None):
    """Return details of the first Google Books volume matching *query*.

    Args:
        query: Free-text search string (e.g. OCR output). A falsy value
            short-circuits to ``None`` without any network call.
        genre_hint: Optional subject; when non-blank it is appended to the
            search as a ``subject:`` term.

    Returns:
        dict with keys title, authors, publisher, description, isbn,
        preview_link, cover_image and buy_links, or ``None`` when the query
        is empty, the request fails, or no volume is returned.
    """
    if not query:
        return None

    search_terms = query
    hint = str(genre_hint).strip() if genre_hint else ""
    if hint:
        search_terms = f"{query} subject:{hint}"

    try:
        # params= makes requests percent-encode the value, so characters
        # like "&" or "#" in OCR text cannot break the query string.
        response = requests.get(
            "https://www.googleapis.com/books/v1/volumes",
            params={"q": search_terms},
            timeout=10,
        )
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError):
        # Network/HTTP failure or a body that is not valid JSON: best-effort
        # lookup, so report "no match" instead of propagating.
        return None

    items = data.get("items") if isinstance(data, dict) else None
    if not items:
        return None

    info = items[0].get("volumeInfo", {})
    # First ISBN-10/13 identifier, if the volume lists one.
    isbn = next(
        (
            ident.get("identifier")
            for ident in info.get("industryIdentifiers", [])
            if ident.get("type") in ("ISBN_10", "ISBN_13")
        ),
        None,
    )
    authors = info.get("authors")
    return {
        "title": info.get("title", "Unknown"),
        "authors": ", ".join(authors) if authors else "Unknown",
        "publisher": info.get("publisher", "Unknown"),
        "description": info.get("description", "No description available"),
        "isbn": isbn,
        "preview_link": info.get("previewLink", "#"),
        "cover_image": info.get("imageLinks", {}).get("thumbnail"),
        "buy_links": build_buy_links(isbn, info.get("title", "")),
    }
62
 
63
def search_openlibrary(query):
    """Return details of the first Open Library search hit for *query*.

    Args:
        query: Free-text search string. A falsy value short-circuits to
            ``None`` without any network call.

    Returns:
        dict with keys title, authors, publisher, description, isbn,
        preview_link, cover_image and buy_links, or ``None`` when the query
        is empty, the request fails, or the search returns no docs.
    """
    if not query:
        return None

    try:
        # params= percent-encodes the query; only the first doc is used, so
        # ask the server for a single result.
        response = requests.get(
            "https://openlibrary.org/search.json",
            params={"q": query, "limit": 1},
            timeout=10,
        )
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError):
        # Network/HTTP failure or non-JSON body: treat as "no match".
        return None

    docs = data.get("docs") if isinstance(data, dict) else None
    if not docs:
        return None
    doc = docs[0]

    isbn_list = doc.get("isbn")
    isbn = isbn_list[0] if isinstance(isbn_list, list) and isbn_list else None

    # first_sentence is read as a list here; guard against a plain string so
    # indexing does not reduce it to its first character.
    first_sentence = doc.get("first_sentence")
    if isinstance(first_sentence, (list, tuple)):
        first_sentence = first_sentence[0] if first_sentence else None

    author_names = doc.get("author_name")
    publishers = doc.get("publisher")
    work_key = doc.get("key")
    return {
        "title": doc.get("title", "Unknown"),
        "authors": ", ".join(author_names) if author_names else "Unknown",
        "publisher": ", ".join(publishers) if publishers else "Unknown",
        "description": first_sentence or "No description available",
        "isbn": isbn,
        "preview_link": f"https://openlibrary.org{work_key}" if work_key else "#",
        "cover_image": (
            f"https://covers.openlibrary.org/b/isbn/{isbn}-L.jpg" if isbn else None
        ),
        "buy_links": build_buy_links(isbn, doc.get("title", "")),
    }
91
 
92
def merge_results(google_result, ol_result):
    """Merge two lookup results, preferring Google and filling gaps from Open Library.

    Args:
        google_result: Result dict from the Google Books lookup, or a falsy
            value when that lookup found nothing.
        ol_result: Result dict from the Open Library lookup, or a falsy value.

    Returns:
        ``None`` when both inputs are falsy; the single truthy input when only
        one is available; otherwise a new dict (inputs are not mutated) based
        on ``google_result`` whose missing or placeholder fields are replaced
        by real values from ``ol_result``.
    """
    if not google_result:
        return ol_result or None
    if not ol_result:
        return google_result

    # Default strings the lookups emit when a field is absent; they count as
    # "missing" on either side so a placeholder never beats real data and an
    # Open Library placeholder never overwrites anything.
    placeholders = ("Unknown", "No description available")

    def is_missing(value):
        return not value or value in placeholders

    merged = dict(google_result)  # shallow copy
    for key in ("authors", "publisher", "description", "isbn", "cover_image"):
        if is_missing(merged.get(key)) and not is_missing(ol_result.get(key)):
            merged[key] = ol_result[key]

    # Keep Google's buy_links when present, otherwise fall back to Open Library's.
    if not merged.get("buy_links"):
        merged["buy_links"] = ol_result.get("buy_links", {})
    return merged
108
+
109
+ # ---- Core analyze function used by Gradio ----
110
def analyze_gradio(image, genre=None):
    """Detect books in an image, OCR each detection and look up its metadata.

    Args:
        image: PIL image supplied by Gradio, or ``None`` when nothing was
            uploaded.
        genre: Optional genre string forwarded to the Google Books lookup.

    Returns:
        ``{"books": [...]}`` where each entry is the merged lookup result
        (title, authors, publisher, description, isbn, preview_link,
        cover_image, buy_links). The list is empty when there is no image,
        detection fails, or nothing could be read or matched.
    """
    if image is None:
        return {"books": []}

    # Uploads may be RGBA, palette or grayscale; normalise to three channels
    # before the RGB -> BGR conversion, which rejects other layouts.
    cv_img = cv2.cvtColor(np.array(image.convert("RGB")), cv2.COLOR_RGB2BGR)
    height, width = cv_img.shape[:2]

    # Detection and OCR work on in-memory arrays, so no temporary files are
    # written (and none are left behind in /tmp).
    try:
        results = model.predict(source=cv_img, conf=0.4, verbose=False)
        boxes = results[0].boxes
    except Exception as e:
        print("YOLO predict error:", e)
        return {"books": []}
    if boxes is None:
        boxes = []

    # Small padding (in pixels) to expand each bbox for OCR.
    PAD_PX = 8

    books = []
    for box in boxes:
        # Per-box boundary: one unreadable detection must not abort the rest.
        try:
            x1, y1, x2, y2 = (int(v) for v in box.xyxy[0].tolist())
            # Slice ends are exclusive, so the upper clamp is width/height
            # (not width - 1), keeping the last pixel row/column.
            x1 = max(0, x1 - PAD_PX)
            y1 = max(0, y1 - PAD_PX)
            x2 = min(width, x2 + PAD_PX)
            y2 = min(height, y2 + PAD_PX)
            if x2 <= x1 or y2 <= y1:
                continue

            crop = np.ascontiguousarray(cv_img[y1:y2, x1:x2])

            # Paragraph mode merges adjacent lines into one text block.
            ocr_result = reader.readtext(crop, detail=1, paragraph=True)
            text = " ".join(_ocr_texts(ocr_result)).strip()
            if not text:
                continue

            # Query both APIs and merge what they return.
            google_data = search_google_books(text, genre_hint=genre)
            ol_data = search_openlibrary(text)
            details = merge_results(google_data, ol_data)
            if details:
                books.append(details)
        except Exception as e:
            print("Error processing box:", e)
            continue

    return {"books": books}


def _ocr_texts(ocr_result):
    """Return the text strings from OCR items shaped (bbox, text[, conf]) or plain str."""
    texts = []
    for item in ocr_result:
        if isinstance(item, (list, tuple)) and len(item) >= 2:
            texts.append(item[1])
        elif isinstance(item, str):
            texts.append(item)
    return texts
 
 
 
 
 
187
 
188
+ # ---- Gradio interface (API-only) ----
189
# Gradio interface: a PIL image plus an optional genre string go to
# analyze_gradio, and its dict result is returned as JSON.
iface = gr.Interface(
    fn=analyze_gradio,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Textbox(label="Genre (optional)"),
    ],
    outputs="json",
    allow_flagging="never",
    description="Upload bookshelf image + optional genre. Detects book spines, OCRs text, queries Google Books + OpenLibrary and returns merged results.",
)

if __name__ == "__main__":
    # launch() accepts no `api` keyword (passing it raises TypeError); the
    # HTTP API is served without it. Bind to all interfaces on $PORT,
    # defaulting to 7860.
    iface.launch(
        server_name="0.0.0.0",
        server_port=int(os.environ.get("PORT", 7860)),
    )
 
best.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:06022f89242bbc8cb3981932b115cbf4d77a4e9abc7b3b750d8861884353ebcf
3
- size 6250339
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90108f607ef72d407612faab0715cd916afe9f7e2291a35fdf632af057609325
3
+ size 6246250
requirements.txt CHANGED
@@ -1,14 +1,9 @@
1
- flask
2
- ultralytics
3
- easyocr
4
- opencv-python-headless
5
- requests
6
  ultralytics>=8.0.0
7
  torch>=2.0.0
8
  torchvision>=0.15.0
9
- torchaudio>=2.0.0
10
- opencv-python>=4.7.0
11
  numpy>=1.24.0
12
  pillow>=9.0.0
13
- matplotlib>=3.7.0
14
- gradio>=3.39.0
 
1
+ gradio>=3.39.0
 
 
 
 
2
  ultralytics>=8.0.0
3
  torch>=2.0.0
4
  torchvision>=0.15.0
5
+ opencv-python-headless>=4.7.0
 
6
  numpy>=1.24.0
7
  pillow>=9.0.0
8
+ easyocr>=1.6
9
+ requests>=2.31.0