Spaces:
Sleeping
Sleeping
| import logging | |
| import sys | |
| import re | |
| import html | |
def setup_logger(name: str):
    """Return the logger registered under ``name``, set up for console output.

    The level is DEBUG when the ``DEBUG`` flag from ``src.config`` is truthy
    and WARNING otherwise. A stdout handler with a
    ``LEVEL | name | message`` format is attached only when the logger has no
    handlers yet, so calling this repeatedly for the same name does not stack
    handlers.
    """
    # Resolved at call time rather than at module import time.
    from src.config import DEBUG

    log = logging.getLogger(name)
    level = logging.DEBUG if DEBUG else logging.WARNING
    log.setLevel(level)

    # Already configured: hand it back without attaching another handler.
    if log.handlers:
        return log

    stream = logging.StreamHandler(sys.stdout)
    stream.setFormatter(logging.Formatter("%(levelname)s | %(name)s | %(message)s"))
    log.addHandler(stream)
    return log
def summarize_description(text: str, max_sentences: int = 2, max_chars: int = 240) -> str:
    """Create a clean, sentence-based summary for a book description.

    - Decodes HTML entities (e.g., ``&amp;`` → ``&``)
    - Normalizes whitespace
    - Keeps leading complete sentences (not raw words) while they fit
    - Falls back to a word-boundary character trim with an ellipsis when not
      even the first sentence fits

    Args:
        text: Raw description. Any falsy value yields the placeholder;
            other values are passed through ``str()``.
        max_sentences: Maximum number of leading sentences to keep.
        max_chars: Character budget for the joined sentences. The fallback
            trim may exceed it by one character (the appended ellipsis).

    Returns:
        The summary, or ``"—"`` when there is no usable text.
    """
    if not text:
        return "—"

    # Decode HTML entities and collapse every whitespace run to one space.
    cleaned = html.unescape(str(text))
    cleaned = re.sub(r"\s+", " ", cleaned).strip()
    if not cleaned:
        return "—"

    # Split into sentences on punctuation followed by whitespace.
    sentences = re.split(r"(?<=[.!?])\s+", cleaned)

    selected: list[str] = []
    total_len = 0  # exact length of " ".join(selected)
    for s in sentences:
        if not s:
            continue
        if len(selected) >= max_sentences:
            break
        # A joining space is only needed before the 2nd and later sentences;
        # it must be decided BEFORE appending, otherwise the first sentence
        # is charged for a separator it never gets.
        separator = 1 if selected else 0
        candidate_len = total_len + separator + len(s)
        if candidate_len > max_chars:
            break
        selected.append(s)
        total_len = candidate_len

    if selected:
        # The loop guarantees the joined length is within max_chars, so no
        # further capping is required here.
        return " ".join(selected)

    # Fallback: nothing fit (first sentence too long, or max_sentences <= 0).
    # Hard trim by characters, then back up to the previous word boundary.
    summary = cleaned[:max_chars].rstrip()
    if len(cleaned) > max_chars:
        summary = summary.rsplit(" ", 1)[0].rstrip() + "…"
    return summary
| def enrich_book_metadata(meta: dict | None, isbn: str) -> dict: | |
| """ | |
| Enrich book metadata with dynamic cover fetching if missing. | |
| Mutates and returns the meta dictionary. | |
| """ | |
| if not meta: | |
| meta = {} | |
| # 1. Get available metadata | |
| title = meta.get("title") | |
| thumbnail = meta.get("thumbnail") | |
| author = meta.get("authors", "Unknown") | |
| # 2. Validation Check | |
| is_valid_thumb = thumbnail and str(thumbnail).lower() not in ["nan", "none", "", "null"] and "/assets/cover-not-found.jpg" not in str(thumbnail) and "cover-not-found" not in str(thumbnail) | |
| # 3. Fetch if needed | |
| if not title or not is_valid_thumb: | |
| # Lazy import to avoid circular dependency | |
| from src.cover_fetcher import fetch_book_cover | |
| fetched_cover, fetched_authors, fetched_desc = fetch_book_cover(str(isbn)) | |
| # Update if we found better data | |
| if not is_valid_thumb and "cover-not-found" not in fetched_cover: | |
| meta["thumbnail"] = fetched_cover | |
| if not title: | |
| meta["title"] = f"Book {isbn}" | |
| if author == "Unknown" and fetched_authors != "Unknown": | |
| meta["authors"] = fetched_authors | |
| # 4. Final Fallback | |
| final_thumb = meta.get("thumbnail") | |
| if not final_thumb or str(final_thumb).lower() in ["nan", "none", "", "null"] or "cover-not-found" in str(final_thumb): | |
| meta["thumbnail"] = "/content/cover-not-found.jpg" | |
| return meta | |