Spaces:

namanraj
/

BookVisionAI

Paused

App Files Files Community

namanraj committed on Jan 8

Commit

64191f5

1 Parent(s): 8d9ac15

Add FastAPI backend for BookVision AI

Browse files

Files changed (22) hide show

README.md +4 -6
app/app/__init__.py +1 -0
app/app/__pycache__/__init__.cpython-310.pyc +0 -0
app/app/__pycache__/agent.cpython-310.pyc +0 -0
app/app/__pycache__/main.cpython-310.pyc +0 -0
app/app/agent.py +33 -0
app/app/main.py +53 -0
app/app/schema.py +20 -0
evaluation/evaluation/__init__.py +1 -0
evaluation/evaluation/__pycache__/__init__.cpython-310.pyc +0 -0
evaluation/evaluation/__pycache__/evaluation.cpython-310.pyc +0 -0
evaluation/evaluation/evaluation.py +72 -0
tools/tools/__pycache__/image_gen.cpython-310.pyc +0 -0
tools/tools/__pycache__/ocr.cpython-310.pyc +0 -0
tools/tools/__pycache__/prompt_generator.cpython-310.pyc +0 -0
tools/tools/__pycache__/summarizer.cpython-310.pyc +0 -0
tools/tools/__pycache__/web_search.cpython-310.pyc +0 -0
tools/tools/image_gen.py +30 -0
tools/tools/ocr.py +40 -0
tools/tools/prompt_generator.py +144 -0
tools/tools/summarizer.py +59 -0
tools/tools/web_search.py +82 -0

README.md CHANGED Viewed

@@ -1,10 +1,8 @@
 ---
-title: BOOKVIAIONAI
-emoji: 🏃
-colorFrom: gray
-colorTo: gray
 sdk: docker
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: BookVision AI Backend
+emoji: 📚
+colorFrom: purple
+colorTo: blue
 sdk: docker
 pinned: false
 ---

app/app/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+

app/app/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (165 Bytes). View file

app/app/__pycache__/agent.cpython-310.pyc ADDED Viewed

Binary file (974 Bytes). View file

app/app/__pycache__/main.cpython-310.pyc ADDED Viewed

Binary file (1.54 kB). View file

app/app/agent.py ADDED Viewed

	@@ -0,0 +1,33 @@

+from tools.ocr import run_ocr
+from tools.web_search import fetch_book_summary
+from tools.summarizer import summarize_page
+from tools.prompt_generator import generate_image_prompt
+from tools.image_gen import generate_image
+from evaluation.evaluation import evaluate_summary
def run_agent(image_path: str, book_name: str, author_name: str = ""):
    """Run the page-to-illustration pipeline for a single page image.

    Steps, in order: OCR the image, look up book context, summarise the
    page text, evaluate that summary against the OCR text, build an image
    prompt and finally generate the image.

    Args:
        image_path: Path of the page image handed to ``run_ocr``.
        book_name: Book title used for the context lookup.
        author_name: Optional author name used to narrow the lookup.

    Returns:
        A dict with the keys ``ocr_text``, ``ocr_confidence``,
        ``book_context``, ``summary``, ``image_prompt``, ``image`` and
        ``evaluation``.
    """
    text, ocr_confidence = run_ocr(image_path)
    context = fetch_book_summary(book_name, author_name)
    summary = summarize_page(text)

    # Score the summary against the raw OCR text before it is used further.
    verdict = evaluate_summary(text, summary)

    prompt = generate_image_prompt(page_summary=summary, book_context=context)
    picture = generate_image(prompt)

    output = {}
    output["ocr_text"] = text
    output["ocr_confidence"] = ocr_confidence
    output["book_context"] = context
    output["summary"] = summary
    output["image_prompt"] = prompt
    output["image"] = picture
    output["evaluation"] = verdict
    return output

app/app/main.py ADDED Viewed

	@@ -0,0 +1,53 @@

+from fastapi import FastAPI, UploadFile
+from fastapi.middleware.cors import CORSMiddleware
+import shutil
+from app.agent import run_agent
app = FastAPI()

# Enable CORS for Streamlit Cloud.
# Credentials are disabled on purpose: wildcard origins must not be combined
# with allow_credentials=True (any site could then make credentialed calls),
# and the FastAPI docs require explicit origins in that case.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Allows all origins
    allow_credentials=False,
    allow_methods=["*"],  # Allows all methods
    allow_headers=["*"],  # Allows all headers
)
@app.post("/process-page/")
async def process_page(
    book_name: str,
    file: UploadFile,
    author_name: str = ""
):
    """Process one uploaded page image and return the pipeline results.

    The upload is spooled to a temporary file (``run_agent`` takes a path),
    the pipeline is executed, and the generated image is returned as a
    base64 string so the response is JSON-serialisable.

    Args:
        book_name: Title of the book the page belongs to.
        file: Uploaded page image.
        author_name: Optional author name.

    Returns:
        On success, a dict with ``ocr_text``, ``ocr_confidence``,
        ``book_context``, ``summary``, ``image_prompt``, ``image``
        (base64 PNG, ``""`` when no image was produced) and ``evaluation``.
        On failure, a 500 ``JSONResponse`` with a ``detail`` message.
    """
    import base64
    import os
    import tempfile
    import traceback

    from fastapi.concurrency import run_in_threadpool
    from fastapi.responses import JSONResponse

    image_path = None
    try:
        # file.filename may be None; fall back to "no extension".
        suffix = os.path.splitext(file.filename or "")[1]
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            # Record the path first so cleanup still runs if the copy fails.
            image_path = tmp.name
            shutil.copyfileobj(file.file, tmp)

        # run_agent is synchronous (OCR plus several network calls); run it
        # in a worker thread so it does not block the event loop.
        result = await run_in_threadpool(
            run_agent, image_path, book_name, author_name
        )

        image_b64 = ""
        if result["image"]:
            image_b64 = base64.b64encode(result["image"]).decode("utf-8")

        return {
            "ocr_text": result["ocr_text"],
            "ocr_confidence": result["ocr_confidence"],
            "book_context": result["book_context"],
            "summary": result["summary"],
            "image_prompt": result["image_prompt"],
            "image": image_b64,
            # Previously computed by run_agent but dropped from the response.
            "evaluation": result.get("evaluation"),
        }
    except Exception as e:
        # Keep the full traceback in the server log only; returning it to the
        # client would expose internal paths and code structure.
        print(f"Server Error: {str(e)}\n{traceback.format_exc()}")
        return JSONResponse(
            status_code=500,
            content={"detail": f"Server Error: {str(e)}"},
        )
    finally:
        # The temp file is created with delete=False, so remove it ourselves.
        if image_path is not None:
            try:
                os.remove(image_path)
            except OSError:
                pass

app/app/schema.py ADDED Viewed

	@@ -0,0 +1,20 @@

+from pydantic import BaseModel
+from typing import List
class OCRResult(BaseModel):
    """Text recognised on a page plus a confidence value for it."""

    # Recognised text.
    text: str
    # Confidence of the recognition.
    # NOTE(review): presumably the 0-1 mean produced by tools.ocr.run_ocr;
    # this model is not referenced by the visible code, so confirm.
    confidence: float
class PageSummary(BaseModel):
    """Structured summary of a page: prose plus entity and emotion lists."""

    # Free-text summary of the page.
    summary: str
    # Names of the entities picked out of the page.
    key_entities: List[str]
    # Emotions associated with the page.
    emotions: List[str]
class ImagePrompt(BaseModel):
    """An image-generation prompt together with its style and mood."""

    # Prompt text.
    prompt: str
    # Visual style label.
    style: str
    # Mood label.
    mood: str
class EvaluationResult(BaseModel):
    """Outcome of checking a summary against its source text."""

    # Integer faithfulness rating.
    # NOTE(review): evaluate_summary documents a 1-5 scale but reports 0 on
    # API errors; confirm the intended range before validating with this model.
    faithfulness_score: int
    # True when the summary contains information absent from the source.
    hallucination: bool

evaluation/evaluation/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ # Evaluation module

evaluation/evaluation/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (172 Bytes). View file

evaluation/evaluation/__pycache__/evaluation.cpython-310.pyc ADDED Viewed

Binary file (2.07 kB). View file

evaluation/evaluation/evaluation.py ADDED Viewed

	@@ -0,0 +1,72 @@

+from huggingface_hub import InferenceClient
+import os
+import json
+from dotenv import load_dotenv
# Load variables from a .env file (if any) into the environment, then read
# the Hugging Face token and build the client used for the evaluation calls.
load_dotenv()
HF_API_KEY = os.environ.get("HF_API_KEY")
client = InferenceClient(token=HF_API_KEY)
def _parse_evaluation(result_text: str) -> dict:
    """Extract and normalise the evaluator's JSON verdict from raw model text.

    Falls back to ``{"faithfulness_score": 3, "hallucination": False}`` when
    no JSON object can be parsed.
    """
    fallback = {"faithfulness_score": 3, "hallucination": False}

    # The model may wrap the JSON in prose; take the outermost {...} span.
    start = result_text.find('{')
    end = result_text.rfind('}') + 1
    if start == -1 or end <= start:
        return fallback

    try:
        parsed = json.loads(result_text[start:end])
    except json.JSONDecodeError:
        return fallback
    if not isinstance(parsed, dict):
        return fallback

    # The model is free to answer "4", 4.0 or 7; coerce to an int in 1..5.
    try:
        score = int(parsed.get("faithfulness_score", 3))
    except (TypeError, ValueError):
        score = 3
    score = min(5, max(1, score))

    # bool("false") is True, so interpret string answers explicitly.
    flag = parsed.get("hallucination", False)
    if isinstance(flag, str):
        flag = flag.strip().lower() in ("true", "yes", "1")

    return {"faithfulness_score": score, "hallucination": bool(flag)}


def evaluate_summary(ocr_text: str, summary: str) -> dict:
    """
    Evaluate the faithfulness of a summary against the original OCR text.

    Args:
        ocr_text: Text extracted from the page.
        summary: Generated summary to be judged.

    Returns:
        A dict with ``faithfulness_score`` (int, 1-5) and ``hallucination``
        (bool). If the model reply cannot be parsed the neutral default
        (score 3, no hallucination) is returned. If the API call itself
        fails, the score is 0 and an extra ``error`` key holds the message.
    """
    prompt = f"""You are an evaluation assistant. Compare the original OCR text with the generated summary.
ORIGINAL OCR TEXT:
{ocr_text}
GENERATED SUMMARY:
{summary}
Evaluate:
1. Faithfulness Score (1-5): How accurately does the summary reflect the original text?
   - 5: Perfect, all details are accurate
   - 4: Very good, minor omissions
   - 3: Acceptable, some details missing or slightly off
   - 2: Poor, significant inaccuracies
   - 1: Very poor, mostly inaccurate
2. Hallucination: Does the summary contain information NOT present in the original text?
Respond ONLY with valid JSON in this exact format:
{{"faithfulness_score": <int 1-5>, "hallucination": <true/false>}}"""
    try:
        response = client.chat_completion(
            messages=[
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            model="HuggingFaceH4/zephyr-7b-beta",
            max_tokens=100,
            temperature=0.1
        )
        # content can be None; treat that as an unparseable reply.
        result_text = (response.choices[0].message.content or "").strip()
        return _parse_evaluation(result_text)
    except Exception as e:
        print(f"Evaluation error: {e}")
        return {"faithfulness_score": 0, "hallucination": False, "error": str(e)}

tools/tools/__pycache__/image_gen.cpython-310.pyc ADDED Viewed

Binary file (910 Bytes). View file

tools/tools/__pycache__/ocr.cpython-310.pyc ADDED Viewed

Binary file (1.21 kB). View file

tools/tools/__pycache__/prompt_generator.cpython-310.pyc ADDED Viewed

Binary file (4.41 kB). View file

tools/tools/__pycache__/summarizer.cpython-310.pyc ADDED Viewed

Binary file (2 kB). View file

tools/tools/__pycache__/web_search.cpython-310.pyc ADDED Viewed

Binary file (2.12 kB). View file

tools/tools/image_gen.py ADDED Viewed

	@@ -0,0 +1,30 @@

+from huggingface_hub import InferenceClient
+import os
+from dotenv import load_dotenv
# Load variables from a .env file (if any), then read the Hugging Face token
# and fix the text-to-image model used by generate_image.
load_dotenv()
HF_API_KEY = os.environ.get("HF_API_KEY")
HF_MODEL = "stabilityai/stable-diffusion-xl-base-1.0"
def generate_image(prompt: str):
    """Generate an image for ``prompt`` and return it as PNG bytes.

    A fresh ``InferenceClient`` is created per call and asked for a
    text-to-image result from the model named by ``HF_MODEL``.

    Returns:
        The PNG-encoded image bytes, or ``b""`` if generation or encoding
        raised an exception (the error is printed).
    """
    from io import BytesIO

    hf_client = InferenceClient(token=HF_API_KEY)
    try:
        picture = hf_client.text_to_image(prompt, model=HF_MODEL)
        # Serialise the returned image object into an in-memory PNG.
        with BytesIO() as buffer:
            picture.save(buffer, format='PNG')
            return buffer.getvalue()
    except Exception as e:
        print(f"Image generation error: {str(e)}")
        return b""

tools/tools/ocr.py ADDED Viewed

	@@ -0,0 +1,40 @@

+import cv2
+import pytesseract
+import os
+import shutil
# Resolve the Tesseract executable: prefer TESSERACT_PATH (defaulting to the
# standard Windows install path); if that file does not exist, look for
# "tesseract" on PATH; if neither is found, warn and keep the original value.
tesseract_cmd = os.environ.get(
    "TESSERACT_PATH", r"C:\Program Files\Tesseract-OCR\tesseract.exe"
)
if not os.path.exists(tesseract_cmd):
    tesseract_cmd_shutil = shutil.which("tesseract")
    if not tesseract_cmd_shutil:
        print(f"Warning: Tesseract not found at {tesseract_cmd}. OCR may fail.")
    else:
        tesseract_cmd = tesseract_cmd_shutil
pytesseract.pytesseract.tesseract_cmd = tesseract_cmd
def run_ocr(image_path: str):
    """Run Tesseract OCR on an image file.

    Args:
        image_path: Path of the image to read.

    Returns:
        A ``(text, confidence)`` tuple: the recognised words joined by single
        spaces, and the mean word confidence scaled to 0-1 (``0.0`` when no
        valid confidence values were reported).

    Raises:
        ValueError: If the image cannot be read from ``image_path``.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports failure by returning None; fail with a clear
        # message instead of an opaque error from cvtColor.
        raise ValueError(f"Could not read image for OCR: {image_path}")

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    data = pytesseract.image_to_data(
        gray, output_type=pytesseract.Output.DICT
    )
    text = " ".join(t for t in data["text"] if t.strip())

    # Keep only valid confidences (tesseract reports -1 for invalid entries).
    # Parse as float so decimal values such as "96.4" are not discarded.
    confs = []
    for c in data["conf"]:
        try:
            val = float(c)
        except (ValueError, TypeError):
            continue
        if val >= 0:
            confs.append(val)

    confidence = sum(confs) / len(confs) / 100 if confs else 0.0
    return text.strip(), confidence

tools/tools/prompt_generator.py ADDED Viewed

	@@ -0,0 +1,144 @@

+from huggingface_hub import InferenceClient
+import os
+from dotenv import load_dotenv
# Load variables from a .env file (if any) into the environment, then read
# the Hugging Face token and build the client used for prompt refinement.
load_dotenv()
HF_API_KEY = os.environ.get("HF_API_KEY")
client = InferenceClient(token=HF_API_KEY)
def extract_book_metadata(book_context: str) -> dict:
    """Extract structured metadata from Open Library context.

    The context is scanned line by line for the labels ``Title:``,
    ``Author:``, ``First Published:`` and ``Subjects:``.

    Args:
        book_context: Multi-line context string; may be empty or ``None``.

    Returns:
        A dict with the string keys ``title``, ``author``, ``year``,
        ``genre`` and ``subjects``. Missing fields are ``""``; ``genre`` is
        the first comma-separated entry of ``subjects``.
    """
    metadata = {
        "title": "",
        "author": "",
        "year": "",
        "genre": "",
        "subjects": ""
    }
    if not book_context:
        return metadata

    labels = (
        ("Title:", "title"),
        ("Author:", "author"),
        ("First Published:", "year"),
        ("Subjects:", "subjects"),
    )
    for line in book_context.split("\n"):
        for label, key in labels:
            if line.startswith(label):
                # Slice off only the leading label; str.replace would also
                # delete the label text if it reappears inside the value.
                metadata[key] = line[len(label):].strip()
                break

    metadata["genre"] = metadata["subjects"].split(",")[0].strip()
    return metadata
def get_era_style(year: str) -> str:
    """Map publication year to artistic era and style.

    Args:
        year: Publication year as text (anything ``int()`` accepts).

    Returns:
        A style description for the era containing ``year``, or the generic
        ``"classical book illustration style"`` when ``year`` is not a valid
        integer (for example ``""``, ``"Unknown"`` or ``None``).
    """
    try:
        yr = int(year)
    except (TypeError, ValueError):
        # Only conversion failures mean "unknown era"; anything else should
        # surface instead of being swallowed by a bare except.
        return "classical book illustration style"

    # (exclusive upper bound, style) pairs in ascending order.
    eras = (
        (1800, "classical painting style, baroque or renaissance aesthetics, rich oil painting textures"),
        (1850, "romantic era illustration, dramatic landscapes, emotional intensity, JMW Turner inspired"),
        (1900, "Victorian illustration style, detailed engravings, Pre-Raphaelite influences, realistic portraiture"),
        (1950, "early 20th century illustration, art nouveau elements, golden age illustration style"),
        (2000, "mid-century illustration, bold compositions, realistic rendering"),
    )
    for upper_bound, style in eras:
        if yr < upper_bound:
            return style
    return "contemporary digital art, cinematic composition, photorealistic elements"
def refine_prompt_with_llm(scene_summary: str, book_context: str, metadata: dict) -> "str | None":
    """Use LLM to create a refined, thematic prompt.

    Args:
        scene_summary: Scene description to turn into an illustration prompt.
        book_context: Raw book context; accepted for interface compatibility
            but not used by this function.
        metadata: Dict with ``title``, ``author``, ``year`` and ``genre``
            entries; empty or missing entries fall back to placeholders.

    Returns:
        The refined prompt text, or ``None`` when the LLM call fails or
        returns no usable text.
    """
    # Missing metadata is stored as "", so dict.get defaults never fire;
    # use `or` so the placeholders actually reach the prompt.
    title = metadata.get('title') or 'Unknown'
    author = metadata.get('author') or 'Unknown'
    year = metadata.get('year') or 'Unknown'
    genre = metadata.get('genre') or 'Literary Fiction'
    era_style = get_era_style(metadata.get("year", ""))
    try:
        response = client.chat_completion(
            messages=[
                {
                    "role": "system",
                    "content": """You are an expert art director creating image prompts for book illustrations.
Your task is to convert a scene description into a detailed visual prompt that:
1. Preserves the literary theme and mood of the book
2. Uses period-appropriate visual style
3. Focuses on concrete visual elements (lighting, composition, colors)
4. Avoids inventing details not in the scene
Output ONLY the refined prompt, no explanations."""
                },
                {
                    "role": "user",
                    "content": f"""Create an illustration prompt for this scene:
BOOK: {title} by {author}
ERA: {year}
GENRE: {genre}
RECOMMENDED STYLE: {era_style}
SCENE TO ILLUSTRATE:
{scene_summary}
Generate a detailed, visual prompt that captures the essence of this scene while staying true to the book's era and theme."""
                }
            ],
            model="HuggingFaceH4/zephyr-7b-beta",
            max_tokens=400,
            temperature=0.5
        )
        # Treat empty or whitespace-only replies as a failed refinement so
        # the caller can fall back to its template prompt.
        refined = (response.choices[0].message.content or "").strip()
        return refined or None
    except Exception as e:
        print(f"LLM refinement failed: {e}")
        return None
def generate_image_prompt(page_summary: str, book_context: str) -> str:
    """
    Generate a refined, theme-preserving image prompt.

    Uses the LLM to enhance the prompt with book-specific style, falling
    back to a fixed template when refinement yields nothing.

    Args:
        page_summary: Summary of the page to illustrate.
        book_context: Book context string parsed for title, year and genre.

    Returns:
        The final prompt text with surrounding whitespace stripped.
    """
    # Extract metadata from book context
    metadata = extract_book_metadata(book_context)
    # Get era-appropriate style
    era_style = get_era_style(metadata.get("year", ""))
    # Try LLM refinement
    refined_prompt = refine_prompt_with_llm(page_summary, book_context, metadata)
    if refined_prompt:
        # Add quality modifiers to LLM output
        final_prompt = f"""masterpiece, best quality, highly detailed illustration
{refined_prompt}
STYLE: {era_style}
QUALITY: professional book illustration, sharp details, rich textures"""
    else:
        # Fallback to template-based prompt. Missing metadata is stored as
        # "", so use `or` to make the placeholder defaults take effect.
        title = metadata.get('title') or 'Unknown'
        genre = metadata.get('genre') or 'Literary Fiction'
        final_prompt = f"""masterpiece, best quality, highly detailed illustration
BOOK: {title} ({metadata.get('year', '')})
GENRE: {genre}
SCENE:
{page_summary}
STYLE: {era_style}
ATMOSPHERE: Faithful to the literary source, emotionally resonant
QUALITY: professional book illustration, sharp details, rich textures"""
    return final_prompt.strip()
def validate_prompt(prompt: str, page_summary: str) -> bool:
    """Validates prompt is correctly formatted.

    Args:
        prompt: Prompt text to check; ``None`` or ``""`` is reported invalid.
        page_summary: Accepted for interface compatibility; not inspected.

    Returns:
        ``True`` when the prompt contains ``"SCENE"`` or, case-insensitively,
        ``"illustration"``; otherwise ``False``.
    """
    if not prompt:
        # A validator should report a missing prompt, not raise TypeError.
        return False
    return "SCENE" in prompt or "illustration" in prompt.lower()

tools/tools/summarizer.py ADDED Viewed

	@@ -0,0 +1,59 @@

+from huggingface_hub import InferenceClient
+import os
+from dotenv import load_dotenv
# Load variables from a .env file (if any) into the environment, then read
# the Hugging Face token and build the client used for summarisation.
load_dotenv()
HF_API_KEY = os.environ.get("HF_API_KEY")
client = InferenceClient(token=HF_API_KEY)

# System message sent with every summarisation request; it fixes the
# sectioned output format the model is asked to follow.
SYSTEM_PROMPT = """You are an expert literary analyst. Your task is to analyze book page text and extract key visual and narrative elements.
You must respond in the following structured format:
**SCENE DESCRIPTION**: A vivid 2-3 sentence description of what is happening in this passage.
**CHARACTERS**: List any characters mentioned with brief descriptions (appearance, emotion, action).
**SETTING**: Describe the physical location, time of day, weather, and atmosphere.
**MOOD**: The emotional tone (e.g., tense, romantic, melancholic, adventurous).
**KEY VISUAL ELEMENTS**: List 3-5 specific objects, colors, or visual details mentioned.
**ACTION**: What is the main action or event occurring?
Be specific and focus on visually representable details. If information is not available, make reasonable inferences based on context."""
def summarize_page(ocr_text: str) -> str:
    """Ask the LLM for a structured, illustration-oriented page analysis.

    Args:
        ocr_text: Text extracted from the page image.

    Returns:
        The model's reply. If ``ocr_text`` is empty or shorter than 20
        characters once stripped, a fixed "insufficient text" message is
        returned instead. If the request raises, the returned string starts
        with ``"Error during summarization: "``.
    """
    # Too little text to analyse: skip the model call entirely.
    if len((ocr_text or "").strip()) < 20:
        return "Insufficient text extracted from the image."

    user_message = f"""Analyze the following book page text and extract visual elements for illustration:
---
{ocr_text}
---
Provide your structured analysis:"""
    conversation = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": user_message},
    ]
    try:
        reply = client.chat_completion(
            messages=conversation,
            model="HuggingFaceH4/zephyr-7b-beta",
            max_tokens=800,
            temperature=0.4,
        )
        return reply.choices[0].message.content
    except Exception as e:
        return f"Error during summarization: {str(e)}"

tools/tools/web_search.py ADDED Viewed

	@@ -0,0 +1,82 @@

+import requests
+from urllib.parse import quote
def _open_library_summary(book_name: str, author_name: str):
    """Return a formatted summary from Open Library, or None if nothing matched."""
    params = {
        "title": book_name,
        "limit": 1
    }
    if author_name:
        params["author"] = author_name
    r = requests.get("https://openlibrary.org/search.json", params=params, timeout=10)
    if r.status_code != 200:
        return None
    docs = r.json().get("docs", [])
    if not docs:
        return None

    book = docs[0]
    title = book.get("title", book_name)
    authors = ", ".join(book.get("author_name", ["Unknown"]))
    first_sentence = " ".join(book.get("first_sentence", [""]))
    subjects = ", ".join(book.get("subject", [])[:5])
    publish_year = book.get("first_publish_year", "Unknown")

    summary = f"Title: {title}\n"
    summary += f"Author: {authors}\n"
    summary += f"First Published: {publish_year}\n"
    if subjects:
        summary += f"Subjects: {subjects}\n"
    if first_sentence:
        summary += f"Opening: {first_sentence}\n"

    # Try to get description from work. This is best-effort: a failure here
    # must not discard the summary already built, so it is logged and ignored.
    work_key = book.get("key", "")
    if work_key:
        try:
            work_url = f"https://openlibrary.org{work_key}.json"
            wr = requests.get(work_url, timeout=5)
            if wr.status_code == 200:
                desc = wr.json().get("description", "")
                if isinstance(desc, dict):
                    desc = desc.get("value", "")
                if desc:
                    summary += f"\nDescription: {desc[:500]}"
        except Exception as e:
            print(f"Open Library work lookup failed: {e}")
    return summary


def _duckduckgo_summary(search_query: str):
    """Return a DuckDuckGo Instant Answer abstract, or None if there is none."""
    ddg_url = f"https://api.duckduckgo.com/?q={quote(search_query + ' book')}&format=json&no_html=1"
    r = requests.get(ddg_url, timeout=10)
    if r.status_code != 200:
        return None
    abstract = r.json().get("Abstract", "")
    if abstract:
        return f"DuckDuckGo: {abstract}"
    return None


def fetch_book_summary(book_name: str, author_name: str = "") -> str:
    """
    Fetch book summary from Open Library API.

    Uses both book name and author for accurate results, and falls back to
    DuckDuckGo Instant Answers when Open Library yields nothing or fails.

    Args:
        book_name: Book title; fewer than 2 non-space characters returns "".
        author_name: Optional author name used to narrow the search.

    Returns:
        A labelled multi-line summary (``Title:``, ``Author:``,
        ``First Published:`` and optional ``Subjects:``, ``Opening:``,
        ``Description:`` lines) from Open Library, or a ``"DuckDuckGo: ..."``
        abstract, or a "No book information found" message.
    """
    if not book_name or len(book_name.strip()) < 2:
        return ""

    # Build search query with author if provided
    search_query = book_name
    if author_name:
        search_query = f"{book_name} {author_name}"

    # Strategy 1: Open Library Search API
    try:
        summary = _open_library_summary(book_name, author_name)
        if summary is not None:
            return summary
    except Exception as e:
        print(f"Open Library failed: {e}")

    # Strategy 2: DuckDuckGo Instant Answers
    try:
        answer = _duckduckgo_summary(search_query)
        if answer is not None:
            return answer
    except Exception as e:
        print(f"DuckDuckGo failed: {e}")

    return f"No book information found for '{book_name}'" + (f" by {author_name}" if author_name else "")