BookVisionAI / tools /summarizer.py
namanraj's picture
Use google/gemma-2-2b-it for free chat completion
6925599
from huggingface_hub import InferenceClient
import os
from dotenv import load_dotenv
# Load variables from a local .env file (if any) before reading the token.
load_dotenv()

# Hugging Face access token; None when HF_API_KEY is not set.
HF_API_KEY = os.environ.get("HF_API_KEY")

# Shared inference client used by summarize_page for chat completions.
client = InferenceClient(token=HF_API_KEY)
# Instructions sent to the chat model by summarize_page. They ask for a fixed
# set of labelled sections (scene description, characters, setting, mood,
# key visual elements, action). This text is part of the request payload, so
# any edit to it changes what the model is asked to produce.
SYSTEM_PROMPT = """You are an expert literary analyst. Your task is to analyze book page text and extract key visual and narrative elements.
You must respond in the following structured format:
**SCENE DESCRIPTION**: A vivid 2-3 sentence description of what is happening in this passage.
**CHARACTERS**: List any characters mentioned with brief descriptions (appearance, emotion, action).
**SETTING**: Describe the physical location, time of day, weather, and atmosphere.
**MOOD**: The emotional tone (e.g., tense, romantic, melancholic, adventurous).
**KEY VISUAL ELEMENTS**: List 3-5 specific objects, colors, or visual details mentioned.
**ACTION**: What is the main action or event occurring?
Be specific and focus on visually representable details. If information is not available, make reasonable inferences based on context."""
def summarize_page(ocr_text: str) -> str:
    """Extract structured visual elements from book page text.

    Sends the page text, preceded by the ``SYSTEM_PROMPT`` instructions, to
    the ``google/gemma-2-2b-it`` chat model and returns the model's reply.

    Args:
        ocr_text: Text extracted from a book page image.

    Returns:
        The model's structured analysis. When the input is missing, not a
        string, or shorter than 20 characters after stripping, the fixed
        message ``"Insufficient text extracted from the image."`` is returned.
        When the request fails or the model returns no text, a string starting
        with ``"Error during summarization: "`` is returned instead of raising.
    """
    # Anything that is not a string (e.g. None from a failed OCR step) is
    # reported the same way as an empty page instead of raising.
    if not isinstance(ocr_text, str):
        return "Insufficient text extracted from the image."

    page_text = ocr_text.strip()
    if len(page_text) < 20:
        return "Insufficient text extracted from the image."

    try:
        # Gemma's chat format only defines "user" and "model" turns; a
        # separate "system" message is rejected by its chat template. The
        # instructions are therefore placed at the start of the user turn.
        prompt = "\n".join(
            (
                SYSTEM_PROMPT,
                "",
                "Analyze the following book page text and extract visual "
                "elements for illustration:",
                "---",
                page_text,
                "---",
                "Provide your structured analysis:",
            )
        )
        response = client.chat_completion(
            messages=[{"role": "user", "content": prompt}],
            model="google/gemma-2-2b-it",
            max_tokens=800,
            temperature=0.4,
        )
        content = response.choices[0].message.content
    except Exception as e:
        # Best-effort boundary: callers receive a readable message rather
        # than an exception.
        return f"Error during summarization: {str(e)}"

    # The message content can be None/empty; keep the declared str contract.
    if not content:
        return "Error during summarization: model returned an empty response."
    return content