from huggingface_hub import InferenceClient
import os
from dotenv import load_dotenv

# Load variables from a local .env file (if any) into the process
# environment. This must run before the lookup below.
load_dotenv()

# Hugging Face access token; None when the variable is not set.
HF_API_KEY = os.environ.get("HF_API_KEY")

# Shared inference client used by summarize_page().
client = InferenceClient(token=HF_API_KEY)

# Instruction prompt passed to the model by summarize_page(). It fixes the
# reply layout to six labelled sections (scene description, characters,
# setting, mood, key visual elements, action). The text is sent verbatim,
# so any edit here changes what the model is asked to produce.
SYSTEM_PROMPT = """You are an expert literary analyst. Your task is to analyze book page text and extract key visual and narrative elements.

You must respond in the following structured format:

**SCENE DESCRIPTION**: A vivid 2-3 sentence description of what is happening in this passage.

**CHARACTERS**: List any characters mentioned with brief descriptions (appearance, emotion, action).

**SETTING**: Describe the physical location, time of day, weather, and atmosphere.

**MOOD**: The emotional tone (e.g., tense, romantic, melancholic, adventurous).

**KEY VISUAL ELEMENTS**: List 3-5 specific objects, colors, or visual details mentioned.

**ACTION**: What is the main action or event occurring?

Be specific and focus on visually representable details. If information is not available, make reasonable inferences based on context."""

def summarize_page(
    ocr_text: str,
    *,
    model: str = "google/gemma-2-2b-it",
    max_tokens: int = 800,
    temperature: float = 0.4,
) -> str:
    """Extract structured visual elements from book page text.

    Sends the page text together with ``SYSTEM_PROMPT`` to the module-level
    ``client`` and returns the model's structured analysis.

    This function never raises for request failures; problems are reported
    through the returned string so callers can display it directly.

    Args:
        ocr_text: Text extracted from a book page image.
        model: Identifier of the chat model to query.
        max_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature for generation.

    Returns:
        The model's reply on success.
        ``"Insufficient text extracted from the image."`` when ``ocr_text``
        is empty or shorter than 20 characters after stripping whitespace.
        A string starting with ``"Error during summarization:"`` when the
        request fails or the model returns no text.
    """
    if not ocr_text or len(ocr_text.strip()) < 20:
        return "Insufficient text extracted from the image."

    # Gemma instruction-tuned models define only "user" and "model" turns;
    # a separate "system" message can be rejected by their chat template.
    # Sending the instructions at the top of the user turn works for those
    # models and for models that do support a system role.
    prompt = (
        f"{SYSTEM_PROMPT}\n\n"
        "Analyze the following book page text and extract visual elements "
        "for illustration:\n\n"
        f"---\n{ocr_text}\n---\n\n"
        "Provide your structured analysis:"
    )

    try:
        response = client.chat_completion(
            messages=[{"role": "user", "content": prompt}],
            model=model,
            max_tokens=max_tokens,
            temperature=temperature,
        )
        content = response.choices[0].message.content
    except Exception as e:
        # Deliberate catch-all: report any transport, auth, or response
        # shape failure as text instead of raising.
        return f"Error during summarization: {e}"

    # The content field may be missing or blank; keep the str return contract.
    if not content or not content.strip():
        return "Error during summarization: model returned an empty response."
    return content