"""Turn OCR'd book-page text into a structured visual/narrative analysis.

The module builds a single Hugging Face ``InferenceClient`` at import time
(using the ``HF_API_KEY`` environment variable) and exposes
``summarize_page``, which sends the page text to a chat model together with
a fixed system prompt describing the expected output sections.
"""

import os

from dotenv import load_dotenv
from huggingface_hub import InferenceClient

# Called before the environment lookup below so that a key supplied through
# dotenv is visible to os.getenv.
load_dotenv()

HF_API_KEY = os.getenv("HF_API_KEY")

client = InferenceClient(token=HF_API_KEY)

# Inputs whose stripped length is below this are rejected without calling
# the model.
_MIN_TEXT_LENGTH = 20

# Chat-completion settings used for every request.
_MODEL_ID = "google/gemma-2-2b-it"
_MAX_TOKENS = 800
_TEMPERATURE = 0.4

# Adjacent string literals are concatenated by the parser, so this is one
# single-line prompt; the fragments only keep the source lines short.
SYSTEM_PROMPT = (
    "You are an expert literary analyst. Your task is to analyze book page "
    "text and extract key visual and narrative elements. "
    "You must respond in the following structured format: "
    "**SCENE DESCRIPTION**: A vivid 2-3 sentence description of what is "
    "happening in this passage. "
    "**CHARACTERS**: List any characters mentioned with brief descriptions "
    "(appearance, emotion, action). "
    "**SETTING**: Describe the physical location, time of day, weather, and "
    "atmosphere. "
    "**MOOD**: The emotional tone (e.g., tense, romantic, melancholic, "
    "adventurous). "
    "**KEY VISUAL ELEMENTS**: List 3-5 specific objects, colors, or visual "
    "details mentioned. "
    "**ACTION**: What is the main action or event occurring? "
    "Be specific and focus on visually representable details. "
    "If information is not available, make reasonable inferences based on "
    "context."
)


def summarize_page(ocr_text: str) -> str:
    """Extract structured visual elements from book page text.

    Args:
        ocr_text: Text recognised from a book page image.

    Returns:
        The model's structured analysis on success. Otherwise one of:

        * ``"Insufficient text extracted from the image."`` when
          ``ocr_text`` is empty/``None`` or shorter than 20 characters
          after stripping whitespace (no request is made);
        * ``"Error during summarization: <details>"`` when the request
          raises or the model returns no text.

    This function never raises for request failures; callers that need to
    distinguish success from failure must inspect the returned string.
    """
    if not ocr_text or len(ocr_text.strip()) < _MIN_TEXT_LENGTH:
        return "Insufficient text extracted from the image."

    user_prompt = (
        "Analyze the following book page text and extract visual elements "
        f"for illustration: --- {ocr_text} --- "
        "Provide your structured analysis:"
    )

    try:
        response = client.chat_completion(
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": user_prompt},
            ],
            model=_MODEL_ID,
            max_tokens=_MAX_TOKENS,
            temperature=_TEMPERATURE,
        )
        content = response.choices[0].message.content
    except Exception as e:
        # Deliberate best-effort boundary: any failure (network, auth,
        # malformed/empty response) is reported to the caller as text.
        return f"Error during summarization: {str(e)}"

    # The message content is optional in the response; without this guard a
    # None would leak out of a function annotated to return str.
    if not content:
        return "Error during summarization: model returned an empty response."

    return content