""" utils.py — PDF extraction, Gemini LLM, and Manim code helpers. """ from __future__ import annotations import re from google import genai from google.genai import types # ── PDF Text Extraction ─────────────────────────────────────────────────────── def extract_pdf_text(pdf_path: str) -> str: """Extract plain text from a PDF using pypdf.""" from pypdf import PdfReader reader = PdfReader(pdf_path) pages = [] for page in reader.pages: text = page.extract_text() if text: pages.append(text) return "\n\n".join(pages) # ── Gemini LLM ──────────────────────────────────────────────────────────────── def generate_manim_code(prompt_text: str, api_key: str) -> str: """Stream Manim code from Gemini 3 Flash Preview and return it as a string.""" client = genai.Client(api_key=api_key) model = "gemini-3-flash-preview" # Robust system prompt system_prompt = """ You are an expert Python developer and Manim animation engineer. Always produce code that: - Is fully correct and runnable with Manim v1.0+. - Uses clear, readable structure, proper imports, and PEP8-compliant formatting. - Produces visually correct animations based on the user's description. - Minimizes unnecessary complexity but keeps clarity. - Names the main scene class `OutputVideo`. - Returns code only, without extra explanation or markdown fences. - Handles edge cases gracefully. - Uses comments sparingly, only when clarifying complex parts. - When in doubt, use Google search effectively to verify facts, functions, or best practices. Always ensure your output is precise, accurate, and complete. """ contents = [ types.Content( role="system", parts=[types.Part.from_text(text=system_prompt)] ), types.Content( role="user", parts=[types.Part.from_text(text=prompt_text)] ) ] config = types.GenerateContentConfig( thinking_config=types.ThinkingConfig( thinking_level="HIGH" ) ) code = "" for chunk in client.models.generate_content_stream( model=model, contents=contents, config=config ): if chunk.text: code += chunk.text print(chunk.text, end="", flush=True) return code # ── Manim Code Sanitisation ─────────────────────────────────────────────────── def sanitize_manim_code(raw: str) -> str: """ Strip markdown fences, ensure correct imports and class name. """ code = re.sub(r"^```(?:python)?\s*", "", raw.strip(), flags=re.MULTILINE) code = re.sub(r"\s*```$", "", code.strip(), flags=re.MULTILINE) if "from manim import" not in code and "import manim" not in code: code = "from manim import *\n\n" + code # Ensure class is named OutputVideo code = re.sub(r"class\s+\w+\s*\(\s*Scene\s*\)", "class OutputVideo(Scene)", code) return code