Spaces:
Running
Running
| """ | |
| utils.py — PDF extraction, Gemini LLM, and Manim code helpers. | |
| """ | |
| from __future__ import annotations | |
| import re | |
| from google import genai | |
| from google.genai import types | |
| # ── PDF Text Extraction ─────────────────────────────────────────────────────── | |
def extract_pdf_text(pdf_path: str) -> str:
    """Return the text of every page of a PDF, separated by blank lines.

    Pages for which pypdf yields no text (``None`` or an empty string) are
    left out of the result.

    Args:
        pdf_path: Path of the PDF file handed to ``pypdf.PdfReader``.

    Returns:
        The extracted page texts joined with ``"\\n\\n"``; an empty string
        when no page produced any text.
    """
    # Imported lazily so pypdf is only needed when a PDF is actually read.
    from pypdf import PdfReader

    page_texts = (page.extract_text() for page in PdfReader(pdf_path).pages)
    # filter(None, ...) keeps only the truthy (non-empty) page texts.
    return "\n\n".join(filter(None, page_texts))
| # ── Gemini LLM ──────────────────────────────────────────────────────────────── | |
def generate_manim_code(prompt_text: str, api_key: str) -> str:
    """Stream Manim code from Gemini 3 Flash Preview and return it as a string.

    Args:
        prompt_text: The user's request, sent as the single ``user`` turn.
        api_key: API key used to construct the ``genai.Client``.

    Returns:
        The concatenation of every non-empty text chunk streamed back by the
        model. Each chunk is also echoed to stdout as it arrives.
    """
    client = genai.Client(api_key=api_key)
    model = "gemini-3-flash-preview"

    # Robust system prompt
    system_prompt = (
        "\n"
        "You are an expert Python developer and Manim animation engineer.\n"
        "Always produce code that:\n"
        "- Is fully correct and runnable with Manim v1.0+.\n"
        "- Uses clear, readable structure, proper imports, and PEP8-compliant formatting.\n"
        "- Produces visually correct animations based on the user's description.\n"
        "- Minimizes unnecessary complexity but keeps clarity.\n"
        "- Names the main scene class `OutputVideo`.\n"
        "- Returns code only, without extra explanation or markdown fences.\n"
        "- Handles edge cases gracefully.\n"
        "- Uses comments sparingly, only when clarifying complex parts.\n"
        "- When in doubt, use Google search effectively to verify facts, functions, or best practices.\n"
        "Always ensure your output is precise, accurate, and complete.\n"
    )

    # Conversation turns may only use the "user" and "model" roles; the
    # system prompt is therefore passed through the request config instead
    # of being sent as a Content entry with role="system".
    contents = [
        types.Content(
            role="user",
            parts=[types.Part.from_text(text=prompt_text)],
        ),
    ]

    config = types.GenerateContentConfig(
        system_instruction=system_prompt,
        thinking_config=types.ThinkingConfig(thinking_level="HIGH"),
    )

    pieces = []
    stream = client.models.generate_content_stream(
        model=model,
        contents=contents,
        config=config,
    )
    for chunk in stream:
        text = chunk.text
        # Chunks without text are skipped.
        if text:
            pieces.append(text)
            # Echo progress so the caller can watch the code being generated.
            print(text, end="", flush=True)

    return "".join(pieces)
| # ── Manim Code Sanitisation ─────────────────────────────────────────────────── | |
def sanitize_manim_code(raw: str) -> str:
    """Turn raw LLM output into Manim source with an ``OutputVideo`` scene.

    Steps, in order:

    1. If the text contains complete markdown fenced blocks (with any
       language tag), keep only the contents of those blocks, joined by a
       blank line; prose around them is dropped. Otherwise remove stray
       fence lines (e.g. an opening fence whose closing fence is missing).
    2. Prepend ``from manim import *`` when the code has no manim import.
    3. If no top-level class is already called ``OutputVideo``, rename the
       last top-level class that directly subclasses a ``*Scene`` base
       (``Scene``, ``ThreeDScene``, ...) to ``OutputVideo``, keeping its
       base class. Other scene classes keep their names so the module never
       ends up with duplicate class definitions.

    Args:
        raw: Text returned by the model.

    Returns:
        The cleaned-up source code.
    """
    code = raw.strip()

    # An opening fence line (optional language tag), the block body, and a
    # closing fence that sits alone on its line.
    fenced_blocks = re.findall(
        r"^[ \t]*```[^\n`]*\n(.*?)^[ \t]*```[ \t]*\r?$",
        code,
        flags=re.MULTILINE | re.DOTALL,
    )
    if fenced_blocks:
        code = "\n\n".join(
            block.strip() for block in fenced_blocks if block.strip()
        )
    else:
        # No complete block: drop any lone fence lines without touching the
        # indentation of the surrounding code lines.
        code = re.sub(
            r"^[ \t]*```[^\n`]*$", "", code, flags=re.MULTILINE
        ).strip()

    if "from manim import" not in code and "import manim" not in code:
        code = "from manim import *\n\n" + code

    # Ensure a class named OutputVideo exists without clobbering one that is
    # already there or renaming several classes to the same name.
    has_output_video = re.search(
        r"^class\s+OutputVideo\b", code, flags=re.MULTILINE
    )
    if not has_output_video:
        scene_classes = list(
            re.finditer(
                r"^class\s+\w+\s*\(\s*(\w*Scene)\s*\)",
                code,
                flags=re.MULTILINE,
            )
        )
        if scene_classes:
            # The last definition is the one that would have won if every
            # scene class were renamed, so it is the one that gets the name.
            target = scene_classes[-1]
            code = (
                code[: target.start()]
                + f"class OutputVideo({target.group(1)})"
                + code[target.end():]
            )

    return code