Spaces:

userIdc2024
/

Video-Generator-Tools

Sleeping

App Files Files Community

userIdc2024 committed on Oct 10, 2025

Commit

1ab7bf0

verified ·

1 Parent(s): 2d2e74c

Delete prompt_generator.py

Browse files

Files changed (1) hide show

prompt_generator.py +0 -235

prompt_generator.py DELETED Viewed

@@ -1,235 +0,0 @@
-from typing import List, Optional, Dict, Any
-from pydantic import BaseModel, Field
-from openai import OpenAI
-import os
-import re
-from dotenv import load_dotenv
-import base64
# Module initialisation: load_dotenv() runs first so that the environment
# lookup below can see any OPENAI_API_KEY it loads, then the shared OpenAI
# client used by generate_segments_payload is created once at import time.
load_dotenv()
gpt_client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
class VeoInputs(BaseModel):
    """Caller-supplied request for prompt generation.

    ``script`` is the text that ``split_script_into_segments`` breaks into
    segments.  ``build_prompt`` serialises this model with ``model_dump()``
    into the prompt, so the declaration order below is also the order in
    which the values appear there.
    """

    # Required by every request.
    script: str
    style: str

    # Optional knobs; the defaults below apply when the caller omits them.
    jsonFormat: str = 'standard'
    continuationMode: bool = True  # referenced by the continuity rule in the prompt
    voiceType: Optional[str] = None
    energyLevel: Optional[str] = None
    settingMode: str = 'single'
    cameraStyle: Optional[str] = None
    energyArc: Optional[str] = None
    narrativeStyle: Optional[str] = None
    accentRegion: Optional[str] = None
class ContinuityMarkers(BaseModel):
    """Start/end state markers of one segment.

    Nested under ``SegmentInfo.continuity_markers``.  All fields are
    free-form strings produced by the model.
    """

    # Position at the first and last frame of the segment.
    start_position: str
    end_position: str

    # Facial expression at the first and last frame.
    start_expression: str
    end_expression: str

    # Gesture at the first and last frame.
    start_gesture: str
    end_gesture: str

    location_status: str
class SegmentInfo(BaseModel):
    """Bookkeeping header of a single segment."""

    segment_number: int
    total_segments: int  # the prompt asks for the overall segment count here
    duration: str        # time-range string; its exact value is dictated by the prompt
    location: str
    continuity_markers: ContinuityMarkers
class CharacterDescription(BaseModel):
    """Per-segment description of the on-screen character."""

    # 100+ words, segment-specific
    current_state: str
    # 100+ words, segment-specific
    voice_matching: str
class SynchronizedActions(BaseModel):
    """Actions for the four two-second windows of an 8-second segment.

    The JSON keys ("0:00-0:02", ...) are not legal Python identifiers, so
    each field uses a legal name and maps to the exact JSON key through an
    alias.  Dumping with ``by_alias=True`` yields the time-range keys.
    """

    # pydantic v2 configuration: accept the Python field names as well as the
    # aliases on input.  Replaces the deprecated inner ``class Config`` and
    # keeps the same ``populate_by_name`` setting.
    model_config = {"populate_by_name": True}

    f0000_0002: str = Field(alias="0:00-0:02")
    f0002_0004: str = Field(alias="0:02-0:04")
    f0004_0006: str = Field(alias="0:04-0:06")
    f0006_0008: str = Field(alias="0:06-0:08")
class ActionTimeline(BaseModel):
    """Dialogue of one segment together with the actions timed against it."""

    dialogue: str
    synchronized_actions: SynchronizedActions
    # 50+ words
    micro_expressions: str
    breathing_rhythm: str
    location_transition: str
    continuity_checkpoint: str
class SceneContinuity(BaseModel):
    """Scene, camera and lighting description of one segment.

    The word counts noted below are the minimums requested from the model.
    """

    # 250+ words
    environment: str
    # 75+ words
    camera_position: str
    # detailed movement path
    camera_movement: str
    # 50+ words
    lighting_state: str
    # 50+ words
    background_elements: str
    spatial_relationships: str
class Segment(BaseModel):
    """One complete segment: header, character, scene and action timeline."""

    segment_info: SegmentInfo
    character_description: CharacterDescription
    scene_continuity: SceneContinuity
    action_timeline: ActionTimeline
class SegmentsPayload(BaseModel):
    """Top-level response object: the ordered list of generated segments.

    Passed as ``response_format`` to the OpenAI parse call in
    ``generate_segments_payload``.
    """

    segments: List[Segment]
def split_script_into_segments(script: str, seconds_per_segment: int = 8, words_per_second: float = 2.2) -> List[str]:
    """
    Pack the script's sentences into buckets of about
    ``seconds_per_segment * words_per_second`` words (roughly 17-20 words
    per 8 seconds).  Adjust ``words_per_second`` if your VO tempo differs.

    Sentences are split after ``.``, ``!`` or ``?`` and packed greedily: a
    sentence starts a new segment when adding it would exceed the word
    target.  A sentence that is itself much longer than the target (more
    than 20% over) is cut into evenly sized word chunks first, so a run-on
    sentence or an unpunctuated script cannot end up as one over-long
    segment.

    Args:
        script: Full voice-over text.
        seconds_per_segment: Length of one segment in seconds.
        words_per_second: Assumed speaking rate.

    Returns:
        The segment texts in script order.  If the script contains no words
        the result is a single-element list holding the stripped script.
    """
    text = script.strip()
    sentences = [s.strip() for s in re.split(r'(?<=[.!?])\s+', text) if s.strip()]
    target = max(14, int(seconds_per_segment * words_per_second))  # minimal guard
    # Sentences up to this size are kept whole, which preserves natural
    # phrasing for slight overshoots.
    overflow_cap = int(target * 1.2)

    # Step 1: turn sentences into (text, word_count) units, chunking the
    # ones that blow past the cap.
    units = []
    for sentence in sentences:
        words = sentence.split()
        if len(words) <= overflow_cap:
            units.append((sentence, len(words)))
            continue
        # Ceil-divide twice so the chunks are balanced and each one is
        # at most `target` words long.
        n_chunks = -(-len(words) // target)
        chunk_size = -(-len(words) // n_chunks)
        for start in range(0, len(words), chunk_size):
            chunk = words[start:start + chunk_size]
            units.append((" ".join(chunk), len(chunk)))

    # Step 2: greedy packing of units into segments.
    segments, current, current_len = [], [], 0
    for piece, n_words in units:
        if current and current_len + n_words > target:
            segments.append(" ".join(current))
            current, current_len = [], 0
        current.append(piece)
        current_len += n_words
    if current:
        segments.append(" ".join(current))
    return segments or [text]
def build_prompt(inputs: "VeoInputs", segment_texts: List[str]) -> str:
    """Assemble the user prompt sent to the model.

    The prompt consists of a header with the hard rules, the full script and
    the caller's settings, followed by one bullet per segment text and a
    footer describing the expected JSON envelope.

    The rules are kept in line with the response schema and the 8-second
    segments: the duration string is "00:00-00:08" and the
    ``synchronized_actions`` keys are exactly the four two-second windows
    that ``SynchronizedActions`` defines.

    Args:
        inputs: Request settings; must provide ``script`` and ``model_dump``.
        segment_texts: Script text for each segment, in order.

    Returns:
        The complete prompt string.
    """
    N = len(segment_texts)
    # The script is already embedded verbatim under FULL SCRIPT, so it is
    # left out of the settings dump to avoid sending it twice.
    knobs = inputs.model_dump(exclude={"script"})
    # Must match the aliases declared on SynchronizedActions.
    sync_keys = ",".join(
        f'"{key}"' for key in ("0:00-0:02", "0:02-0:04", "0:04-0:06", "0:06-0:08")
    )
    header = f"""
You are a senior performance-marketing video director who writes segment-accurate, production-grade JSON prompts for Veo 3.
Return ONLY JSON that parses into the provided schema. Do not add fields. No markdown.
Task: Build prompts for exactly {N} segments of 8 seconds each.
Hard rules for EVERY segment:
- "duration" MUST be "00:00-00:08"
- "current_state" = 100+ words, segment-specific
- "voice_matching" = 100+ words, segment-specific
- "environment" = 250+ words; "camera_position" = 75+ words; "lighting_state" = 50+ words min
- "camera_movement" = concrete, timestamped path (pan/tilt/dolly/handheld/steadicam)
- "synchronized_actions" must have exactly these keys: {sync_keys}
- Dialogue must fit in 8s naturally with breath points.
- If continuationMode is true, include a continuity checkpoint aligning next segment’s start.
- Set "segment_info.total_segments" = {N} on each segment.
- Based on the character image provide select everything as asked.
FULL SCRIPT:
\"\"\"{inputs.script.strip()}\"\"\"
AUTHORITATIVE SETTINGS (must be reflected):
{knobs}
SEGMENT LINES (cover in exactly 8 seconds each):
"""
    seg_lines = "\n".join(
        f"- Segment {index}: {text}" for index, text in enumerate(segment_texts, start=1)
    )
    footer = """
OUTPUT:
Return JSON only as:
{
  "segments": [ { ... per-segment object exactly matching the schema ... } ]
}
"""
    return header + seg_lines + footer
-# ---------- Validator (segment count, durations, keys, word counts, uniformity) ----------
-MIN_WORDS = {
-    ("character_description", "physical"): 200,
-    ("character_description", "clothing"): 150,
-    ("character_description", "current_state"): 100,
-    ("character_description", "voice_matching"): 100,
-    ("scene_continuity", "environment"): 250,
-    ("scene_continuity", "camera_position"): 75,
-    ("scene_continuity", "lighting_state"): 50,
-    ("scene_continuity", "props_in_frame"): 75,
-    ("scene_continuity", "background_elements"): 50,
-    ("action_timeline", "micro_expressions"): 50,
-}
-def _word_count(text: str) -> int:
-    return len(re.findall(r"\b\w+\b", text or ""))
-def validate_segments_payload(payload: Dict[str, Any], expected_segments: int) -> List[str]:
-    errors: List[str] = []
-    segs = payload.get("segments", [])
-    if len(segs) != expected_segments:
-        errors.append(f"Expected {expected_segments} segments, got {len(segs)}.")
-    required_sync_keys = {"0:00-0:02","0:02-0:04","0:04-0:06","0:06-0:08", "0:08-0:10"}
-    physical_blocks, clothing_blocks = [], []
-    for i, seg in enumerate(segs, start=1):
-        si = seg.get("segment_info", {})
-        if si.get("duration") != "00:00-00:10":
-            errors.append(f"Segment {i}: duration must be 00:00-00:10.")
-        if si.get("total_segments") != expected_segments:
-            errors.append(f"Segment {i}: total_segments should be {expected_segments}, got {si.get('total_segments')}.")
-        sync = seg.get("action_timeline", {}).get("synchronized_actions", {})
-        if set(sync.keys()) != required_sync_keys:
-            errors.append(f"Segment {i}: synchronized_actions must have keys {sorted(required_sync_keys)}.")
-        # Word-count checks
-        for (section, field), minw in MIN_WORDS.items():
-            text = seg.get(section, {}).get(field, "")
-            wc = _word_count(text)
-            if wc < minw:
-                errors.append(f"Segment {i}: {section}.{field} must be >= {minw} words (got {wc}).")
-        ch = seg.get("character_description", {})
-        physical_blocks.append(ch.get("physical", ""))
-        clothing_blocks.append(ch.get("clothing", ""))
-    # Uniformity across segments
-    if expected_segments > 1:
-        if len(set(physical_blocks)) > 1:
-            errors.append("`character_description.physical` must be EXACTLY identical across all segments.")
-        if len(set(clothing_blocks)) > 1:
-            errors.append("`character_description.clothing` must be EXACTLY identical across all segments.")
-    return errors
def generate_segments_payload(
    inputs: VeoInputs,
    image_path: Optional[str] = None,
    model: str = "gpt-4o",
) -> Dict[str, Any]:
    """Generate the per-segment prompt payload for a script.

    The script is split into 8-second segments, a prompt is built for them
    and sent, together with the optional character image, to the OpenAI
    structured-output endpoint with ``SegmentsPayload`` as response format.

    Args:
        inputs: Script and settings for the request.
        image_path: Path of the character image to attach.  Raw image bytes
            are accepted as well.  When ``None`` the request is text-only.
        model: Name of the OpenAI chat model to call.

    Returns:
        The parsed payload as a dict, dumped by alias so that the
        ``synchronized_actions`` keys are the time-range strings.

    Raises:
        OSError: If ``image_path`` cannot be read.
        RuntimeError: If the model returns no parsed payload (e.g. a refusal).
    """
    import mimetypes

    segment_texts = split_script_into_segments(inputs.script, seconds_per_segment=8)
    user_content: List[Dict[str, Any]] = [
        {"type": "text", "text": build_prompt(inputs, segment_texts)},
    ]

    if image_path is not None:
        if isinstance(image_path, (bytes, bytearray)):
            # Already image data rather than a path; type unknown.
            image_bytes, mime_type = bytes(image_path), None
        else:
            # b64encode needs the file's bytes, not the path string.
            with open(image_path, "rb") as image_file:
                image_bytes = image_file.read()
            mime_type = mimetypes.guess_type(str(image_path))[0]
        encoded_image = base64.b64encode(image_bytes).decode("utf-8")
        user_content.append(
            {
                "type": "image_url",
                "image_url": {
                    # Fall back to JPEG when the type cannot be guessed.
                    "url": f"data:{mime_type or 'image/jpeg'};base64,{encoded_image}"
                },
            }
        )

    completion = gpt_client.beta.chat.completions.parse(
        model=model,
        response_format=SegmentsPayload,
        messages=[
            {"role": "system", "content": "You are a precise JSON-only generator that must satisfy a strict schema and explicit segment count."},
            {"role": "user", "content": user_content},
        ],
    )
    message = completion.choices[0].message
    if message.parsed is None:
        # Nothing was parsed into the schema; surface the reason if there is one.
        refusal = getattr(message, "refusal", None)
        raise RuntimeError(f"Model returned no parsed segments payload: {refusal or 'no content'}")
    return message.parsed.model_dump(by_alias=True)