userIdc2024 committed on
Commit
1ab7bf0
·
verified ·
1 Parent(s): 2d2e74c

Delete prompt_generator.py

Browse files
Files changed (1) hide show
  1. prompt_generator.py +0 -235
prompt_generator.py DELETED
@@ -1,235 +0,0 @@
1
- from typing import List, Optional, Dict, Any
2
- from pydantic import BaseModel, Field
3
- from openai import OpenAI
4
- import os
5
- import re
6
- from dotenv import load_dotenv
7
- import base64
8
-
9
# Load variables from a .env file into the process environment before the
# client below reads OPENAI_API_KEY.
load_dotenv()

# Module-level OpenAI client shared by generate_segments_payload().
# os.getenv returns None when OPENAI_API_KEY is unset.
gpt_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
12
-
13
# Caller-supplied settings for one prompt-generation run.
# build_prompt() embeds `script` verbatim and dumps every field of this model
# (via model_dump) into the prompt's "AUTHORITATIVE SETTINGS" section, so
# field names and order are part of the prompt text.
class VeoInputs(BaseModel):
    script: str  # full script; split into per-segment lines before prompting
    style: str
    jsonFormat: str = 'standard'
    continuationMode: bool = True  # prompt asks for a continuity checkpoint when true
    voiceType: Optional[str] = None
    energyLevel: Optional[str] = None
    settingMode: str = 'single'
    cameraStyle: Optional[str] = None
    energyArc: Optional[str] = None
    narrativeStyle: Optional[str] = None
    accentRegion: Optional[str] = None
25
-
26
# Free-text start/end markers for one segment; nested under
# SegmentInfo.continuity_markers. All fields are required strings.
# NOTE(review): presumably used to line up the end of one segment with the
# start of the next — confirm against the consumer of the generated JSON.
class ContinuityMarkers(BaseModel):
    start_position: str
    end_position: str
    start_expression: str
    end_expression: str
    start_gesture: str
    end_gesture: str
    location_status: str
34
-
35
# Bookkeeping header of one generated segment (Segment.segment_info).
class SegmentInfo(BaseModel):
    segment_number: int
    total_segments: int  # the prompt requires this to equal the number of segments requested
    duration: str  # time-range string; the prompt dictates the exact value
    location: str
    continuity_markers: ContinuityMarkers
41
-
42
# Per-segment character text (Segment.character_description).
# The word minimums noted below are requested in the prompt; this model does
# not enforce them itself.
class CharacterDescription(BaseModel):
    current_state: str   # 100+ words, segment-specific
    voice_matching: str  # 100+ words, segment-specific
45
-
46
# Action text for each two-second beat of a segment, keyed in JSON by the
# time range ("0:00-0:02" ... "0:06-0:08").
class SynchronizedActions(BaseModel):
    # pydantic v2 configuration. Replaces the nested `class Config`, which is
    # the deprecated v1 spelling and triggers a deprecation warning under v2
    # (this file already relies on v2 APIs such as model_dump).
    # populate_by_name lets callers construct the model with either the Python
    # field names or the JSON aliases.
    model_config = {"populate_by_name": True}

    # Use legal Python identifiers; map to exact JSON keys with aliases
    f0000_0002: str = Field(alias="0:00-0:02")
    f0002_0004: str = Field(alias="0:02-0:04")
    f0004_0006: str = Field(alias="0:04-0:06")
    f0006_0008: str = Field(alias="0:06-0:08")
55
-
56
# Spoken line plus timed performance notes for one segment
# (Segment.action_timeline). All fields are required.
class ActionTimeline(BaseModel):
    dialogue: str
    synchronized_actions: SynchronizedActions  # one entry per two-second beat
    micro_expressions: str  # 50+ words
    breathing_rhythm: str
    location_transition: str
    continuity_checkpoint: str  # prompt asks for this to align with the next segment's start when continuationMode is true
63
-
64
# Scene/camera description for one segment (Segment.scene_continuity).
# Word minimums in the comments are requested from the model in the prompt;
# they are not enforced by this class.
class SceneContinuity(BaseModel):
    environment: str          # 250+ words
    camera_position: str      # 75+ words
    camera_movement: str      # detailed movement path
    lighting_state: str       # 50+ words
    background_elements: str  # 50+ words
    spatial_relationships: str
71
-
72
# One complete generated segment: header, character, scene and timeline.
class Segment(BaseModel):
    segment_info: SegmentInfo
    character_description: CharacterDescription
    scene_continuity: SceneContinuity
    action_timeline: ActionTimeline
77
-
78
# Top-level response model. generate_segments_payload() passes this class as
# `response_format` to the chat-completions parse call and dumps the parsed
# instance back to a dict.
class SegmentsPayload(BaseModel):
    segments: List[Segment]
80
-
81
def split_script_into_segments(script: str, seconds_per_segment: int = 8, words_per_second: float = 2.2) -> List[str]:
    """
    Group the script's sentences into segments that fit a per-segment word budget.

    The budget is ``int(seconds_per_segment * words_per_second)`` with a floor
    of 14 words. Sentences are never split: a sentence that would push the
    running total over the budget starts a new segment, and a single sentence
    longer than the budget becomes a segment on its own.

    Returns the list of segment strings; when the script yields no sentences
    (e.g. it is empty), a one-element list holding the stripped script.
    """
    cleaned = script.strip()
    word_budget = max(14, int(seconds_per_segment * words_per_second))

    buckets: List[str] = []
    pending: List[str] = []
    pending_words = 0

    # Split after sentence-ending punctuation; the lookbehind keeps the
    # punctuation attached to its sentence.
    for raw_sentence in re.split(r'(?<=[.!?])\s+', cleaned):
        sentence = raw_sentence.strip()
        if not sentence:
            continue
        n_words = len(sentence.split())
        if pending and pending_words + n_words > word_budget:
            # Budget would be exceeded: close the current segment and start
            # the next one with this sentence.
            buckets.append(" ".join(pending))
            pending = [sentence]
            pending_words = n_words
        else:
            pending.append(sentence)
            pending_words += n_words

    if pending:
        buckets.append(" ".join(pending))

    return buckets if buckets else [cleaned]
101
-
102
def build_prompt(inputs: VeoInputs, segment_texts: List[str]) -> str:
    """
    Assemble the user prompt that asks the model for one JSON object per segment.

    The prompt contains the hard rules for every segment, the full script, a
    dump of all settings in ``inputs``, and one bullet line per entry of
    ``segment_texts``.

    Args:
        inputs: Run settings; ``inputs.script`` is embedded verbatim and
            ``inputs.model_dump()`` is embedded as the authoritative settings.
        segment_texts: The script lines to cover, one per segment, in order.

    Returns:
        The complete prompt string (header + segment lines + output footer).
    """
    N = len(segment_texts)
    knobs = inputs.model_dump()

    # Derive every timing string from one value so the rules cannot contradict
    # each other or the SynchronizedActions schema (four two-second beats,
    # "0:00-0:02" .. "0:06-0:08"). The previous text asked for a malformed
    # "00:00-00:8" duration, a fifth "0:08-0:10" key the schema cannot hold,
    # and a 10-second dialogue budget for 8-second segments.
    seconds = 8
    duration = f"00:00-00:{seconds:02d}"
    sync_keys = ",".join(f'"0:{s:02d}-0:{s + 2:02d}"' for s in range(0, seconds, 2))

    header = f"""
You are a senior performance-marketing video director who writes segment-accurate, production-grade JSON prompts for Veo 3.
Return ONLY JSON that parses into the provided schema. Do not add fields. No markdown.

Task: Build prompts for exactly {N} segments of {seconds} seconds each.
Hard rules for EVERY segment:
- "duration" MUST be "{duration}"
- "current_state" = 100+ words, segment-specific
- "voice_matching" = 100+ words, segment-specific
- "environment" = 250+ words; "camera_position" = 75+ words; "lighting_state" = 50+ words min
- "camera_movement" = concrete, timestamped path (pan/tilt/dolly/handheld/steadicam)
- "synchronized_actions" must have exactly these keys: {sync_keys}
- Dialogue must fit in {seconds}s naturally with breath points.
- If continuationMode is true, include a continuity checkpoint aligning next segment’s start.
- Set "segment_info.total_segments" = {N} on each segment.
- Based on the character image provided, select everything as asked.
FULL SCRIPT:
\"\"\"{inputs.script.strip()}\"\"\"

AUTHORITATIVE SETTINGS (must be reflected):
{knobs}

SEGMENT LINES (cover in exactly {seconds} seconds each):
"""
    seg_lines = "\n".join(f"- Segment {i}: {t}" for i, t in enumerate(segment_texts, start=1))

    # Plain (non-f) string: the braces below are literal JSON braces.
    footer = """
OUTPUT:
Return JSON only as:
{
"segments": [ { ... per-segment object exactly matching the schema ... } ]
}
"""
    return header + seg_lines + footer
139
-
140
-
141
- # ---------- Validator (segment count, durations, keys, word counts, uniformity) ----------
142
-
143
# Minimum word counts, keyed by (segment section, field name).
MIN_WORDS = {
    ("character_description", "physical"): 200,
    ("character_description", "clothing"): 150,
    ("character_description", "current_state"): 100,
    ("character_description", "voice_matching"): 100,
    ("scene_continuity", "environment"): 250,
    ("scene_continuity", "camera_position"): 75,
    ("scene_continuity", "lighting_state"): 50,
    ("scene_continuity", "props_in_frame"): 75,
    ("scene_continuity", "background_elements"): 50,
    ("action_timeline", "micro_expressions"): 50,
}

# MIN_WORDS entries for fields that the SegmentsPayload schema in this file
# does not declare. A schema-conformant payload can never contain them, so
# they are only checked when a payload actually carries them; otherwise every
# valid payload would be rejected.
_FIELDS_NOT_IN_SCHEMA = frozenset({
    ("character_description", "physical"),
    ("character_description", "clothing"),
    ("scene_continuity", "props_in_frame"),
})

# Segments are 8 seconds long and SynchronizedActions has exactly these four
# aliases; the validator must agree with that schema.
_SEGMENT_DURATION = "00:00-00:08"
_REQUIRED_SYNC_KEYS = frozenset({"0:00-0:02", "0:02-0:04", "0:04-0:06", "0:06-0:08"})


def _word_count(text: str) -> int:
    """Return the number of word tokens in ``text``; ``None``/empty counts as 0."""
    return len(re.findall(r"\b\w+\b", text or ""))


def validate_segments_payload(payload: Dict[str, Any], expected_segments: int) -> List[str]:
    """
    Check a generated segments payload and return a list of problems found.

    Checks performed: segment count, per-segment ``duration`` and
    ``total_segments``, the exact set of ``synchronized_actions`` keys,
    minimum word counts from ``MIN_WORDS``, and that
    ``character_description.physical`` / ``.clothing`` are identical across
    segments.

    Args:
        payload: Dict with a ``"segments"`` list of per-segment dicts.
        expected_segments: Number of segments the payload must contain.

    Returns:
        Human-readable error strings; an empty list means the payload passed.
    """
    errors: List[str] = []
    segs = payload.get("segments") or []
    if len(segs) != expected_segments:
        errors.append(f"Expected {expected_segments} segments, got {len(segs)}.")

    physical_blocks, clothing_blocks = [], []

    for i, seg in enumerate(segs, start=1):
        # `or {}` guards against sections that are present but null, which
        # would otherwise raise AttributeError on .get().
        si = seg.get("segment_info") or {}
        if si.get("duration") != _SEGMENT_DURATION:
            errors.append(f"Segment {i}: duration must be {_SEGMENT_DURATION}.")
        if si.get("total_segments") != expected_segments:
            errors.append(f"Segment {i}: total_segments should be {expected_segments}, got {si.get('total_segments')}.")

        sync = (seg.get("action_timeline") or {}).get("synchronized_actions") or {}
        if set(sync.keys()) != _REQUIRED_SYNC_KEYS:
            errors.append(f"Segment {i}: synchronized_actions must have keys {sorted(_REQUIRED_SYNC_KEYS)}.")

        # Word-count checks
        for (section, field), minw in MIN_WORDS.items():
            block = seg.get(section) or {}
            if field not in block and (section, field) in _FIELDS_NOT_IN_SCHEMA:
                continue
            wc = _word_count(block.get(field, ""))
            if wc < minw:
                errors.append(f"Segment {i}: {section}.{field} must be >= {minw} words (got {wc}).")

        ch = seg.get("character_description") or {}
        physical_blocks.append(ch.get("physical", ""))
        clothing_blocks.append(ch.get("clothing", ""))

    # Uniformity across segments
    if expected_segments > 1:
        if len(set(physical_blocks)) > 1:
            errors.append("`character_description.physical` must be EXACTLY identical across all segments.")
        if len(set(clothing_blocks)) > 1:
            errors.append("`character_description.clothing` must be EXACTLY identical across all segments.")

    return errors
198
-
199
def _image_data_url(image: Any) -> str:
    """Return a base64 ``data:`` URL for ``image`` (a filesystem path or raw image bytes)."""
    import mimetypes  # local import: only needed when an image is supplied

    if isinstance(image, (bytes, bytearray, memoryview)):
        # Raw bytes were the only input the original b64encode call accepted;
        # keep supporting them.
        raw, mime = bytes(image), None
    else:
        path = os.fspath(image)
        mime, _ = mimetypes.guess_type(path)
        with open(path, "rb") as fh:
            raw = fh.read()

    encoded = base64.b64encode(raw).decode("utf-8")
    # Fall back to the previously hard-coded JPEG type when it cannot be guessed.
    return f"data:{mime or 'image/jpeg'};base64,{encoded}"


def generate_segments_payload(
    inputs: VeoInputs,
    image_path: Optional[str] = None,
    model: str = "gpt-4o",
) -> Dict[str, Any]:
    """
    Generate the per-segment prompt payload for a script with one LLM call.

    The script is split into 8-second segments, a prompt is built from them,
    and the model is asked to answer in the ``SegmentsPayload`` schema. When
    an image is supplied it is attached to the user message as a data URL.

    Args:
        inputs: Run settings, including the script.
        image_path: Path to the character image (raw image bytes are also
            accepted). ``None`` sends a text-only request.
        model: Chat model name passed to the OpenAI client.

    Returns:
        The parsed payload as a dict, dumped with JSON aliases
        (``model_dump(by_alias=True)``).

    Raises:
        OSError: If ``image_path`` cannot be read.
        RuntimeError: If the model response contains no parsed payload.
    """
    segment_texts = split_script_into_segments(inputs.script, seconds_per_segment=8)
    user_prompt = build_prompt(inputs, segment_texts)

    # The original passed the path string straight to base64.b64encode, which
    # needs bytes and therefore raised TypeError for every str path and for
    # the None default. Read the file instead, and skip the image part
    # entirely when no image was given.
    user_content = [{"type": "text", "text": user_prompt}]
    if image_path is not None:
        user_content.append(
            {
                "type": "image_url",
                "image_url": {"url": _image_data_url(image_path)},
            }
        )

    completion = gpt_client.beta.chat.completions.parse(
        model=model,
        response_format=SegmentsPayload,
        messages=[
            {"role": "system", "content": "You are a precise JSON-only generator that must satisfy a strict schema and explicit segment count."},
            {"role": "user", "content": user_content},
        ],
    )

    message = completion.choices[0].message
    parsed_obj = message.parsed
    if parsed_obj is None:
        # Surface this explicitly instead of failing later with an
        # AttributeError on None.
        refusal = getattr(message, "refusal", None)
        raise RuntimeError(f"Model returned no parsed payload (refusal: {refusal!r}).")

    return parsed_obj.model_dump(by_alias=True)