bigbossmonster committed on
Commit
e98901e
·
verified ·
1 Parent(s): a5bd1e3

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +16 -674
Dockerfile CHANGED
@@ -1,679 +1,21 @@
1
- import logging
2
- import re
3
- from typing import Any, Dict, List, Optional
4
- from fastapi import FastAPI, HTTPException, Request
5
- from fastapi.middleware.cors import CORSMiddleware
6
- from pydantic import BaseModel
7
- import httpx
8
 
9
# --- SETUP ---
app = FastAPI(title="Gemini Smart Proxy")

# CORS configuration (equivalent to handleOptions in the original JS worker).
_CORS_OPTIONS = {
    "allow_origins": ["*"],
    "allow_credentials": True,
    "allow_methods": ["POST", "OPTIONS"],
    "allow_headers": ["Content-Type"],
}
app.add_middleware(CORSMiddleware, **_CORS_OPTIONS)

# Module-level logger for request/URL debugging.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
24
 
25
# --- INPUT CLEANING HELPER ---
def clean_input(text: str) -> str:
    """
    Aggressively sanitize a value destined for the request URL/query.

    Keeps only ASCII letters, digits, dashes, underscores, and dots;
    everything else (newlines, spaces, hidden control characters,
    non-ASCII text) is dropped, so the cleaned value can never break
    the URL that it is interpolated into.
    """
    if not text:
        return ""
    allowed_punct = "-._"
    return "".join(
        ch
        for ch in str(text)
        if (ch.isascii() and ch.isalnum()) or ch in allowed_punct
    )
35
 
36
# --- SYSTEM PROMPTS ---
# Copied exactly from your worker source
# NOTE(review): these strings ARE the runtime behavior — each is sent as a
# Gemini systemInstruction. Edits to wording change model output; keep the
# escape-sequence examples (e.g. \\an8, \\N) exactly as written.
PROMPTS = {
    # 1. VERIFIER PROMPTS
    # Single SRT-block + PDF-page-image verification; returns one JSON object.
    "VERIFY_SINGLE": """You are an advanced subtitle verifier and corrector. You will be given a single pair of data: an original SRT block and an IMAGE of a corresponding PDF page. The image is the ground truth.

Your task is to perform two actions and return the result as a single JSON object.

Your response MUST be a valid JSON object that strictly follows this schema:
{
  "type": "OBJECT",
  "properties": {
    "errorReport": { "type": "STRING" },
    "correctedSrt": { "type": "STRING" }
  },
  "required": ["errorReport", "correctedSrt"]
}

INSTRUCTIONS FOR EACH JSON KEY:
1. **"errorReport"**: Perform OCR on the image. Report ONLY significant errors (mismatch, extraneous/missing text). If none, this string MUST be "No significant errors found.".
2. **"correctedSrt"**: Generate a corrected SRT block for the current subtitle pair.
All timestamps must remain exactly as in the original, and the output must contain only the raw SRT text — no explanations, no JSON, no metadata.

Guidelines:
1. Source Priority:
• Use the OCR image text as the primary source of truth.
• If the OCR result contains Burmese text, use it as the main subtitle text.
2. Language Inclusion:
• Keep both Burmese and English lines if they exist.
• Do not include any other languages besides Burmese and English.
3. When OCR Text Is Missing:
• If the OCR image contains no readable text, keep the timestamps exactly the same and output a blank subtitle line.
• Do not delete, skip, or merge any subtitle blocks.
4. Preservation Rules:
• Always preserve original timestamps, line breaks, and Burmese punctuation (။, ၊).
• Maintain the same block numbering sequence as in the input.
5. Output Format:
• Output only the clean, corrected SRT block.
• No explanations, no quotes, no formatting outside the SRT syntax.""",

    # Batched variant of VERIFY_SINGLE; returns a JSON ARRAY, one object per pair.
    "VERIFY_BATCH": """You are an advanced subtitle verifier and corrector. You will be given a BATCH of data containing several pairs of an original SRT block and a corresponding PDF page IMAGE. The image is the ground truth.

Your task is to process EACH PAIR sequentially and return the result as a single JSON ARRAY, where each object in the array corresponds to a pair from the input.

Your response MUST be a valid JSON array that strictly follows this schema:
{
  "type": "ARRAY",
  "items": {
    "type": "OBJECT",
    "properties": {
      "errorReport": { "type": "STRING" },
      "correctedSrt": { "type": "STRING" }
    },
    "required": ["errorReport", "correctedSrt"]
  }
}

INSTRUCTIONS FOR EACH JSON OBJECT IN THE ARRAY:
1. **"errorReport"**: Perform OCR on the image for the current pair. Report ONLY significant errors (mismatch, extraneous/missing text). If none, this string MUST be "No significant errors found.".
2. “correctedSrt”:
Generate a corrected SRT block for the current subtitle pair.
All timestamps must remain exactly as in the original, and the output must contain only the raw SRT text — no explanations, no JSON, no metadata.

Guidelines:
1. Source Priority:
• Use the OCR image text as the primary source of truth.
• If the OCR result contains Burmese text, use it as the main subtitle text.
2. Language Inclusion:
• Keep both Burmese and English lines if they exist.
• Do not include any other languages besides Burmese and English.
3. When OCR Text Is Missing:
• If the OCR image contains no readable text, keep the timestamps exactly the same and output a blank subtitle line.
• Do not delete, skip, or merge any subtitle blocks.
4. Preservation Rules:
• Always preserve original timestamps, line breaks, and Burmese punctuation (။, ၊).
• Maintain the same block numbering sequence as in the input.
• Do not add missing Burmese punctuation (။) at the end of the line.
• Do not add Burmese punctuation (။)
5. Output Format:
• Output only the clean, corrected SRT block.
• No explanations, no quotes, no formatting outside the SRT syntax.

Process all pairs provided in the prompt and return a JSON array with the same number of objects as pairs you received.""",

    # 2. TRANSLATOR PROMPTS
    # {{TARGET_LANGUAGE}} is substituted at request time via str.replace.
    "TRANSLATE_BASE": """You are an expert subtitle translator. Your task is to translate the text in the provided JSON object to {{TARGET_LANGUAGE}}.
1. The user will provide a JSON object where keys are IDs (e.g., 'line_0', 'line_1') and values are the text lines.
2. You MUST translate the text *value* for each key into {{TARGET_LANGUAGE}}.
3. You MUST preserve all SRT/ASS formatting tags exactly as they appear (e.g., `{\\an8}`, `<i>`, `</i>`). Do NOT translate the content of these tags.
4. You MUST respond ONLY with a valid JSON object, containing the *exact same keys* as the input, with the translated text as the values.
5. Do not include `json` or ```json markers in your response. Respond only with the JSON object itself.
6. CRITICAL: Ensure all double quotes (") within the translated text *values* are properly escaped with a backslash (e.g., \\"example\\"). This is essential for the JSON to be valid.
7. CRITICAL JSON REQUIREMENT: If the subtitle text contains backslashes (e.g., in formatting tags like `{\\an8}`, `\\N`), you MUST escape them as double backslashes (e.g., `{\\\\an8}`, `\\\\N`) in the JSON string value. Failure to do this will break the JSON parser.
""",

    # Appended to TRANSLATE_BASE by default (promptVersion != "basic").
    "TRANSLATE_NATURAL_ADDON": """\n**CRITICAL INSTRUCTIONS:**
1. **Context-Aware Pronouns:** Pay close attention to the flow of conversation (within the batch) to choose the most appropriate pronouns. Translate based on the inferred relationship and formality between speakers.
2. **Natural Flow:** The translation should sound natural in the target language, not like a literal word-for-word translation.
3. **Formatting:** Keep punctuation appropriate for the target language.""",

    # Appended when the caller requests promptVersion == "basic".
    "TRANSLATE_BASIC_ADDON": """\n**CRITICAL INSTRUCTIONS:**
1. **Direct Translation:** Prioritize accuracy over style.""",

    # === NEW: CONTEXT ANALYZER PROMPT ===
    "ANALYZE_CONTEXT": """You are a linguistic expert specializing in Burmese translation context.
Analyze the provided subtitle excerpt (which is the beginning of a movie).

Your goal is to extract context to help a translator choose the correct Burmese Pronouns and Vocabulary.

Please output a concise "Translator's Note" covering:
1. **Genre & Tone:** (e.g., Action, Romance, Adult, Historical).
2. **Main Characters & Relationships:** Who is talking to whom? (e.g., "A and B are lovers", "C is D's boss").
3. **Pronoun Guide (CRITICAL):** - For each pair of speakers, specify the correct Burmese pronouns.
- Examples:
- "Male to Male (Friends): Use 'Min/Nga' (မင်း/ငါ)"
- "Female to Male (Lovers): Use 'Maung/Mel' or intimate 'Nin/Nga'"
- "Formal/Stranger: Use 'Khim-byar/Kyun-daw' or 'Shin/Kyun-ma'"

**Output Format:** Just provide the analysis text. Do not translate the subtitles yet.""",

    # 3. TRANSCRIBER PROMPTS
    # Whole-file transcription; output is raw SRT text.
    "TRANSCRIBE": """
You are an expert subtitle editor for movies and music videos.
Transcribe the provided audio file.
Your output MUST be *only* in the standard SRT (SubRip Text) file format.
Do not include any other text, explanations, or markdown formatting (like ```srt).

Follow these professional subtitling rules:
1. **Timing:** Timestamps must be precise and tightly synced to the spoken words.
2. **Line Breaks:** Keep subtitles to a maximum of 2 lines.
3. **Readability:** Break lines at natural pauses, sentence ends, or clauses. Do not leave single words on a line.
4. **Length:** Aim for a maximum of 42 characters per line. This is a guideline for readability.
5. **Format:** The SRT format must be strictly followed:
1
HH:MM:SS,MS --> HH:MM:SS,MS
First line of text.
Second line of text.

2
HH:MM:SS,MS --> HH:MM:SS,MS
Next subtitle.
""",

    # 3.1 CHUNK TRANSCRIPTION PROMPT
    # Per-chunk transcription; output is a JSON array with chunk-relative timestamps.
    "TRANSCRIBE_CHUNK": """
You are a precision subtitle generator.
Transcribe ONLY the spoken words in this audio clip.

OUTPUT FORMAT:
Return ONLY a valid JSON array of objects. Do not wrap in markdown.
Structure: [{"start": "MM:SS.mmm", "end": "MM:SS.mmm", "text": "spoken text"}]

CRITICAL RULES:
1. **NO HALLUCINATIONS:** If there is silence, music only, or no clear speech, return an empty array []. Do NOT invent text like "Welcome to the video", "Subscribe", or "Next steps".
2. **TIMESTAMPS:** Timestamps must be relative to the beginning of *this specific audio file* (00:00.000).
3. **VERBATIM:** Transcribe exactly what is said. Do not summarize.
4. **JSON ONLY:** Raw JSON array only.
""",

    # 4. VIDEO ANALYZER PROMPT
    "ANALYZE_VIDEO": """
You are an expert content moderator for a major video platform like YouTube. Your task is to analyze the provided video and assign it one of three moderation levels. You must distinguish between content that is NOT AD-FRIENDLY (but allowed) and content that VIOLATES COMMUNITY GUIDELINES (and must be removed).

**Your 3-Tier Decision:**

1. **"Safe" (Ad-Friendly):**
* **Description:** The content is clean, safe for all advertisers, and has no issues.
* **Action:** Full monetization.
* **Categories:** []

2. **"Borderline" (Not Ad-Friendly):**
* **Description:** The content is ALLOWED on the platform but is NOT suitable for most advertisers. It does NOT break community guidelines.
* **Action:** Limited or no ads (demonetization).
* **Categories:**
* **Inappropriate Language:** Frequent use of profanity.
* **Suggestive Content:** Non-explicit sexual themes, "beach fails," suggestive dancing, revealing outfits that are not nudity.
* **Moderate Violence:** Non-graphic violence (e.g., in news, documentaries, or video games).
* **Sensitive Topics:** Non-graphic discussion of war, tragedy, or other sensitive events.
* **Inauthentic (Low-Effort):** Low-effort, machine-generated slideshows, robotic TTS voices that are not for accessibility. (This is allowed, but often demonetized at a channel level).

3. **"Violation" (Community Guideline Break):**
* **Description:** The content is NOT ALLOWED on the platform and must be flagged for removal.
* **Action:** Remove video, issue channel strike.
* **Categories:**
* **Hate Speech:** Direct attacks or promotion of violence/hatred against a protected group.
* **Harassment & Bullying:** Malicious, targeted attacks on an individual.
* **Graphic Violence:** Depictions of extreme, gratuitous violence intended to shock (outside of clear, brief news context).
* **Dangerous Acts / Self-Harm:** Promoting or showing in detail acts that could lead to serious injury or suicide.
* **Pornography / Explicit Nudity:** Any explicit sexual acts or nudity intended for sexual gratification (not educational or artistic).
* **Harmful Misinformation:** Content that poses a direct, real-world harm (e.g., medical, civic).
* **Spam & Scams:** Deceptive practices, fraudulent schemes.

**Context is Key:** An educational video on breast cancer (non-sexual nudity) is "Safe". A news report on a conflict (sensitive topic) is "Borderline". A video promoting hatred is a "Violation".

**Your Response Format (JSON):**
You MUST return *only* a valid JSON object with this structure:
{
  "decision": "Safe" | "Borderline" | "Violation",
  "categories_found": ["category1", "category2", ...] | [],
  "reasoning": "A brief, neutral explanation for your decision. Justify *why* it fits the 'Safe', 'Borderline', or 'Violation' tier, citing context."
}

Analyze the video's visual and audio content and return the JSON report.
""",

    # 5. ANCHOR FINDER PROMPT
    # Maps source subtitle line numbers to best-matching reference line numbers.
    "ANCHOR_FINDER": """You are an expert subtitle anchor point finder. I will give you a JSON payload.
1. **"source_lines"**: A JSON object where keys are line numbers and values are subtitle text in a foreign language (e.g., Burmese).
2. **"reference_srt"**: The *entire* text content of the reference .srt file (in English).

Your job is to:
1. For *each* line in "source_lines", translate it to English.
2. Semantically search the "reference_srt" to find the *single best matching line*.
3. You MUST return a JSON object, mapping each *source* line number (as a string key) to the *matching reference* line number (as a number value).

**EXAMPLE:**
- **Source:** `{"100": "မြန်မာစာ..."}`
- **Reference:** "...110...Hello...111...How are you...112...Burmese text..."
- **Your Response:** `{"100": 112}`
""",

    # 6. REPHRASER DEFAULT PROMPT
    # Used for task "rephrase" unless the caller supplies data.customPrompt.
    "REPHRASE_DEFAULT": """You are an expert subtitle editor. Your task is to rephrase the text in the provided JSON object.
1. The user will provide a JSON object where keys are IDs (e.g., 'line_0', 'line_1') and values are the original subtitle text lines.
2. You MUST rephrase the text *value* for each key. The meaning must be identical, but the wording should be different (use synonyms, change sentence structure).
3. You MUST preserve all SRT/ASS formatting tags exactly as they appear (e.g., `{\\an8}`, `<i>`, `</i>`). Do NOT alter the content of these tags.
4. You MUST respond ONLY with a valid JSON object, containing the *exact same keys* as the input, with the rephrased text as the values.
5. Do not include `json` or ```json markers in your response. Respond only with the JSON object itself.
6. CRITICAL: Ensure all double quotes (") within the rephrased text *values* are properly escaped with a backslash (e.g., \\"example\\"). This is essential for the JSON to be valid.
7. Maintain the original language. Do not translate.
8. Do not change pronouns.
9. CRITICAL JSON REQUIREMENT: If the subtitle text contains backslashes (e.g., in formatting tags like `{\\an8}`, `\\N`), you MUST escape them as double backslashes (e.g., `{\\\\an8}`, `\\\\N`) in the JSON string value. Failure to do this will break the JSON parser.""",

    # 7. LINE FIXER PROMPTS
    # Selected by data.mode: "burmese" | "english" | anything else -> general.
    "LINE_FIXER_BURMESE": """You are a professional Burmese subtitle editor. Your task is to fix line breaks and split long subtitles.

RULES:
1. NO CONTENT CHANGE: Do not add, delete, or change words. Only adjust whitespace/newlines.
2. STRICT MAX 2 LINES: Every subtitle event must be 1 or 2 lines. Never 3.
3. SPLIT LONG LINES (CRITICAL):
- If a subtitle is too long for 2 lines, you MUST split it into multiple separate events.
- Return an ARRAY of strings for that ID.
- Example: "12": ["Part 1 text...", "Part 2 text..."]

4. BURMESE GRAMMAR & FLOW (CRITICAL):
- NO ORPHANS: Never leave a single short word (like "ထင်တယ်", "တယ်", "မယ်", "ပါ", "နော်") on a new line by itself. Join it to the previous line.
- NO SEPARATION: Do not separate modifiers like "အဲဒီ", "ဒီ", "ဟို" from the following noun. Do not start a line with particles.
- BALANCE: Try to make lines roughly equal length, UNLESS it breaks a grammar rule. Grammar always wins.

5. JSON FORMAT: Respond only with valid JSON. Escape double quotes properly.

CRITICAL JSON REQUIREMENT: If the subtitle text contains backslashes (e.g., in formatting tags like `{\\an8}`, `\\N`), you MUST escape them as double backslashes (e.g., `{\\\\an8}`, `\\\\N`) in the JSON string value. Failure to do this will break the JSON parser.""",

    "LINE_FIXER_ENGLISH": """You are a Netflix Certified Subtitle QC Editor. Your task is to conform the provided English subtitles to the "Netflix English Timed Text Style Guide".

RULES:
1. **Character Limitation:** Maximum 42 characters per line. Max 2 lines. Split if longer (return Array).
2. **Line Breaking Logic:** No splitting noun/article, name/surname. Break after punctuation/before conjunctions.
3. **Text Clean-Up:** Smart quotes. Smart ellipsis. Remove speaker labels unless off-screen. Remove filler words.
4. **JSON FORMAT:** Respond only with valid JSON. Key = ID. Value = String or Array of Strings. Escape double quotes.

CRITICAL JSON REQUIREMENT: If the subtitle text contains backslashes (e.g., in formatting tags like `{\\an8}`, `\\N`), you MUST escape them as double backslashes (e.g., `{\\\\an8}`, `\\\\N`) in the JSON string value. Failure to do this will break the JSON parser.""",

    "LINE_FIXER_GENERAL": """You are a Universal Subtitle Formatter. Your task is to clean and format raw text into professional subtitles for ANY language.

RULES:
1. **Structure:** Strictly 1 or 2 lines. Split >40 chars or >80 chars total into separate events (return Array).
2. **Pyramid Formatting:** Prefer "Bottom-Heavy" pyramid.
3. **Clean Up:** Fix basic punctuation. Remove double spaces.
4. **Restriction:** Do not change meaning.
5. **JSON FORMAT:** Respond only with valid JSON. Key = ID. Value = String or Array. Escape double quotes.

CRITICAL JSON REQUIREMENT: If the subtitle text contains backslashes (e.g., in formatting tags like `{\\an8}`, `\\N`), you MUST escape them as double backslashes (e.g., `{\\\\an8}`, `\\\\N`) in the JSON string value. Failure to do this will break the JSON parser.""",

    # Appended to any LINE_FIXER_* prompt when data.preventOverlap is truthy.
    "LINE_FIXER_OVERLAP_ADDON": """\n\n6. **TIMING & OVERLAP OPTIMIZATION (IMPORTANT):**\n - Since you are splitting lines, ensure each split part is concise and short. \n - Avoid cramming too much text into one block, as this causes reading speed issues and display overlaps.""",

    # 8. SUSPECT LINE FINDER PROMPT (UPDATED)
    "SUSPECT_CHECK": """You are a Senior Subtitle Quality Control (QC) Specialist specializing in Asian Language (Burmese/Korean/English) contexts.

**YOUR GOAL:** Identify *High-Confidence Logic Errors* that break immersion.
**DO NOT** flag minor stylistic choices, slightly awkward phrasing, or standard grammar variations.
**ONLY** return a result if you are >90% sure it is an error.

**LOOK FOR THESE SPECIFIC ERROR TYPES:**

1. **Contextual Logic Failures (The "Sheep vs. Thief" Rule):**
- Text that is technically a valid word but makes NO SENSE in the specific scene context.
- *Example:* A character trying to sleep counts "Thief 1, Thief 2" (Burmese 'Thu-khoe') instead of "Sheep 1, Sheep 2" (Burmese 'Thoe').
- *Example:* A character in a car says "I bought a *flower* to drive" (phonetic mix-up) instead of "car".

2. **Name/Entity Inconsistency:**
- A character's name changes spelling within the batch or compared to common transliteration norms.
- *Example:* "Lee Gi-wu" becomes "Lee Gi-pu" or "Yi Ki-woo" in the same scene.
- *Example:* "Gyaung-toe" (weird transliteration) vs "Gyeonggi-do" (standard place name).

3. **Nonsense / Typo / OCR Garbage:**
- Words that appear to be keyboard smashes or phonetic errors that result in gibberish.
- *Example:* "Dog Night" (Khway Nya) vs "Sniffer Dog" (Khway Nan) in a detective context.

*** SPECIAL INSTRUCTION ***
In addition to the standard logic checks, you MUST flag lines containing:
1. Broken Burmese encoding or rendering issues (e.g. 'န ေါ ေ', misplaced independent vowels, or character ordering errors).
2. Obvious keyboard mashing or nonsensical gibberish.
You must flag these as 'Encoding/Nonsense' errors even if you are not 90% confident about the intended meaning. These are high-priority formatting errors.
*** END SPECIAL INSTRUCTION ***


**INPUT FORMAT:**
A block of text containing subtitle lines prefixed with their Line ID (e.g., "ID: [Text]").

**OUTPUT FORMAT:**
Return a valid JSON Object containing a key "suspectLines". This key must be an Array of Objects.

{
  "suspectLines": [
    {
      "id": "Line ID",
      "original": "The original suspect text",
      "reason": "A concise explanation of the logic error (e.g., 'Context: Counting sheep for sleep, not thieves.')",
      "suggestion": "The proposed correction"
    }
  ]
}

If no *critical* errors are found in a batch, return { "suspectLines": [] }.

CRITICAL: Return ONLY raw JSON. No markdown."""
}
364
-
365
# --- Pydantic Models for Validation ---

class RequestPayload(BaseModel):
    """Request body accepted by the proxy's single POST endpoint.

    NOTE(review): field names are camelCase to match the original JS
    worker client — do not rename without updating callers.
    """
    apiKey: str  # Gemini API key; sanitized via clean_input() before use
    model: str   # Gemini model name, e.g. "gemini-1.5-pro"; also sanitized
    task: str    # dispatch key: "ping", "verify", "translate", "transcribe", ...
    data: Dict[str, Any] = {}  # task-specific payload; shape varies per task
372
-
373
# --- HELPER FUNCTIONS ---

def get_safety_settings() -> List[Dict[str, str]]:
    """Build safetySettings that disable every Gemini harm-category filter.

    BLOCK_NONE on all four categories: content moderation is delegated to
    the calling application rather than the API.
    """
    harm_categories = (
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
    return [
        {"category": category, "threshold": "BLOCK_NONE"}
        for category in harm_categories
    ]
382
-
383
# --- MAIN ROUTE ---

@app.post("/")
async def handle_post(payload: RequestPayload):
    """
    Main entry point for the Smart Proxy.

    Sanitizes inputs, builds a task-specific Gemini request payload,
    forwards it to the Gemini generateContent endpoint, and returns
    the raw Gemini JSON response.

    Raises:
        HTTPException(400): missing/invalid inputs, unknown task, or
            payload-construction failure.
        HTTPException(500): network/protocol failure while calling Gemini.
        HTTPException(4xx/5xx): Gemini API errors, propagated with the
            upstream status code and response body.
    """
    # 1. CLEAN INPUTS
    # Strip everything that could contaminate the URL (newlines, hidden chars).
    api_key = clean_input(payload.apiKey)
    model = clean_input(payload.model)
    task = str(payload.task).strip()
    data = payload.data

    if not api_key:
        raise HTTPException(status_code=400, detail="Missing 'apiKey'.")
    if not model:
        raise HTTPException(status_code=400, detail="Missing 'model'.")
    if not task:
        raise HTTPException(status_code=400, detail="Missing 'task'.")

    gemini_payload = None

    try:
        # --- PING ---
        if task == "ping":
            return {"status": "success", "message": "Pong! FastAPI Worker is active."}

        # --- VERIFY ---
        elif task == "verify":
            if not data.get("parts"):
                raise ValueError("Missing 'data.parts' for verify task.")

            is_batch = data.get("batchSize", 0) > 1

            # Single-pair response schema; batch responses wrap it in an ARRAY.
            # (Deduplicated: the object schema was previously written out twice.)
            item_schema = {
                "type": "OBJECT",
                "properties": {
                    "errorReport": {"type": "STRING"},
                    "correctedSrt": {"type": "STRING"},
                },
                "required": ["errorReport", "correctedSrt"],
            }
            schema = {"type": "ARRAY", "items": item_schema} if is_batch else item_schema

            gemini_payload = {
                "systemInstruction": {"parts": [{"text": PROMPTS["VERIFY_BATCH"] if is_batch else PROMPTS["VERIFY_SINGLE"]}]},
                "contents": [{"role": "user", "parts": data["parts"]}],
                "generationConfig": {
                    "responseMimeType": "application/json",
                    "responseSchema": schema,
                },
                "safetySettings": get_safety_settings(),
            }

        # --- ANALYZE SRT CONTEXT ---
        elif task == "analyze_srt":
            if not data.get("textBatch"):
                raise ValueError("Missing 'data.textBatch'.")

            gemini_payload = {
                "systemInstruction": {"parts": [{"text": PROMPTS["ANALYZE_CONTEXT"]}]},
                "contents": [{"parts": [{"text": f"Here is the first part of the subtitle file:\n\n{data['textBatch']}"}]}],
                "safetySettings": get_safety_settings(),
            }

        # --- TRANSLATE ---
        elif task == "translate":
            if not data.get("textBatch"):
                raise ValueError("Missing 'data.textBatch' for translate task.")

            target_language = data.get("targetLanguage", "Burmese (Myanmar)")
            translate_prompt = PROMPTS["TRANSLATE_BASE"].replace("{{TARGET_LANGUAGE}}", target_language)

            if data.get("promptVersion") == "basic":
                translate_prompt += PROMPTS["TRANSLATE_BASIC_ADDON"]
            else:
                translate_prompt += PROMPTS["TRANSLATE_NATURAL_ADDON"]

            # Specific Language Rules
            if "Burmese" in target_language or "Myanmar" in target_language:
                translate_prompt += "\nLANGUAGE SPECIFIC: Omit the Burmese full stop (၊) (။)."

            # Apply Styles (optional house-style presets selected by the client)
            style = data.get("style")
            if style == 'channel_myanmar':
                translate_prompt += "\nSTYLE: Channel Myanmar (CM). Tone: Casual, youthful, mainstream movie style. Use 'မင်း/ငါ' for friends/enemies, 'နင်/ငါ' for couples. Slang is allowed but keep it readable."
            elif style == 'cookie_tv':
                translate_prompt += "\nSTYLE: Cookie TV (K-Drama Style). Tone: Polite, warm, emotional. Use 'ရှင်/ကျွန်တော်', 'ကိုကို/မမ'. Focus on romantic and respectful nuances."
            elif style == 'viu':
                translate_prompt += "\nSTYLE: VIU / Netflix Standard. Tone: Professional, modern, balanced formality. No excessive slang. clear and concise."
            elif style == 'mrtv4':
                translate_prompt += "\nSTYLE: MRTV-4 / TV Dubbing. Tone: Formal, dramatic, spoken-word style. Strict grammar. Avoid English loan words if possible."
            elif style == 'disney':
                translate_prompt += "\nSTYLE: Disney+. Tone: Family-friendly, cheerful, clear. Avoid profanity. Use simple, universally understood words."
            elif style == 'jav':
                translate_prompt += ("\nSTYLE: **Adult (18+) Movie / JAV Fan Sub**. \n"
                                     "TONE: **'Khut-Kyan-Kyan' (ခပ်ကြမ်းကြမ်း)** - Rough, Raw, and Erotic. \n"
                                     "RULES: \n"
                                     "1. **Pronouns**: STRICTLY use 'ငါ' (I) and 'နင်' (You). NEVER use polite pronouns like 'ကျွန်တော်/ခင်ဗျား'. \n"
                                     "2. **Vocabulary**: Use direct, explicit street slang. Do not use formal or medical terms (e.g., use 'လိုး' instead of 'ဆက်ဆံ', 'စို့' instead of 'နမ်း'). \n"
                                     "3. **Context Mappings**: \n"
                                     "   - 'I'm cumming / Iku' -> 'ပြီးပြီ' or 'ထွက်ကုန်ပြီ' \n"
                                     "   - 'So good / Kimochi' -> 'ကောင်းလိုက်တာ' or 'ရှယ်ပဲ' or 'ဖီးလ်လာပြီ' \n"
                                     "   - 'Stop / Yamete' -> 'မလုပ်နဲ့' or 'တော်ပါတော့' \n"
                                     "   - 'Pussy / Manko' -> 'ပိပိ' \n"
                                     "4. **Formatting**: Keep sentences short, punchy, and intense. No Burmese punctuation (၊/။).")

            if data.get("context"):
                translate_prompt += f'\nCONTEXT: The user provided this specific context (IMDB Link or Movie Name or Character Relationships): "{data["context"]}". Use this to look up specific character names, gender relationships, and plot details to improve translation accuracy.'

            gemini_payload = {
                "systemInstruction": {"parts": [{"text": translate_prompt}]},
                "contents": [{"parts": [{"text": data["textBatch"]}]}],
                "safetySettings": get_safety_settings(),
            }

        # --- TRANSCRIBE (Standard) ---
        elif task == "transcribe":
            if not data.get("mimeType") or not data.get("audioData"):
                raise ValueError("Missing audio data for transcription.")

            gemini_payload = {
                "contents": [
                    {
                        "parts": [
                            {"text": PROMPTS["TRANSCRIBE"]},
                            {
                                "inlineData": {
                                    "mimeType": data["mimeType"],
                                    "data": data["audioData"],
                                }
                            },
                        ]
                    }
                ],
                "safetySettings": get_safety_settings(),
            }

        # --- TRANSCRIBE CHUNK ---
        elif task == "transcribe_chunk":
            if not data.get("mimeType") or not data.get("audioData"):
                raise ValueError("Missing audio data for chunk transcription.")

            gemini_payload = {
                "contents": [
                    {
                        "parts": [
                            {"text": PROMPTS["TRANSCRIBE_CHUNK"]},
                            {
                                "inlineData": {
                                    "mimeType": data["mimeType"],
                                    "data": data["audioData"],
                                }
                            },
                        ]
                    }
                ],
                "generationConfig": {"responseMimeType": "application/json"},
                "safetySettings": get_safety_settings(),
            }

        # --- VIDEO ANALYZER ---
        elif task == "analyze_video":
            if not data.get("videoUrl"):
                raise ValueError("Missing 'data.videoUrl'.")

            gemini_payload = {
                "systemInstruction": {"parts": [{"text": PROMPTS["ANALYZE_VIDEO"]}]},
                "contents": [{
                    "parts": [
                        {"file_data": {"file_uri": data["videoUrl"], "mime_type": "video/mp4"}},
                        {"text": "Please analyze this video based on the system instructions and return the JSON moderation report."},
                    ]
                }],
                "generationConfig": {"responseMimeType": "application/json"},
                "safetySettings": get_safety_settings(),
            }

        # --- ANCHOR FINDER ---
        elif task == "anchor_find":
            if not data.get("payload"):
                raise ValueError("Missing 'data.payload'.")

            gemini_payload = {
                "systemInstruction": {"parts": [{"text": PROMPTS["ANCHOR_FINDER"]}]},
                "contents": [{"parts": [{"text": data["payload"]}]}],
                "generationConfig": {
                    "responseMimeType": "application/json",
                    "responseSchema": data.get("responseSchema"),  # Dynamic schema supplied by caller
                },
                "safetySettings": get_safety_settings(),
            }

        # --- REPHRASER ---
        elif task == "rephrase":
            if not data.get("textBatch"):
                raise ValueError("Missing 'data.textBatch'.")

            rephrase_system_prompt = data.get("customPrompt") if data.get("customPrompt") else PROMPTS["REPHRASE_DEFAULT"]

            gemini_payload = {
                "systemInstruction": {"parts": [{"text": rephrase_system_prompt}]},
                "contents": [{"parts": [{"text": data["textBatch"]}]}],
                "safetySettings": get_safety_settings(),
            }

        # --- LINE FIXER ---
        elif task == "line_fixer":
            if not data.get("textBatch"):
                raise ValueError("Missing 'data.textBatch'.")

            mode = data.get("mode")
            if mode == "burmese":
                fixer_prompt = PROMPTS["LINE_FIXER_BURMESE"]
            elif mode == "english":
                fixer_prompt = PROMPTS["LINE_FIXER_ENGLISH"]
            else:
                fixer_prompt = PROMPTS["LINE_FIXER_GENERAL"]

            if data.get("preventOverlap"):
                fixer_prompt += PROMPTS["LINE_FIXER_OVERLAP_ADDON"]

            gemini_payload = {
                "systemInstruction": {"parts": [{"text": fixer_prompt}]},
                "contents": [{"parts": [{"text": data["textBatch"]}]}],
                "safetySettings": get_safety_settings(),
            }

        # --- SUSPECT CHECK ---
        elif task == "suspect_check":
            if not data.get("textBatch"):
                raise ValueError("Missing 'data.textBatch'.")

            gemini_payload = {
                "systemInstruction": {"parts": [{"text": PROMPTS["SUSPECT_CHECK"]}]},
                "contents": [{"parts": [{"text": data["textBatch"]}]}],
                "generationConfig": {"responseMimeType": "application/json"},
                "safetySettings": get_safety_settings(),
            }

        else:
            raise HTTPException(status_code=400, detail=f"Unknown task: {task}")

    except HTTPException:
        # BUGFIX: let deliberate HTTP errors (e.g. the unknown-task 400 above)
        # propagate as-is instead of being re-wrapped below with a misleading
        # "Payload Construction Error" message.
        raise
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Payload Construction Error: {str(e)}")

    # --- EXECUTE GEMINI CALL ---
    # BUGFIX: the URL was previously wrapped in markdown link syntax
    # "[https://...](https://...)" which produced an invalid URL scheme and
    # made every request fail with httpx.UnsupportedProtocol.
    base_url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent"

    # Use repr() to log invisible chars for debugging
    logger.info(f"DEBUG URL: {repr(base_url)}")

    async with httpx.AsyncClient(timeout=60.0) as client:
        try:
            # Pass the API key as a query param so it never touches the URL string.
            api_response = await client.post(
                base_url,
                params={"key": api_key},
                json=gemini_payload,
                headers={"Content-Type": "application/json"},
            )
        except httpx.UnsupportedProtocol as e:
            logger.error(f"Protocol Error: {e}")
            raise HTTPException(status_code=500, detail=f"Server Protocol Error (Check inputs): {str(e)}")
        except Exception as e:
            logger.error(f"Connection Error: {e}")
            raise HTTPException(status_code=500, detail=f"Connection/Network Error: {repr(e)}")

    if api_response.status_code != 200:
        error_body = api_response.text
        raise HTTPException(
            status_code=api_response.status_code,
            detail=f"Gemini API Error: {api_response.status_code} {api_response.reason_phrase}. Response: {error_body}",
        )

    return api_response.json()
676
-
677
- # --- RUN INSTRUCTION ---
678
- # Save as main.py
679
- # Run with: uvicorn main:app --reload
 
1
# Base image: official Python 3.10 (Debian-based).
FROM python:3.10

# All subsequent relative paths resolve under /code.
WORKDIR /code

# Install dependencies first so this layer stays cached until
# requirements.txt changes (app-code edits won't trigger a reinstall).
COPY ./requirements.txt /code/requirements.txt
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# Copy the application code.
COPY ./app.py /code/app.py

# Create a non-root user (Hugging Face Spaces security requirement; UID 1000).
RUN useradd -m -u 1000 user
USER user
# Make user-local pip installs (~/.local/bin) resolvable on PATH.
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Run the application on port 7860 (the port Hugging Face Spaces expects).
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]