Spaces:
Sleeping
Sleeping
Update Dockerfile
Browse files- Dockerfile +16 -674
Dockerfile
CHANGED
|
@@ -1,679 +1,21 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
from typing import Any, Dict, List, Optional
|
| 4 |
-
from fastapi import FastAPI, HTTPException, Request
|
| 5 |
-
from fastapi.middleware.cors import CORSMiddleware
|
| 6 |
-
from pydantic import BaseModel
|
| 7 |
-
import httpx
|
| 8 |
|
| 9 |
-
#
|
| 10 |
-
|
| 11 |
|
| 12 |
-
#
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
allow_origins=["*"],
|
| 16 |
-
allow_credentials=True,
|
| 17 |
-
allow_methods=["POST", "OPTIONS"],
|
| 18 |
-
allow_headers=["Content-Type"],
|
| 19 |
-
)
|
| 20 |
|
| 21 |
-
#
|
| 22 |
-
|
| 23 |
-
logger = logging.getLogger(__name__)
|
| 24 |
|
| 25 |
-
# -
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
"""
|
| 31 |
-
if not text:
|
| 32 |
-
return ""
|
| 33 |
-
# Replace anything that isn't safe with an empty string
|
| 34 |
-
return re.sub(r'[^a-zA-Z0-9-._]', '', str(text))
|
| 35 |
|
| 36 |
-
#
|
| 37 |
-
|
| 38 |
-
PROMPTS = {
|
| 39 |
-
# 1. VERIFIER PROMPTS
|
| 40 |
-
"VERIFY_SINGLE": """You are an advanced subtitle verifier and corrector. You will be given a single pair of data: an original SRT block and an IMAGE of a corresponding PDF page. The image is the ground truth.
|
| 41 |
-
|
| 42 |
-
Your task is to perform two actions and return the result as a single JSON object.
|
| 43 |
-
|
| 44 |
-
Your response MUST be a valid JSON object that strictly follows this schema:
|
| 45 |
-
{
|
| 46 |
-
"type": "OBJECT",
|
| 47 |
-
"properties": {
|
| 48 |
-
"errorReport": { "type": "STRING" },
|
| 49 |
-
"correctedSrt": { "type": "STRING" }
|
| 50 |
-
},
|
| 51 |
-
"required": ["errorReport", "correctedSrt"]
|
| 52 |
-
}
|
| 53 |
-
|
| 54 |
-
INSTRUCTIONS FOR EACH JSON KEY:
|
| 55 |
-
1. **"errorReport"**: Perform OCR on the image. Report ONLY significant errors (mismatch, extraneous/missing text). If none, this string MUST be "No significant errors found.".
|
| 56 |
-
2. **"correctedSrt"**: Generate a corrected SRT block for the current subtitle pair.
|
| 57 |
-
All timestamps must remain exactly as in the original, and the output must contain only the raw SRT text — no explanations, no JSON, no metadata.
|
| 58 |
-
|
| 59 |
-
Guidelines:
|
| 60 |
-
1. Source Priority:
|
| 61 |
-
• Use the OCR image text as the primary source of truth.
|
| 62 |
-
• If the OCR result contains Burmese text, use it as the main subtitle text.
|
| 63 |
-
2. Language Inclusion:
|
| 64 |
-
• Keep both Burmese and English lines if they exist.
|
| 65 |
-
• Do not include any other languages besides Burmese and English.
|
| 66 |
-
3. When OCR Text Is Missing:
|
| 67 |
-
• If the OCR image contains no readable text, keep the timestamps exactly the same and output a blank subtitle line.
|
| 68 |
-
• Do not delete, skip, or merge any subtitle blocks.
|
| 69 |
-
4. Preservation Rules:
|
| 70 |
-
• Always preserve original timestamps, line breaks, and Burmese punctuation (။, ၊).
|
| 71 |
-
• Maintain the same block numbering sequence as in the input.
|
| 72 |
-
5. Output Format:
|
| 73 |
-
• Output only the clean, corrected SRT block.
|
| 74 |
-
• No explanations, no quotes, no formatting outside the SRT syntax.""",
|
| 75 |
-
|
| 76 |
-
"VERIFY_BATCH": """You are an advanced subtitle verifier and corrector. You will be given a BATCH of data containing several pairs of an original SRT block and a corresponding PDF page IMAGE. The image is the ground truth.
|
| 77 |
-
|
| 78 |
-
Your task is to process EACH PAIR sequentially and return the result as a single JSON ARRAY, where each object in the array corresponds to a pair from the input.
|
| 79 |
-
|
| 80 |
-
Your response MUST be a valid JSON array that strictly follows this schema:
|
| 81 |
-
{
|
| 82 |
-
"type": "ARRAY",
|
| 83 |
-
"items": {
|
| 84 |
-
"type": "OBJECT",
|
| 85 |
-
"properties": {
|
| 86 |
-
"errorReport": { "type": "STRING" },
|
| 87 |
-
"correctedSrt": { "type": "STRING" }
|
| 88 |
-
},
|
| 89 |
-
"required": ["errorReport", "correctedSrt"]
|
| 90 |
-
}
|
| 91 |
-
}
|
| 92 |
-
|
| 93 |
-
INSTRUCTIONS FOR EACH JSON OBJECT IN THE ARRAY:
|
| 94 |
-
1. **"errorReport"**: Perform OCR on the image for the current pair. Report ONLY significant errors (mismatch, extraneous/missing text). If none, this string MUST be "No significant errors found.".
|
| 95 |
-
2. “correctedSrt”:
|
| 96 |
-
Generate a corrected SRT block for the current subtitle pair.
|
| 97 |
-
All timestamps must remain exactly as in the original, and the output must contain only the raw SRT text — no explanations, no JSON, no metadata.
|
| 98 |
-
|
| 99 |
-
Guidelines:
|
| 100 |
-
1. Source Priority:
|
| 101 |
-
• Use the OCR image text as the primary source of truth.
|
| 102 |
-
• If the OCR result contains Burmese text, use it as the main subtitle text.
|
| 103 |
-
2. Language Inclusion:
|
| 104 |
-
• Keep both Burmese and English lines if they exist.
|
| 105 |
-
• Do not include any other languages besides Burmese and English.
|
| 106 |
-
3. When OCR Text Is Missing:
|
| 107 |
-
• If the OCR image contains no readable text, keep the timestamps exactly the same and output a blank subtitle line.
|
| 108 |
-
• Do not delete, skip, or merge any subtitle blocks.
|
| 109 |
-
4. Preservation Rules:
|
| 110 |
-
• Always preserve original timestamps, line breaks, and Burmese punctuation (။, ၊).
|
| 111 |
-
• Maintain the same block numbering sequence as in the input.
|
| 112 |
-
• Do not add missing Burmese punctuation (။) at the end of the line.
|
| 113 |
-
• Do not add Burmese punctuation (။)
|
| 114 |
-
5. Output Format:
|
| 115 |
-
• Output only the clean, corrected SRT block.
|
| 116 |
-
• No explanations, no quotes, no formatting outside the SRT syntax.
|
| 117 |
-
|
| 118 |
-
Process all pairs provided in the prompt and return a JSON array with the same number of objects as pairs you received.""",
|
| 119 |
-
|
| 120 |
-
# 2. TRANSLATOR PROMPTS
|
| 121 |
-
"TRANSLATE_BASE": """You are an expert subtitle translator. Your task is to translate the text in the provided JSON object to {{TARGET_LANGUAGE}}.
|
| 122 |
-
1. The user will provide a JSON object where keys are IDs (e.g., 'line_0', 'line_1') and values are the text lines.
|
| 123 |
-
2. You MUST translate the text *value* for each key into {{TARGET_LANGUAGE}}.
|
| 124 |
-
3. You MUST preserve all SRT/ASS formatting tags exactly as they appear (e.g., `{\\an8}`, `<i>`, `</i>`). Do NOT translate the content of these tags.
|
| 125 |
-
4. You MUST respond ONLY with a valid JSON object, containing the *exact same keys* as the input, with the translated text as the values.
|
| 126 |
-
5. Do not include `json` or ```json markers in your response. Respond only with the JSON object itself.
|
| 127 |
-
6. CRITICAL: Ensure all double quotes (") within the translated text *values* are properly escaped with a backslash (e.g., \\"example\\"). This is essential for the JSON to be valid.
|
| 128 |
-
7. CRITICAL JSON REQUIREMENT: If the subtitle text contains backslashes (e.g., in formatting tags like `{\\an8}`, `\\N`), you MUST escape them as double backslashes (e.g., `{\\\\an8}`, `\\\\N`) in the JSON string value. Failure to do this will break the JSON parser.
|
| 129 |
-
""",
|
| 130 |
-
|
| 131 |
-
"TRANSLATE_NATURAL_ADDON": """\n**CRITICAL INSTRUCTIONS:**
|
| 132 |
-
1. **Context-Aware Pronouns:** Pay close attention to the flow of conversation (within the batch) to choose the most appropriate pronouns. Translate based on the inferred relationship and formality between speakers.
|
| 133 |
-
2. **Natural Flow:** The translation should sound natural in the target language, not like a literal word-for-word translation.
|
| 134 |
-
3. **Formatting:** Keep punctuation appropriate for the target language.""",
|
| 135 |
-
|
| 136 |
-
"TRANSLATE_BASIC_ADDON": """\n**CRITICAL INSTRUCTIONS:**
|
| 137 |
-
1. **Direct Translation:** Prioritize accuracy over style.""",
|
| 138 |
-
|
| 139 |
-
# === NEW: CONTEXT ANALYZER PROMPT ===
|
| 140 |
-
"ANALYZE_CONTEXT": """You are a linguistic expert specializing in Burmese translation context.
|
| 141 |
-
Analyze the provided subtitle excerpt (which is the beginning of a movie).
|
| 142 |
-
|
| 143 |
-
Your goal is to extract context to help a translator choose the correct Burmese Pronouns and Vocabulary.
|
| 144 |
-
|
| 145 |
-
Please output a concise "Translator's Note" covering:
|
| 146 |
-
1. **Genre & Tone:** (e.g., Action, Romance, Adult, Historical).
|
| 147 |
-
2. **Main Characters & Relationships:** Who is talking to whom? (e.g., "A and B are lovers", "C is D's boss").
|
| 148 |
-
3. **Pronoun Guide (CRITICAL):** - For each pair of speakers, specify the correct Burmese pronouns.
|
| 149 |
-
- Examples:
|
| 150 |
-
- "Male to Male (Friends): Use 'Min/Nga' (မင်း/ငါ)"
|
| 151 |
-
- "Female to Male (Lovers): Use 'Maung/Mel' or intimate 'Nin/Nga'"
|
| 152 |
-
- "Formal/Stranger: Use 'Khim-byar/Kyun-daw' or 'Shin/Kyun-ma'"
|
| 153 |
-
|
| 154 |
-
**Output Format:** Just provide the analysis text. Do not translate the subtitles yet.""",
|
| 155 |
-
|
| 156 |
-
# 3. TRANSCRIBER PROMPTS
|
| 157 |
-
"TRANSCRIBE": """
|
| 158 |
-
You are an expert subtitle editor for movies and music videos.
|
| 159 |
-
Transcribe the provided audio file.
|
| 160 |
-
Your output MUST be *only* in the standard SRT (SubRip Text) file format.
|
| 161 |
-
Do not include any other text, explanations, or markdown formatting (like ```srt).
|
| 162 |
-
|
| 163 |
-
Follow these professional subtitling rules:
|
| 164 |
-
1. **Timing:** Timestamps must be precise and tightly synced to the spoken words.
|
| 165 |
-
2. **Line Breaks:** Keep subtitles to a maximum of 2 lines.
|
| 166 |
-
3. **Readability:** Break lines at natural pauses, sentence ends, or clauses. Do not leave single words on a line.
|
| 167 |
-
4. **Length:** Aim for a maximum of 42 characters per line. This is a guideline for readability.
|
| 168 |
-
5. **Format:** The SRT format must be strictly followed:
|
| 169 |
-
1
|
| 170 |
-
HH:MM:SS,MS --> HH:MM:SS,MS
|
| 171 |
-
First line of text.
|
| 172 |
-
Second line of text.
|
| 173 |
-
|
| 174 |
-
2
|
| 175 |
-
HH:MM:SS,MS --> HH:MM:SS,MS
|
| 176 |
-
Next subtitle.
|
| 177 |
-
""",
|
| 178 |
-
|
| 179 |
-
# 3.1 CHUNK TRANSCRIPTION PROMPT
|
| 180 |
-
"TRANSCRIBE_CHUNK": """
|
| 181 |
-
You are a precision subtitle generator.
|
| 182 |
-
Transcribe ONLY the spoken words in this audio clip.
|
| 183 |
-
|
| 184 |
-
OUTPUT FORMAT:
|
| 185 |
-
Return ONLY a valid JSON array of objects. Do not wrap in markdown.
|
| 186 |
-
Structure: [{"start": "MM:SS.mmm", "end": "MM:SS.mmm", "text": "spoken text"}]
|
| 187 |
-
|
| 188 |
-
CRITICAL RULES:
|
| 189 |
-
1. **NO HALLUCINATIONS:** If there is silence, music only, or no clear speech, return an empty array []. Do NOT invent text like "Welcome to the video", "Subscribe", or "Next steps".
|
| 190 |
-
2. **TIMESTAMPS:** Timestamps must be relative to the beginning of *this specific audio file* (00:00.000).
|
| 191 |
-
3. **VERBATIM:** Transcribe exactly what is said. Do not summarize.
|
| 192 |
-
4. **JSON ONLY:** Raw JSON array only.
|
| 193 |
-
""",
|
| 194 |
-
|
| 195 |
-
# 4. VIDEO ANALYZER PROMPT
|
| 196 |
-
"ANALYZE_VIDEO": """
|
| 197 |
-
You are an expert content moderator for a major video platform like YouTube. Your task is to analyze the provided video and assign it one of three moderation levels. You must distinguish between content that is NOT AD-FRIENDLY (but allowed) and content that VIOLATES COMMUNITY GUIDELINES (and must be removed).
|
| 198 |
-
|
| 199 |
-
**Your 3-Tier Decision:**
|
| 200 |
-
|
| 201 |
-
1. **"Safe" (Ad-Friendly):**
|
| 202 |
-
* **Description:** The content is clean, safe for all advertisers, and has no issues.
|
| 203 |
-
* **Action:** Full monetization.
|
| 204 |
-
* **Categories:** []
|
| 205 |
-
|
| 206 |
-
2. **"Borderline" (Not Ad-Friendly):**
|
| 207 |
-
* **Description:** The content is ALLOWED on the platform but is NOT suitable for most advertisers. It does NOT break community guidelines.
|
| 208 |
-
* **Action:** Limited or no ads (demonetization).
|
| 209 |
-
* **Categories:**
|
| 210 |
-
* **Inappropriate Language:** Frequent use of profanity.
|
| 211 |
-
* **Suggestive Content:** Non-explicit sexual themes, "beach fails," suggestive dancing, revealing outfits that are not nudity.
|
| 212 |
-
* **Moderate Violence:** Non-graphic violence (e.g., in news, documentaries, or video games).
|
| 213 |
-
* **Sensitive Topics:** Non-graphic discussion of war, tragedy, or other sensitive events.
|
| 214 |
-
* **Inauthentic (Low-Effort):** Low-effort, machine-generated slideshows, robotic TTS voices that are not for accessibility. (This is allowed, but often demonetized at a channel level).
|
| 215 |
-
|
| 216 |
-
3. **"Violation" (Community Guideline Break):**
|
| 217 |
-
* **Description:** The content is NOT ALLOWED on the platform and must be flagged for removal.
|
| 218 |
-
* **Action:** Remove video, issue channel strike.
|
| 219 |
-
* **Categories:**
|
| 220 |
-
* **Hate Speech:** Direct attacks or promotion of violence/hatred against a protected group.
|
| 221 |
-
* **Harassment & Bullying:** Malicious, targeted attacks on an individual.
|
| 222 |
-
* **Graphic Violence:** Depictions of extreme, gratuitous violence intended to shock (outside of clear, brief news context).
|
| 223 |
-
* **Dangerous Acts / Self-Harm:** Promoting or showing in detail acts that could lead to serious injury or suicide.
|
| 224 |
-
* **Pornography / Explicit Nudity:** Any explicit sexual acts or nudity intended for sexual gratification (not educational or artistic).
|
| 225 |
-
* **Harmful Misinformation:** Content that poses a direct, real-world harm (e.g., medical, civic).
|
| 226 |
-
* **Spam & Scams:** Deceptive practices, fraudulent schemes.
|
| 227 |
-
|
| 228 |
-
**Context is Key:** An educational video on breast cancer (non-sexual nudity) is "Safe". A news report on a conflict (sensitive topic) is "Borderline". A video promoting hatred is a "Violation".
|
| 229 |
-
|
| 230 |
-
**Your Response Format (JSON):**
|
| 231 |
-
You MUST return *only* a valid JSON object with this structure:
|
| 232 |
-
{
|
| 233 |
-
"decision": "Safe" | "Borderline" | "Violation",
|
| 234 |
-
"categories_found": ["category1", "category2", ...] | [],
|
| 235 |
-
"reasoning": "A brief, neutral explanation for your decision. Justify *why* it fits the 'Safe', 'Borderline', or 'Violation' tier, citing context."
|
| 236 |
-
}
|
| 237 |
-
|
| 238 |
-
Analyze the video's visual and audio content and return the JSON report.
|
| 239 |
-
""",
|
| 240 |
-
|
| 241 |
-
# 5. ANCHOR FINDER PROMPT
|
| 242 |
-
"ANCHOR_FINDER": """You are an expert subtitle anchor point finder. I will give you a JSON payload.
|
| 243 |
-
1. **"source_lines"**: A JSON object where keys are line numbers and values are subtitle text in a foreign language (e.g., Burmese).
|
| 244 |
-
2. **"reference_srt"**: The *entire* text content of the reference .srt file (in English).
|
| 245 |
-
|
| 246 |
-
Your job is to:
|
| 247 |
-
1. For *each* line in "source_lines", translate it to English.
|
| 248 |
-
2. Semantically search the "reference_srt" to find the *single best matching line*.
|
| 249 |
-
3. You MUST return a JSON object, mapping each *source* line number (as a string key) to the *matching reference* line number (as a number value).
|
| 250 |
-
|
| 251 |
-
**EXAMPLE:**
|
| 252 |
-
- **Source:** `{"100": "မြန်မာစာ..."}`
|
| 253 |
-
- **Reference:** "...110...Hello...111...How are you...112...Burmese text..."
|
| 254 |
-
- **Your Response:** `{"100": 112}`
|
| 255 |
-
""",
|
| 256 |
-
|
| 257 |
-
# 6. REPHRASER DEFAULT PROMPT
|
| 258 |
-
"REPHRASE_DEFAULT": """You are an expert subtitle editor. Your task is to rephrase the text in the provided JSON object.
|
| 259 |
-
1. The user will provide a JSON object where keys are IDs (e.g., 'line_0', 'line_1') and values are the original subtitle text lines.
|
| 260 |
-
2. You MUST rephrase the text *value* for each key. The meaning must be identical, but the wording should be different (use synonyms, change sentence structure).
|
| 261 |
-
3. You MUST preserve all SRT/ASS formatting tags exactly as they appear (e.g., `{\\an8}`, `<i>`, `</i>`). Do NOT alter the content of these tags.
|
| 262 |
-
4. You MUST respond ONLY with a valid JSON object, containing the *exact same keys* as the input, with the rephrased text as the values.
|
| 263 |
-
5. Do not include `json` or ```json markers in your response. Respond only with the JSON object itself.
|
| 264 |
-
6. CRITICAL: Ensure all double quotes (") within the rephrased text *values* are properly escaped with a backslash (e.g., \\"example\\"). This is essential for the JSON to be valid.
|
| 265 |
-
7. Maintain the original language. Do not translate.
|
| 266 |
-
8. Do not change pronouns.
|
| 267 |
-
9. CRITICAL JSON REQUIREMENT: If the subtitle text contains backslashes (e.g., in formatting tags like `{\\an8}`, `\\N`), you MUST escape them as double backslashes (e.g., `{\\\\an8}`, `\\\\N`) in the JSON string value. Failure to do this will break the JSON parser.""",
|
| 268 |
-
|
| 269 |
-
# 7. LINE FIXER PROMPTS
|
| 270 |
-
"LINE_FIXER_BURMESE": """You are a professional Burmese subtitle editor. Your task is to fix line breaks and split long subtitles.
|
| 271 |
-
|
| 272 |
-
RULES:
|
| 273 |
-
1. NO CONTENT CHANGE: Do not add, delete, or change words. Only adjust whitespace/newlines.
|
| 274 |
-
2. STRICT MAX 2 LINES: Every subtitle event must be 1 or 2 lines. Never 3.
|
| 275 |
-
3. SPLIT LONG LINES (CRITICAL):
|
| 276 |
-
- If a subtitle is too long for 2 lines, you MUST split it into multiple separate events.
|
| 277 |
-
- Return an ARRAY of strings for that ID.
|
| 278 |
-
- Example: "12": ["Part 1 text...", "Part 2 text..."]
|
| 279 |
-
|
| 280 |
-
4. BURMESE GRAMMAR & FLOW (CRITICAL):
|
| 281 |
-
- NO ORPHANS: Never leave a single short word (like "ထင်တယ်", "တယ်", "မယ်", "ပါ", "နော်") on a new line by itself. Join it to the previous line.
|
| 282 |
-
- NO SEPARATION: Do not separate modifiers like "အဲဒီ", "ဒီ", "ဟို" from the following noun. Do not start a line with particles.
|
| 283 |
-
- BALANCE: Try to make lines roughly equal length, UNLESS it breaks a grammar rule. Grammar always wins.
|
| 284 |
-
|
| 285 |
-
5. JSON FORMAT: Respond only with valid JSON. Escape double quotes properly.
|
| 286 |
-
|
| 287 |
-
CRITICAL JSON REQUIREMENT: If the subtitle text contains backslashes (e.g., in formatting tags like `{\\an8}`, `\\N`), you MUST escape them as double backslashes (e.g., `{\\\\an8}`, `\\\\N`) in the JSON string value. Failure to do this will break the JSON parser.""",
|
| 288 |
-
|
| 289 |
-
"LINE_FIXER_ENGLISH": """You are a Netflix Certified Subtitle QC Editor. Your task is to conform the provided English subtitles to the "Netflix English Timed Text Style Guide".
|
| 290 |
-
|
| 291 |
-
RULES:
|
| 292 |
-
1. **Character Limitation:** Maximum 42 characters per line. Max 2 lines. Split if longer (return Array).
|
| 293 |
-
2. **Line Breaking Logic:** No splitting noun/article, name/surname. Break after punctuation/before conjunctions.
|
| 294 |
-
3. **Text Clean-Up:** Smart quotes. Smart ellipsis. Remove speaker labels unless off-screen. Remove filler words.
|
| 295 |
-
4. **JSON FORMAT:** Respond only with valid JSON. Key = ID. Value = String or Array of Strings. Escape double quotes.
|
| 296 |
-
|
| 297 |
-
CRITICAL JSON REQUIREMENT: If the subtitle text contains backslashes (e.g., in formatting tags like `{\\an8}`, `\\N`), you MUST escape them as double backslashes (e.g., `{\\\\an8}`, `\\\\N`) in the JSON string value. Failure to do this will break the JSON parser.""",
|
| 298 |
-
|
| 299 |
-
"LINE_FIXER_GENERAL": """You are a Universal Subtitle Formatter. Your task is to clean and format raw text into professional subtitles for ANY language.
|
| 300 |
-
|
| 301 |
-
RULES:
|
| 302 |
-
1. **Structure:** Strictly 1 or 2 lines. Split >40 chars or >80 chars total into separate events (return Array).
|
| 303 |
-
2. **Pyramid Formatting:** Prefer "Bottom-Heavy" pyramid.
|
| 304 |
-
3. **Clean Up:** Fix basic punctuation. Remove double spaces.
|
| 305 |
-
4. **Restriction:** Do not change meaning.
|
| 306 |
-
5. **JSON FORMAT:** Respond only with valid JSON. Key = ID. Value = String or Array. Escape double quotes.
|
| 307 |
-
|
| 308 |
-
CRITICAL JSON REQUIREMENT: If the subtitle text contains backslashes (e.g., in formatting tags like `{\\an8}`, `\\N`), you MUST escape them as double backslashes (e.g., `{\\\\an8}`, `\\\\N`) in the JSON string value. Failure to do this will break the JSON parser.""",
|
| 309 |
-
|
| 310 |
-
"LINE_FIXER_OVERLAP_ADDON": """\n\n6. **TIMING & OVERLAP OPTIMIZATION (IMPORTANT):**\n - Since you are splitting lines, ensure each split part is concise and short. \n - Avoid cramming too much text into one block, as this causes reading speed issues and display overlaps.""",
|
| 311 |
-
|
| 312 |
-
# 8. SUSPECT LINE FINDER PROMPT (UPDATED)
|
| 313 |
-
"SUSPECT_CHECK": """You are a Senior Subtitle Quality Control (QC) Specialist specializing in Asian Language (Burmese/Korean/English) contexts.
|
| 314 |
-
|
| 315 |
-
**YOUR GOAL:** Identify *High-Confidence Logic Errors* that break immersion.
|
| 316 |
-
**DO NOT** flag minor stylistic choices, slightly awkward phrasing, or standard grammar variations.
|
| 317 |
-
**ONLY** return a result if you are >90% sure it is an error.
|
| 318 |
-
|
| 319 |
-
**LOOK FOR THESE SPECIFIC ERROR TYPES:**
|
| 320 |
-
|
| 321 |
-
1. **Contextual Logic Failures (The "Sheep vs. Thief" Rule):**
|
| 322 |
-
- Text that is technically a valid word but makes NO SENSE in the specific scene context.
|
| 323 |
-
- *Example:* A character trying to sleep counts "Thief 1, Thief 2" (Burmese 'Thu-khoe') instead of "Sheep 1, Sheep 2" (Burmese 'Thoe').
|
| 324 |
-
- *Example:* A character in a car says "I bought a *flower* to drive" (phonetic mix-up) instead of "car".
|
| 325 |
-
|
| 326 |
-
2. **Name/Entity Inconsistency:**
|
| 327 |
-
- A character's name changes spelling within the batch or compared to common transliteration norms.
|
| 328 |
-
- *Example:* "Lee Gi-wu" becomes "Lee Gi-pu" or "Yi Ki-woo" in the same scene.
|
| 329 |
-
- *Example:* "Gyaung-toe" (weird transliteration) vs "Gyeonggi-do" (standard place name).
|
| 330 |
-
|
| 331 |
-
3. **Nonsense / Typo / OCR Garbage:**
|
| 332 |
-
- Words that appear to be keyboard smashes or phonetic errors that result in gibberish.
|
| 333 |
-
- *Example:* "Dog Night" (Khway Nya) vs "Sniffer Dog" (Khway Nan) in a detective context.
|
| 334 |
-
|
| 335 |
-
*** SPECIAL INSTRUCTION ***
|
| 336 |
-
In addition to the standard logic checks, you MUST flag lines containing:
|
| 337 |
-
1. Broken Burmese encoding or rendering issues (e.g. 'န ေါ ေ', misplaced independent vowels, or character ordering errors).
|
| 338 |
-
2. Obvious keyboard mashing or nonsensical gibberish.
|
| 339 |
-
You must flag these as 'Encoding/Nonsense' errors even if you are not 90% confident about the intended meaning. These are high-priority formatting errors.
|
| 340 |
-
*** END SPECIAL INSTRUCTION ***
|
| 341 |
-
|
| 342 |
-
|
| 343 |
-
**INPUT FORMAT:**
|
| 344 |
-
A block of text containing subtitle lines prefixed with their Line ID (e.g., "ID: [Text]").
|
| 345 |
-
|
| 346 |
-
**OUTPUT FORMAT:**
|
| 347 |
-
Return a valid JSON Object containing a key "suspectLines". This key must be an Array of Objects.
|
| 348 |
-
|
| 349 |
-
{
|
| 350 |
-
"suspectLines": [
|
| 351 |
-
{
|
| 352 |
-
"id": "Line ID",
|
| 353 |
-
"original": "The original suspect text",
|
| 354 |
-
"reason": "A concise explanation of the logic error (e.g., 'Context: Counting sheep for sleep, not thieves.')",
|
| 355 |
-
"suggestion": "The proposed correction"
|
| 356 |
-
}
|
| 357 |
-
]
|
| 358 |
-
}
|
| 359 |
-
|
| 360 |
-
If no *critical* errors are found in a batch, return { "suspectLines": [] }.
|
| 361 |
-
|
| 362 |
-
CRITICAL: Return ONLY raw JSON. No markdown."""
|
| 363 |
-
}
|
| 364 |
-
|
| 365 |
-
# --- Pydantic Models for Validation ---
|
| 366 |
-
|
| 367 |
-
class RequestPayload(BaseModel):
    """Request body accepted by the POST route of this proxy.

    ``apiKey``, ``model`` and ``task`` are required strings; ``data`` is
    optional and carries the task-specific arguments.
    """

    # Caller-supplied key; the route handler passes it through clean_input().
    apiKey: str
    # Model identifier; also passed through clean_input() by the handler.
    model: str
    # Task selector compared against literals such as "ping", "verify",
    # "translate" in the route handler.
    task: str
    # Task-specific arguments (e.g. "parts", "textBatch"); empty when omitted.
    data: Dict[str, Any] = {}
|
| 372 |
-
|
| 373 |
-
# --- HELPER FUNCTIONS ---
|
| 374 |
-
|
| 375 |
-
def get_safety_settings(threshold: str = "BLOCK_NONE") -> List[Dict[str, str]]:
    """Build the ``safetySettings`` list used in the outgoing request payloads.

    Args:
        threshold: Blocking threshold applied uniformly to every harm
            category. The default, ``"BLOCK_NONE"``, reproduces the previously
            hard-coded value, so existing zero-argument callers are unaffected.

    Returns:
        A newly built list containing one
        ``{"category": <name>, "threshold": <threshold>}`` dict per harm
        category, always in the same order.
    """
    # Single source of truth for the categories; order is kept stable so the
    # resulting payload is identical to the former hand-written list.
    harm_categories = (
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
    return [
        {"category": category, "threshold": threshold}
        for category in harm_categories
    ]
|
| 382 |
-
|
| 383 |
-
# --- MAIN ROUTE ---
|
| 384 |
-
|
| 385 |
-
@app.post("/")
|
| 386 |
-
async def handle_post(payload: RequestPayload):
|
| 387 |
-
"""
|
| 388 |
-
Main entry point for the Smart Proxy.
|
| 389 |
-
"""
|
| 390 |
-
# 1. CLEAN INPUTS
|
| 391 |
-
# Use explicit stripping of all whitespace including newlines
|
| 392 |
-
api_key = clean_input(payload.apiKey)
|
| 393 |
-
model = clean_input(payload.model)
|
| 394 |
-
task = str(payload.task).strip()
|
| 395 |
-
data = payload.data
|
| 396 |
-
|
| 397 |
-
if not api_key:
|
| 398 |
-
raise HTTPException(status_code=400, detail="Missing 'apiKey'.")
|
| 399 |
-
if not model:
|
| 400 |
-
raise HTTPException(status_code=400, detail="Missing 'model'.")
|
| 401 |
-
if not task:
|
| 402 |
-
raise HTTPException(status_code=400, detail="Missing 'task'.")
|
| 403 |
-
|
| 404 |
-
gemini_payload = None
|
| 405 |
-
|
| 406 |
-
try:
|
| 407 |
-
# --- PING ---
|
| 408 |
-
if task == "ping":
|
| 409 |
-
return {"status": "success", "message": "Pong! FastAPI Worker is active."}
|
| 410 |
-
|
| 411 |
-
# --- VERIFY ---
|
| 412 |
-
elif task == "verify":
|
| 413 |
-
if not data.get("parts"):
|
| 414 |
-
raise ValueError("Missing 'data.parts' for verify task.")
|
| 415 |
-
|
| 416 |
-
is_batch = data.get("batchSize", 0) > 1
|
| 417 |
-
|
| 418 |
-
schema = None
|
| 419 |
-
if is_batch:
|
| 420 |
-
schema = {
|
| 421 |
-
"type": "ARRAY",
|
| 422 |
-
"items": {
|
| 423 |
-
"type": "OBJECT",
|
| 424 |
-
"properties": {
|
| 425 |
-
"errorReport": {"type": "STRING"},
|
| 426 |
-
"correctedSrt": {"type": "STRING"},
|
| 427 |
-
},
|
| 428 |
-
"required": ["errorReport", "correctedSrt"],
|
| 429 |
-
},
|
| 430 |
-
}
|
| 431 |
-
else:
|
| 432 |
-
schema = {
|
| 433 |
-
"type": "OBJECT",
|
| 434 |
-
"properties": {
|
| 435 |
-
"errorReport": {"type": "STRING"},
|
| 436 |
-
"correctedSrt": {"type": "STRING"},
|
| 437 |
-
},
|
| 438 |
-
"required": ["errorReport", "correctedSrt"],
|
| 439 |
-
}
|
| 440 |
-
|
| 441 |
-
gemini_payload = {
|
| 442 |
-
"systemInstruction": {"parts": [{"text": PROMPTS["VERIFY_BATCH"] if is_batch else PROMPTS["VERIFY_SINGLE"]}]},
|
| 443 |
-
"contents": [{"role": "user", "parts": data["parts"]}],
|
| 444 |
-
"generationConfig": {
|
| 445 |
-
"responseMimeType": "application/json",
|
| 446 |
-
"responseSchema": schema
|
| 447 |
-
},
|
| 448 |
-
"safetySettings": get_safety_settings(),
|
| 449 |
-
}
|
| 450 |
-
|
| 451 |
-
# --- ANALYZE SRT CONTEXT ---
|
| 452 |
-
elif task == "analyze_srt":
|
| 453 |
-
if not data.get("textBatch"):
|
| 454 |
-
raise ValueError("Missing 'data.textBatch'.")
|
| 455 |
-
|
| 456 |
-
gemini_payload = {
|
| 457 |
-
"systemInstruction": {"parts": [{"text": PROMPTS["ANALYZE_CONTEXT"]}]},
|
| 458 |
-
"contents": [{"parts": [{"text": f"Here is the first part of the subtitle file:\n\n{data['textBatch']}"}]}],
|
| 459 |
-
"safetySettings": get_safety_settings(),
|
| 460 |
-
}
|
| 461 |
-
|
| 462 |
-
# --- TRANSLATE ---
|
| 463 |
-
elif task == "translate":
|
| 464 |
-
if not data.get("textBatch"):
|
| 465 |
-
raise ValueError("Missing 'data.textBatch' for translate task.")
|
| 466 |
-
|
| 467 |
-
target_language = data.get("targetLanguage", "Burmese (Myanmar)")
|
| 468 |
-
translate_prompt = PROMPTS["TRANSLATE_BASE"].replace("{{TARGET_LANGUAGE}}", target_language)
|
| 469 |
-
|
| 470 |
-
if data.get("promptVersion") == "basic":
|
| 471 |
-
translate_prompt += PROMPTS["TRANSLATE_BASIC_ADDON"]
|
| 472 |
-
else:
|
| 473 |
-
translate_prompt += PROMPTS["TRANSLATE_NATURAL_ADDON"]
|
| 474 |
-
|
| 475 |
-
# Specific Language Rules
|
| 476 |
-
if "Burmese" in target_language or "Myanmar" in target_language:
|
| 477 |
-
translate_prompt += "\nLANGUAGE SPECIFIC: Omit the Burmese full stop (၊) (။)."
|
| 478 |
-
|
| 479 |
-
# Apply Styles
|
| 480 |
-
style = data.get("style")
|
| 481 |
-
if style == 'channel_myanmar':
|
| 482 |
-
translate_prompt += "\nSTYLE: Channel Myanmar (CM). Tone: Casual, youthful, mainstream movie style. Use 'မင်း/ငါ' for friends/enemies, 'နင်/ငါ' for couples. Slang is allowed but keep it readable."
|
| 483 |
-
elif style == 'cookie_tv':
|
| 484 |
-
translate_prompt += "\nSTYLE: Cookie TV (K-Drama Style). Tone: Polite, warm, emotional. Use 'ရှင်/ကျွန်တော်', 'ကိုကို/မမ'. Focus on romantic and respectful nuances."
|
| 485 |
-
elif style == 'viu':
|
| 486 |
-
translate_prompt += "\nSTYLE: VIU / Netflix Standard. Tone: Professional, modern, balanced formality. No excessive slang. clear and concise."
|
| 487 |
-
elif style == 'mrtv4':
|
| 488 |
-
translate_prompt += "\nSTYLE: MRTV-4 / TV Dubbing. Tone: Formal, dramatic, spoken-word style. Strict grammar. Avoid English loan words if possible."
|
| 489 |
-
elif style == 'disney':
|
| 490 |
-
translate_prompt += "\nSTYLE: Disney+. Tone: Family-friendly, cheerful, clear. Avoid profanity. Use simple, universally understood words."
|
| 491 |
-
elif style == 'jav':
|
| 492 |
-
translate_prompt += ("\nSTYLE: **Adult (18+) Movie / JAV Fan Sub**. \n"
|
| 493 |
-
"TONE: **'Khut-Kyan-Kyan' (ခပ်ကြမ်းကြမ်း)** - Rough, Raw, and Erotic. \n"
|
| 494 |
-
"RULES: \n"
|
| 495 |
-
"1. **Pronouns**: STRICTLY use 'ငါ' (I) and 'နင်' (You). NEVER use polite pronouns like 'ကျွန်တော်/ခင်ဗျား'. \n"
|
| 496 |
-
"2. **Vocabulary**: Use direct, explicit street slang. Do not use formal or medical terms (e.g., use 'လိုး' instead of 'ဆက်ဆံ', 'စို့' instead of 'နမ်း'). \n"
|
| 497 |
-
"3. **Context Mappings**: \n"
|
| 498 |
-
" - 'I'm cumming / Iku' -> 'ပြီးပြီ' or 'ထွက်ကုန်ပြီ' \n"
|
| 499 |
-
" - 'So good / Kimochi' -> 'ကောင်းလိုက်တာ' or 'ရှယ်ပဲ' or 'ဖီးလ်လာပြီ' \n"
|
| 500 |
-
" - 'Stop / Yamete' -> 'မလုပ်နဲ့' or 'တော်ပါတော့' \n"
|
| 501 |
-
" - 'Pussy / Manko' -> 'ပိပိ' \n"
|
| 502 |
-
"4. **Formatting**: Keep sentences short, punchy, and intense. No Burmese punctuation (၊/။).")
|
| 503 |
-
|
| 504 |
-
if data.get("context"):
|
| 505 |
-
translate_prompt += f'\nCONTEXT: The user provided this specific context (IMDB Link or Movie Name or Character Relationships): "{data["context"]}". Use this to look up specific character names, gender relationships, and plot details to improve translation accuracy.'
|
| 506 |
-
|
| 507 |
-
gemini_payload = {
|
| 508 |
-
"systemInstruction": {"parts": [{"text": translate_prompt}]},
|
| 509 |
-
"contents": [{"parts": [{"text": data["textBatch"]}]}],
|
| 510 |
-
"safetySettings": get_safety_settings(),
|
| 511 |
-
}
|
| 512 |
-
|
| 513 |
-
# --- TRANSCRIBE (Standard) ---
|
| 514 |
-
elif task == "transcribe":
|
| 515 |
-
if not data.get("mimeType") or not data.get("audioData"):
|
| 516 |
-
raise ValueError("Missing audio data for transcription.")
|
| 517 |
-
|
| 518 |
-
gemini_payload = {
|
| 519 |
-
"contents": [
|
| 520 |
-
{
|
| 521 |
-
"parts": [
|
| 522 |
-
{"text": PROMPTS["TRANSCRIBE"]},
|
| 523 |
-
{
|
| 524 |
-
"inlineData": {
|
| 525 |
-
"mimeType": data["mimeType"],
|
| 526 |
-
"data": data["audioData"]
|
| 527 |
-
}
|
| 528 |
-
}
|
| 529 |
-
]
|
| 530 |
-
}
|
| 531 |
-
],
|
| 532 |
-
"safetySettings": get_safety_settings(),
|
| 533 |
-
}
|
| 534 |
-
|
| 535 |
-
# --- TRANSCRIBE CHUNK ---
|
| 536 |
-
elif task == "transcribe_chunk":
|
| 537 |
-
if not data.get("mimeType") or not data.get("audioData"):
|
| 538 |
-
raise ValueError("Missing audio data for chunk transcription.")
|
| 539 |
-
|
| 540 |
-
gemini_payload = {
|
| 541 |
-
"contents": [
|
| 542 |
-
{
|
| 543 |
-
"parts": [
|
| 544 |
-
{"text": PROMPTS["TRANSCRIBE_CHUNK"]},
|
| 545 |
-
{
|
| 546 |
-
"inlineData": {
|
| 547 |
-
"mimeType": data["mimeType"],
|
| 548 |
-
"data": data["audioData"]
|
| 549 |
-
}
|
| 550 |
-
}
|
| 551 |
-
]
|
| 552 |
-
}
|
| 553 |
-
],
|
| 554 |
-
"generationConfig": {"responseMimeType": "application/json"},
|
| 555 |
-
"safetySettings": get_safety_settings(),
|
| 556 |
-
}
|
| 557 |
-
|
| 558 |
-
# --- VIDEO ANALYZER ---
|
| 559 |
-
elif task == "analyze_video":
|
| 560 |
-
if not data.get("videoUrl"):
|
| 561 |
-
raise ValueError("Missing 'data.videoUrl'.")
|
| 562 |
-
|
| 563 |
-
gemini_payload = {
|
| 564 |
-
"systemInstruction": {"parts": [{"text": PROMPTS["ANALYZE_VIDEO"]}]},
|
| 565 |
-
"contents": [{
|
| 566 |
-
"parts": [
|
| 567 |
-
{"file_data": {"file_uri": data["videoUrl"], "mime_type": "video/mp4"}},
|
| 568 |
-
{"text": "Please analyze this video based on the system instructions and return the JSON moderation report."}
|
| 569 |
-
]
|
| 570 |
-
}],
|
| 571 |
-
"generationConfig": {"responseMimeType": "application/json"},
|
| 572 |
-
"safetySettings": get_safety_settings(),
|
| 573 |
-
}
|
| 574 |
-
|
| 575 |
-
# --- ANCHOR FINDER ---
|
| 576 |
-
elif task == "anchor_find":
|
| 577 |
-
if not data.get("payload"):
|
| 578 |
-
raise ValueError("Missing 'data.payload'.")
|
| 579 |
-
|
| 580 |
-
gemini_payload = {
|
| 581 |
-
"systemInstruction": {"parts": [{"text": PROMPTS["ANCHOR_FINDER"]}]},
|
| 582 |
-
"contents": [{"parts": [{"text": data["payload"]}]}],
|
| 583 |
-
"generationConfig": {
|
| 584 |
-
"responseMimeType": "application/json",
|
| 585 |
-
"responseSchema": data.get("responseSchema") # Dynamic schema
|
| 586 |
-
},
|
| 587 |
-
"safetySettings": get_safety_settings(),
|
| 588 |
-
}
|
| 589 |
-
|
| 590 |
-
# --- REPHRASER ---
|
| 591 |
-
elif task == "rephrase":
|
| 592 |
-
if not data.get("textBatch"):
|
| 593 |
-
raise ValueError("Missing 'data.textBatch'.")
|
| 594 |
-
|
| 595 |
-
rephrase_system_prompt = data.get("customPrompt") if data.get("customPrompt") else PROMPTS["REPHRASE_DEFAULT"]
|
| 596 |
-
|
| 597 |
-
gemini_payload = {
|
| 598 |
-
"systemInstruction": {"parts": [{"text": rephrase_system_prompt}]},
|
| 599 |
-
"contents": [{"parts": [{"text": data["textBatch"]}]}],
|
| 600 |
-
"safetySettings": get_safety_settings(),
|
| 601 |
-
}
|
| 602 |
-
|
| 603 |
-
# --- LINE FIXER ---
|
| 604 |
-
elif task == "line_fixer":
|
| 605 |
-
if not data.get("textBatch"):
|
| 606 |
-
raise ValueError("Missing 'data.textBatch'.")
|
| 607 |
-
|
| 608 |
-
mode = data.get("mode")
|
| 609 |
-
fixer_prompt = ""
|
| 610 |
-
if mode == "burmese":
|
| 611 |
-
fixer_prompt = PROMPTS["LINE_FIXER_BURMESE"]
|
| 612 |
-
elif mode == "english":
|
| 613 |
-
fixer_prompt = PROMPTS["LINE_FIXER_ENGLISH"]
|
| 614 |
-
else:
|
| 615 |
-
fixer_prompt = PROMPTS["LINE_FIXER_GENERAL"]
|
| 616 |
-
|
| 617 |
-
if data.get("preventOverlap"):
|
| 618 |
-
fixer_prompt += PROMPTS["LINE_FIXER_OVERLAP_ADDON"]
|
| 619 |
-
|
| 620 |
-
gemini_payload = {
|
| 621 |
-
"systemInstruction": {"parts": [{"text": fixer_prompt}]},
|
| 622 |
-
"contents": [{"parts": [{"text": data["textBatch"]}]}],
|
| 623 |
-
"safetySettings": get_safety_settings(),
|
| 624 |
-
}
|
| 625 |
-
|
| 626 |
-
# --- SUSPECT CHECK ---
|
| 627 |
-
elif task == "suspect_check":
|
| 628 |
-
if not data.get("textBatch"):
|
| 629 |
-
raise ValueError("Missing 'data.textBatch'.")
|
| 630 |
-
|
| 631 |
-
gemini_payload = {
|
| 632 |
-
"systemInstruction": {"parts": [{"text": PROMPTS["SUSPECT_CHECK"]}]},
|
| 633 |
-
"contents": [{"parts": [{"text": data["textBatch"]}]}],
|
| 634 |
-
"generationConfig": {"responseMimeType": "application/json"},
|
| 635 |
-
"safetySettings": get_safety_settings(),
|
| 636 |
-
}
|
| 637 |
-
|
| 638 |
-
else:
|
| 639 |
-
raise HTTPException(status_code=400, detail=f"Unknown task: {task}")
|
| 640 |
-
|
| 641 |
-
except Exception as e:
|
| 642 |
-
raise HTTPException(status_code=400, detail=f"Payload Construction Error: {str(e)}")
|
| 643 |
-
|
| 644 |
-
# --- EXECUTE GEMINI CALL ---
|
| 645 |
-
# Manually construct URL using cleaned inputs
|
| 646 |
-
    base_url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent"
|
| 647 |
-
|
| 648 |
-
# Use repr() to log invisible chars for debugging
|
| 649 |
-
logger.info(f"DEBUG URL: {repr(base_url)}")
|
| 650 |
-
|
| 651 |
-
async with httpx.AsyncClient(timeout=60.0) as client:
|
| 652 |
-
try:
|
| 653 |
-
            # Pass the API key as a query parameter so it is never concatenated into the URL string by hand
|
| 654 |
-
api_response = await client.post(
|
| 655 |
-
base_url,
|
| 656 |
-
params={"key": api_key},
|
| 657 |
-
json=gemini_payload,
|
| 658 |
-
headers={"Content-Type": "application/json"}
|
| 659 |
-
)
|
| 660 |
-
except httpx.UnsupportedProtocol as e:
|
| 661 |
-
logger.error(f"Protocol Error: {e}")
|
| 662 |
-
raise HTTPException(status_code=500, detail=f"Server Protocol Error (Check inputs): {str(e)}")
|
| 663 |
-
except Exception as e:
|
| 664 |
-
logger.error(f"Connection Error: {e}")
|
| 665 |
-
raise HTTPException(status_code=500, detail=f"Connection/Network Error: {repr(e)}")
|
| 666 |
-
|
| 667 |
-
if api_response.status_code != 200:
|
| 668 |
-
error_body = api_response.text
|
| 669 |
-
raise HTTPException(
|
| 670 |
-
status_code=api_response.status_code,
|
| 671 |
-
detail=f"Gemini API Error: {api_response.status_code} {api_response.reason_phrase}. Response: {error_body}"
|
| 672 |
-
)
|
| 673 |
-
|
| 674 |
-
gemini_data = api_response.json()
|
| 675 |
-
return gemini_data
|
| 676 |
-
|
| 677 |
-
# --- RUN INSTRUCTION ---
|
| 678 |
-
# Save as main.py
|
| 679 |
-
# Run with: uvicorn main:app --reload
|
|
|
|
| 1 |
+
# Use the official Python image
|
| 2 |
+
FROM python:3.10
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
+
# Set the working directory
|
| 5 |
+
WORKDIR /code
|
| 6 |
|
| 7 |
+
# Copy the requirements file and install dependencies
|
| 8 |
+
COPY ./requirements.txt /code/requirements.txt
|
| 9 |
+
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
+
# Copy the application code
|
| 12 |
+
COPY ./app.py /code/app.py
|
|
|
|
| 13 |
|
| 14 |
+
# Create and switch to a non-root user with UID 1000 (Hugging Face Spaces runs containers as UID 1000)
|
| 15 |
+
RUN useradd -m -u 1000 user
|
| 16 |
+
USER user
|
| 17 |
+
ENV HOME=/home/user \
|
| 18 |
+
PATH=/home/user/.local/bin:$PATH
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
+
# Run the application on port 7860 (the default port Hugging Face Spaces expects the app to listen on)
|
| 21 |
+
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|