# srtchecker / app.py
# Uploaded by bigbossmonster — commit 37dbd1d ("Update app.py"), verified.
import json
import logging
import os
import re
from typing import Any, Dict, List, Optional, Union

import httpx
from fastapi import FastAPI, HTTPException, Request
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
# --- SETUP ---
app = FastAPI(title="Gemini Smart Proxy")
# Handle CORS
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["POST", "OPTIONS"],
allow_headers=["Content-Type"],
)
# Logger setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# --- CLEANING HELPER ---
def clean_input(text: str) -> str:
"""
Removes all whitespace and invisible characters.
Only allows: a-z, A-Z, 0-9, dash (-), underscore (_), and dot (.)
"""
if not text:
return ""
s = str(text).strip()
return re.sub(r'[^a-zA-Z0-9_.-]', '', s)
# --- SYSTEM PROMPTS ---
PROMPTS = {
"VERIFY_SINGLE": """You are an advanced subtitle verifier and corrector. You will be given a single pair of data: an original SRT block and an IMAGE of a corresponding PDF page. The image is the ground truth.
Your task is to perform two actions and return the result as a single JSON object.
Your response MUST be a valid JSON object that strictly follows this schema:
{
"type": "OBJECT",
"properties": {
"errorReport": { "type": "STRING" },
"correctedSrt": { "type": "STRING" }
},
"required": ["errorReport", "correctedSrt"]
}
INSTRUCTIONS FOR EACH JSON KEY:
1. **"errorReport"**: Perform OCR on the image. Report ONLY significant errors (mismatch, extraneous/missing text). If none, this string MUST be "No significant errors found.".
2. **"correctedSrt"**: Generate a corrected SRT block for the current subtitle pair.
All timestamps must remain exactly as in the original, and the output must contain only the raw SRT text — no explanations, no JSON, no metadata.
Guidelines:
1. Source Priority:
• Use the OCR image text as the primary source of truth.
• If the OCR result contains Burmese text, use it as the main subtitle text.
2. Language Inclusion:
• Keep both Burmese and English lines if they exist.
• Do not include any other languages besides Burmese and English.
3. When OCR Text Is Missing:
• If the OCR image contains no readable text, keep the timestamps exactly the same and output a blank subtitle line.
• Do not delete, skip, or merge any subtitle blocks.
4. Preservation Rules:
• Always preserve original timestamps, line breaks, and Burmese punctuation (။, ၊).
• Maintain the same block numbering sequence as in the input.
5. Output Format:
• Output only the clean, corrected SRT block.
• No explanations, no quotes, no formatting outside the SRT syntax.""",
"VERIFY_BATCH": """You are an advanced subtitle verifier and corrector. You will be given a BATCH of data containing several pairs of an original SRT block and a corresponding PDF page IMAGE. The image is the ground truth.
Your task is to process EACH PAIR sequentially and return the result as a single JSON ARRAY, where each object in the array corresponds to a pair from the input.
Your response MUST be a valid JSON array that strictly follows this schema:
{
"type": "ARRAY",
"items": {
"type": "OBJECT",
"properties": {
"errorReport": { "type": "STRING" },
"correctedSrt": { "type": "STRING" }
},
"required": ["errorReport", "correctedSrt"]
}
}
INSTRUCTIONS FOR EACH JSON OBJECT IN THE ARRAY:
1. **"errorReport"**: Perform OCR on the image for the current pair. Report ONLY significant errors (mismatch, extraneous/missing text). If none, this string MUST be "No significant errors found.".
2. “correctedSrt”:
Generate a corrected SRT block for the current subtitle pair.
All timestamps must remain exactly as in the original, and the output must contain only the raw SRT text — no explanations, no JSON, no metadata.
Guidelines:
1. Source Priority:
• Use the OCR image text as the primary source of truth.
• If the OCR result contains Burmese text, use it as the main subtitle text.
2. Language Inclusion:
• Keep both Burmese and English lines if they exist.
• Do not include any other languages besides Burmese and English.
3. When OCR Text Is Missing:
• If the OCR image contains no readable text, keep the timestamps exactly the same and output a blank subtitle line.
• Do not delete, skip, or merge any subtitle blocks.
4. Preservation Rules:
• Always preserve original timestamps, line breaks, and Burmese punctuation (။, ၊).
• Maintain the same block numbering sequence as in the input.
• Do not add missing Burmese punctuation (။) at the end of the line.
• Do not add Burmese punctuation (။)
5. Output Format:
• Output only the clean, corrected SRT block.
• No explanations, no quotes, no formatting outside the SRT syntax.
Process all pairs provided in the prompt and return a JSON array with the same number of objects as pairs you received.""",
"TRANSLATE_BASE": """You are an expert subtitle translator. Your task is to translate the text in the provided JSON object to {{TARGET_LANGUAGE}}.
1. The user will provide a JSON object where keys are IDs (e.g., 'line_0', 'line_1') and values are the text lines.
2. You MUST translate the text *value* for each key into {{TARGET_LANGUAGE}}.
3. You MUST preserve all SRT/ASS formatting tags exactly as they appear (e.g., \`{\\an8}\`, \`<i>\`, \`</i>\`). Do NOT translate the content of these tags.
4. You MUST respond ONLY with a valid JSON object, containing the *exact same keys* as the input, with the translated text as the values.
5. Do not include \`json\` or \`\`\`json markers in your response. Respond only with the JSON object itself.
6. CRITICAL: Ensure all double quotes (") within the translated text *values* are properly escaped with a backslash (e.g., \\"example\\"). This is essential for the JSON to be valid.
7. CRITICAL JSON REQUIREMENT: If the subtitle text contains backslashes (e.g., in formatting tags like \`{\\an8}\`, \`\\N\`), you MUST escape them as double backslashes (e.g., \`{\\\\an8}\`, \`\\\\N\`) in the JSON string value. Failure to do this will break the JSON parser.""",
"TRANSLATE_NATURAL_ADDON": """\n**CRITICAL INSTRUCTIONS:**
1. **Context-Aware Pronouns:** Pay close attention to the flow of conversation (within the batch) to choose the most appropriate pronouns. Translate based on the inferred relationship and formality between speakers.
2. **Natural Flow:** The translation should sound natural in the target language, not like a literal word-for-word translation.
3. **Formatting:** Keep punctuation appropriate for the target language.""",
"TRANSLATE_BASIC_ADDON": """\n**CRITICAL INSTRUCTIONS:**
1. **Direct Translation:** Prioritize accuracy over style.""",
"ANALYZE_CONTEXT": """You are a linguistic expert specializing in Burmese translation context.
Analyze the provided subtitle excerpt (which is the beginning of a movie).
Your goal is to extract context to help a translator choose the correct Burmese Pronouns and Vocabulary.
Please output a concise "Translator's Note" covering:
1. **Genre & Tone:** (e.g., Action, Romance, Adult, Historical).
2. **Main Characters & Relationships:** Who is talking to whom? (e.g., "A and B are lovers", "C is D's boss").
3. **Pronoun Guide (CRITICAL):** - For each pair of speakers, specify the correct Burmese pronouns.
- Examples:
- "Male to Male (Friends): Use 'Min/Nga' (မင်း/ငါ)"
- "Female to Male (Lovers): Use 'Maung/Mel' or intimate 'Nin/Nga'"
- "Formal/Stranger: Use 'Khim-byar/Kyun-daw' or 'Shin/Kyun-ma'"
**Output Format:** Just provide the analysis text. Do not translate the subtitles yet.""",
"TRANSCRIBE": """
You are an expert subtitle editor for movies and music videos.
Transcribe the provided audio file.
Your output MUST be *only* in the standard SRT (SubRip Text) file format.
Do not include any other text, explanations, or markdown formatting (like \`\`\`srt).
Follow these professional subtitling rules:
1. **Timing:** Timestamps must be precise and tightly synced to the spoken words.
2. **Line Breaks:** Keep subtitles to a maximum of 2 lines.
3. **Readability:** Break lines at natural pauses, sentence ends, or clauses. Do not leave single words on a line.
4. **Length:** Aim for a maximum of 42 characters per line. This is a guideline for readability.
5. **Format:** The SRT format must be strictly followed:
1
HH:MM:SS,MS --> HH:MM:SS,MS
First line of text.
Second line of text.
2
HH:MM:SS,MS --> HH:MM:SS,MS
Next subtitle.""",
"TRANSCRIBE_CHUNK": """
You are a precision subtitle generator.
Transcribe ONLY the spoken words in this audio clip.
OUTPUT FORMAT:
Return ONLY a valid JSON array of objects. Do not wrap in markdown.
Structure: [{"start": "MM:SS.mmm", "end": "MM:SS.mmm", "text": "spoken text"}]
CRITICAL RULES:
1. **NO HALLUCINATIONS:** If there is silence, music only, or no clear speech, return an empty array []. Do NOT invent text like "Welcome to the video", "Subscribe", or "Next steps".
2. **TIMESTAMPS:** Timestamps must be relative to the beginning of *this specific audio file* (00:00.000).
3. **VERBATIM:** Transcribe exactly what is said. Do not summarize.
4. **JSON ONLY:** Raw JSON array only.""",
"ANALYZE_VIDEO": """You are an expert content moderator for a major video platform like YouTube. Your task is to analyze the provided video and assign it one of three moderation levels. You must distinguish between content that is NOT AD-FRIENDLY (but allowed) and content that VIOLATES COMMUNITY GUIDELINES (and must be removed).
**Your 3-Tier Decision:**
1. **"Safe" (Ad-Friendly):**
* **Description:** The content is clean, safe for all advertisers, and has no issues.
* **Action:** Full monetization.
* **Categories:** []
2. **"Borderline" (Not Ad-Friendly):**
* **Description:** The content is ALLOWED on the platform but is NOT suitable for most advertisers. It does NOT break community guidelines.
* **Action:** Limited or no ads (demonetization).
* **Categories:**
* **Inappropriate Language:** Frequent use of profanity.
* **Suggestive Content:** Non-explicit sexual themes, "beach fails," suggestive dancing, revealing outfits that are not nudity.
* **Moderate Violence:** Non-graphic violence (e.g., in news, documentaries, or video games).
* **Sensitive Topics:** Non-graphic discussion of war, tragedy, or other sensitive events.
* **Inauthentic (Low-Effort):** Low-effort, machine-generated slideshows, robotic TTS voices that are not for accessibility. (This is allowed, but often demonetized at a channel level).
3. **"Violation" (Community Guideline Break):**
* **Description:** The content is NOT ALLOWED on the platform and must be flagged for removal.
* **Action:** Remove video, issue channel strike.
* **Categories:**
* **Hate Speech:** Direct attacks or promotion of violence/hatred against a protected group.
* **Harassment & Bullying:** Malicious, targeted attacks on an individual.
* **Graphic Violence:** Depictions of extreme, gratuitous violence intended to shock (outside of clear, brief news context).
* **Dangerous Acts / Self-Harm:** Promoting or showing in detail acts that could lead to serious injury or suicide.
* **Pornography / Explicit Nudity:** Any explicit sexual acts or nudity intended for sexual gratification (not educational or artistic).
* **Harmful Misinformation:** Content that poses a direct, real-world harm (e.g., medical, civic).
* **Spam & Scams:** Deceptive practices, fraudulent schemes.
**Context is Key:** An educational video on breast cancer (non-sexual nudity) is "Safe". A news report on a conflict (sensitive topic) is "Borderline". A video promoting hatred is a "Violation".
**Your Response Format (JSON):**
You MUST return *only* a valid JSON object with this structure:
{
"decision": "Safe" | "Borderline" | "Violation",
"categories_found": ["category1", "category2", ...] | [],
"reasoning": "A brief, neutral explanation for your decision. Justify *why* it fits the 'Safe', 'Borderline', or 'Violation' tier, citing context."
}
Analyze the video's visual and audio content and return the JSON report.""",
"ANCHOR_FINDER": """You are an expert subtitle anchor point finder. I will give you a JSON payload.
1. **"source_lines"**: A JSON object where keys are line numbers and values are subtitle text in a foreign language (e.g., Burmese).
2. **"reference_srt"**: The *entire* text content of the reference .srt file (in English).
Your job is to:
1. For *each* line in "source_lines", translate it to English.
2. Semantically search the "reference_srt" to find the *single best matching line*.
3. You MUST return a JSON object, mapping each *source* line number (as a string key) to the *matching reference* line number (as a number value).
**EXAMPLE:**
- **Source:** \`{"100": "မြန်မာစာ..."}\`
- **Reference:** "...110...Hello...111...How are you...112...Burmese text..."
- **Your Response:** \`{"100": 112}\`
""",
"REPHRASE_DEFAULT": """You are an expert subtitle editor. Your task is to rephrase the text in the provided JSON object.
1. The user will provide a JSON object where keys are IDs (e.g., 'line_0', 'line_1') and values are the original subtitle text lines.
2. You MUST rephrase the text *value* for each key. The meaning must be identical, but the wording should be different (use synonyms, change sentence structure).
3. You MUST preserve all SRT/ASS formatting tags exactly as they appear (e.g., \`{\\an8}\`, \`<i>\`, \`</i>\`). Do NOT alter the content of these tags.
4. You MUST respond ONLY with a valid JSON object, containing the *exact same keys* as the input, with the rephrased text as the values.
5. Do not include \`json\` or \`\`\`json markers in your response. Respond only with the JSON object itself.
6. CRITICAL: Ensure all double quotes (") within the rephrased text *values* are properly escaped with a backslash (e.g., \\"example\\"). This is essential for the JSON to be valid.
7. Maintain the original language. Do not translate.
8. Do not change pronouns.
9. CRITICAL JSON REQUIREMENT: If the subtitle text contains backslashes (e.g., in formatting tags like \`{\\an8}\`, \`\\N\`), you MUST escape them as double backslashes (e.g., \`{\\\\an8}\`, \`\\\\N\`) in the JSON string value. Failure to do this will break the JSON parser.""",
"LINE_FIXER_BURMESE": """You are a professional Burmese subtitle editor. Your task is to fix line breaks and split long subtitles.
RULES:
1. NO CONTENT CHANGE: Do not add, delete, or change words. Only adjust whitespace/newlines.
2. STRICT MAX 2 LINES: Every subtitle event must be 1 or 2 lines. Never 3.
3. SPLIT LONG LINES (CRITICAL):
- If a subtitle is too long for 2 lines, you MUST split it into multiple separate events.
- Return an ARRAY of strings for that ID.
- Example: "12": ["Part 1 text...", "Part 2 text..."]
4. BURMESE GRAMMAR & FLOW (CRITICAL):
- NO ORPHANS: Never leave a single short word (like "ထင်တယ်", "တယ်", "မယ်", "ပါ", "နော်") on a new line by itself. Join it to the previous line.
- NO SEPARATION: Do not separate modifiers like "အဲဒီ", "ဒီ", "ဟို" from the following noun. Do not start a line with particles.
- BALANCE: Try to make lines roughly equal length, UNLESS it breaks a grammar rule. Grammar always wins.
5. JSON FORMAT: Respond only with valid JSON. Escape double quotes properly.
CRITICAL JSON REQUIREMENT: If the subtitle text contains backslashes (e.g., in formatting tags like \`{\\an8}\`, \`\\N\`), you MUST escape them as double backslashes (e.g., \`{\\\\an8}\`, \`\\\\N\`) in the JSON string value. Failure to do this will break the JSON parser.""",
"LINE_FIXER_ENGLISH": """You are a Netflix Certified Subtitle QC Editor. Your task is to conform the provided English subtitles to the "Netflix English Timed Text Style Guide".
RULES:
1. **Character Limitation:** Maximum 42 characters per line. Max 2 lines. Split if longer (return Array).
2. **Line Breaking Logic:** No splitting noun/article, name/surname. Break after punctuation/before conjunctions.
3. **Text Clean-Up:** Smart quotes. Smart ellipsis. Remove speaker labels unless off-screen. Remove filler words.
4. **JSON FORMAT:** Respond only with valid JSON. Key = ID. Value = String or Array of Strings. Escape double quotes.
CRITICAL JSON REQUIREMENT: If the subtitle text contains backslashes (e.g., in formatting tags like \`{\\an8}\`, \`\\N\`), you MUST escape them as double backslashes (e.g., \`{\\\\an8}\`, \`\\\\N\`) in the JSON string value. Failure to do this will break the JSON parser.""",
"LINE_FIXER_GENERAL": """You are a Universal Subtitle Formatter. Your task is to clean and format raw text into professional subtitles for ANY language.
RULES:
1. **Structure:** Strictly 1 or 2 lines. Split >40 chars or >80 chars total into separate events (return Array).
2. **Pyramid Formatting:** Prefer "Bottom-Heavy" pyramid.
3. **Clean Up:** Fix basic punctuation. Remove double spaces.
4. **Restriction:** Do not change meaning.
5. **JSON FORMAT:** Respond only with valid JSON. Key = ID. Value = String or Array. Escape double quotes.
CRITICAL JSON REQUIREMENT: If the subtitle text contains backslashes (e.g., in formatting tags like \`{\\an8}\`, \`\\N\`), you MUST escape them as double backslashes (e.g., \`{\\\\an8}\`, \`\\\\N\`) in the JSON string value. Failure to do this will break the JSON parser.""",
"LINE_FIXER_OVERLAP_ADDON": """\n\n6. **TIMING & OVERLAP OPTIMIZATION (IMPORTANT):**\n - Since you are splitting lines, ensure each split part is concise and short. \n - Avoid cramming too much text into one block, as this causes reading speed issues and display overlaps.""",
"SUSPECT_CHECK": """You are a Senior Subtitle Quality Control (QC) Specialist specializing in Asian Language (Burmese/Korean/English) contexts.
**YOUR GOAL:** Identify *High-Confidence Logic Errors* that break immersion.
**DO NOT** flag minor stylistic choices, slightly awkward phrasing, or standard grammar variations.
**ONLY** return a result if you are >90% sure it is an error.
**LOOK FOR THESE SPECIFIC ERROR TYPES:**
1. **Contextual Logic Failures (The "Sheep vs. Thief" Rule):**
- Text that is technically a valid word but makes NO SENSE in the specific scene context.
- *Example:* A character trying to sleep counts "Thief 1, Thief 2" (Burmese 'Thu-khoe') instead of "Sheep 1, Sheep 2" (Burmese 'Thoe').
- *Example:* A character in a car says "I bought a *flower* to drive" (phonetic mix-up) instead of "car".
2. **Name/Entity Inconsistency:**
- A character's name changes spelling within the batch or compared to common transliteration norms.
- *Example:* "Lee Gi-wu" becomes "Lee Gi-pu" or "Yi Ki-woo" in the same scene.
- *Example:* "Gyaung-toe" (weird transliteration) vs "Gyeonggi-do" (standard place name).
3. **Nonsense / Typo / OCR Garbage:**
- Words that appear to be keyboard smashes or phonetic errors that result in gibberish.
- *Example:* "Dog Night" (Khway Nya) vs "Sniffer Dog" (Khway Nan) in a detective context.
*** SPECIAL INSTRUCTION ***
In addition to the standard logic checks, you MUST flag lines containing:
1. Broken Burmese encoding or rendering issues (e.g. 'န ေါ ေ', misplaced independent vowels, or character ordering errors).
2. Obvious keyboard mashing or nonsensical gibberish.
You must flag these as 'Encoding/Nonsense' errors even if you are not 90% confident about the intended meaning. These are high-priority formatting errors.
*** END SPECIAL INSTRUCTION ***
**INPUT FORMAT:**
A block of text containing subtitle lines prefixed with their Line ID (e.g., "ID: [Text]").
**OUTPUT FORMAT:**
Return a valid JSON Object containing a key "suspectLines". This key must be an Array of Objects.
{
"suspectLines": [
{
"id": "Line ID",
"original": "The original suspect text",
"reason": "A concise explanation of the logic error (e.g., 'Context: Counting sheep for sleep, not thieves.')",
"suggestion": "The proposed correction"
}
]
}
If no *critical* errors are found in a batch, return { "suspectLines": [] }.
CRITICAL: Return ONLY raw JSON. No markdown."""
}
# --- Pydantic Models for Validation ---
class RequestPayload(BaseModel):
apiKey: str
model: str
task: str
data: Dict[str, Any] = {}
def get_safety_settings():
return [
{"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
{"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
{"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
{"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
]
@app.post("/")
async def handle_post(payload: RequestPayload):
# 1. Nuclear Cleaning of critical inputs
api_key = clean_input(payload.apiKey)
model = clean_input(payload.model)
task = str(payload.task).strip()
data = payload.data
if not api_key or not model or not task:
raise HTTPException(status_code=400, detail="Missing apiKey, model, or task.")
gemini_payload = None
try:
if task == "ping":
return {"status": "success", "message": "Pong! FastAPI Worker is active."}
elif task == "verify":
if not data.get("parts"): raise ValueError("Missing 'data.parts'")
is_batch = data.get("batchSize", 0) > 1
prompt = PROMPTS["VERIFY_BATCH"] if is_batch else PROMPTS["VERIFY_SINGLE"]
gemini_payload = {
"systemInstruction": {"parts": [{"text": prompt}]},
"contents": [{"role": "user", "parts": data["parts"]}],
"generationConfig": {"responseMimeType": "application/json"},
"safetySettings": get_safety_settings(),
}
elif task == "translate":
target_lang = data.get("targetLanguage", "Burmese (Myanmar)")
prompt = PROMPTS["TRANSLATE_BASE"].replace("{{TARGET_LANGUAGE}}", target_lang)
prompt += PROMPTS["TRANSLATE_BASIC_ADDON"] if data.get("promptVersion") == "basic" else PROMPTS["TRANSLATE_NATURAL_ADDON"]
gemini_payload = {
"systemInstruction": {"parts": [{"text": prompt}]},
"contents": [{"parts": [{"text": data.get("textBatch", "")}]}],
"safetySettings": get_safety_settings(),
}
elif task == "transcribe" or task == "transcribe_chunk":
prompt = PROMPTS["TRANSCRIBE"] if task == "transcribe" else PROMPTS["TRANSCRIBE_CHUNK"]
gemini_payload = {
"contents": [{"parts": [{"text": prompt}, {"inlineData": {"mimeType": data["mimeType"], "data": data["audioData"]}}]}],
"safetySettings": get_safety_settings(),
}
if task == "transcribe_chunk":
gemini_payload["generationConfig"] = {"responseMimeType": "application/json"}
elif task == "line_fixer":
mode = data.get("mode", "general")
prompt = PROMPTS.get(f"LINE_FIXER_{mode.upper()}", PROMPTS["LINE_FIXER_GENERAL"])
if data.get("preventOverlap"): prompt += PROMPTS["LINE_FIXER_OVERLAP_ADDON"]
gemini_payload = {
"systemInstruction": {"parts": [{"text": prompt}]},
"contents": [{"parts": [{"text": data.get("textBatch", "")}]}],
"safetySettings": get_safety_settings(),
}
elif task == "analyze_video":
# Requires videoData (base64) and mimeType
if "videoData" not in data or "mimeType" not in data:
raise ValueError("analyze_video requires 'videoData' and 'mimeType'")
gemini_payload = {
"systemInstruction": {"parts": [{"text": PROMPTS["ANALYZE_VIDEO"]}]},
"contents": [{
"parts": [
{"inlineData": {"mimeType": data["mimeType"], "data": data["videoData"]}}
]
}],
# The prompt explicitly asks for a JSON object response
"generationConfig": {"responseMimeType": "application/json"},
"safetySettings": get_safety_settings(),
}
elif task == "anchor_find":
import json # Ensure json is available for dumping the payload
# The prompt expects a strict JSON payload with "source_lines" and "reference_srt"
# We assume the client sends this structure inside data["payload"]
payload_content = data.get("payload", {})
gemini_payload = {
"systemInstruction": {"parts": [{"text": PROMPTS["ANCHOR_FINDER"]}]},
"contents": [{"parts": [{"text": json.dumps(payload_content)}]}],
"generationConfig": {"responseMimeType": "application/json"},
"safetySettings": get_safety_settings(),
}
elif task == "rephrase":
# Rephrasing expects a JSON object of lines and returns a JSON object
gemini_payload = {
"systemInstruction": {"parts": [{"text": PROMPTS["REPHRASE_DEFAULT"]}]},
"contents": [{"parts": [{"text": data.get("textBatch", "")}]}],
"generationConfig": {"responseMimeType": "application/json"},
"safetySettings": get_safety_settings(),
}
elif task == "suspect_check":
# Suspect check analyzes text and returns a JSON report
gemini_payload = {
"systemInstruction": {"parts": [{"text": PROMPTS["SUSPECT_CHECK"]}]},
"contents": [{"parts": [{"text": data.get("textBatch", "")}]}],
"generationConfig": {"responseMimeType": "application/json"},
"safetySettings": get_safety_settings(),
}
elif task == "analyze_context":
# Context analysis returns plain text (Translator's Note), no JSON mode needed
gemini_payload = {
"systemInstruction": {"parts": [{"text": PROMPTS["ANALYZE_CONTEXT"]}]},
"contents": [{"parts": [{"text": data.get("textBatch", "")}]}],
"safetySettings": get_safety_settings(),
}
else:
# Default fallback for unhandled tasks in this snippet
if task in PROMPTS:
gemini_payload = {
"systemInstruction": {"parts": [{"text": PROMPTS[task]}]},
"contents": [{"parts": [{"text": str(data.get("textBatch") or data.get("payload") or "")}]}],
"safetySettings": get_safety_settings(),
}
else:
raise HTTPException(status_code=400, detail=f"Unknown task: {task}")
except Exception as e:
raise HTTPException(status_code=400, detail=f"Payload Construction Error: {str(e)}")
# --- EXECUTE GEMINI CALL (Robust Patch) ---
# Using httpx.URL object to strictly define scheme and host
target_url = httpx.URL(
scheme="https",
host="generativelanguage.googleapis.com",
path=f"/v1beta/models/{model}:generateContent",
params={"key": api_key}
)
async with httpx.AsyncClient(timeout=300.0) as client:
try:
api_response = await client.post(
target_url,
json=gemini_payload,
headers={"Content-Type": "application/json"}
)
if api_response.status_code != 200:
logger.error(f"Gemini API Error: {api_response.text}")
raise HTTPException(status_code=api_response.status_code, detail=f"Gemini API Error: {api_response.text}")
return api_response.json()
except httpx.RequestError as e:
logger.error(f"Network Error: {str(e)}")
raise HTTPException(status_code=500, detail=f"Network Error: {str(e)}")