# NOTE: removed Hugging Face Spaces page artifact ("Spaces: Paused") that was
# captured along with this file; it is not part of the program.
# app.py — version that forces TH↔EN output to match the requested target language
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from openai import AsyncOpenAI
from langdetect import detect, DetectorFactory
import asyncio, re

# Make langdetect deterministic across runs (its detector is randomized by default).
DetectorFactory.seed = 0

# OpenAI-compatible client pointed at a local Ollama server; the API key is a dummy.
client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="ollama")
MODEL = "scb10x/typhoon-translate-4b"

app = FastAPI(title="HF Space · Ollama Translator")
# Wide-open CORS so a browser frontend on any origin can call this API.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"], allow_methods=["*"], allow_headers=["*"],
)
class TranslateReq(BaseModel):
    """Request body for the translation endpoint."""
    text: str
    target: str = "en"    # "en" or "th"
    source: str = "auto"  # "auto" = auto-detect the source language
# Human-readable language names interpolated into the prompts.
LANG_NAME = {"en": "English", "th": "Thai"}
# Unicode code-point range of the Thai script block.
TH_RANGE = (0x0E00, 0x0E7F)
def guess_lang(text: str) -> str:
    """Best-effort source-language guess.

    Returns "th" or "en" when langdetect reports a Thai/English variant,
    the raw langdetect code for any other language, and "auto" when
    detection fails (e.g. empty or undetectable input).
    """
    try:
        code = detect(text)
    except Exception:
        # langdetect raises on input it cannot classify; signal "unknown".
        return "auto"
    if code.startswith("th"):
        return "th"
    if code.startswith("en"):
        return "en"
    return code
def target_ratio(text: str, tgt: str) -> float:
    """Fraction of alphabetic characters in *text* that belong to the *tgt* script.

    Returns 0.0 when *text* contains no letters or *tgt* is neither "th" nor "en".
    """
    letters = [ch for ch in text if ch.isalpha()]
    if not letters:
        return 0.0
    if tgt == "th":
        hits = sum(1 for ch in letters if TH_RANGE[0] <= ord(ch) <= TH_RANGE[1])
    elif tgt == "en":
        hits = sum(1 for ch in letters if ("a" <= ch <= "z") or ("A" <= ch <= "Z"))
    else:
        # Unknown target script: nothing can count as a hit.
        hits = 0
    return hits / len(letters)
def is_target_strict(text: str, tgt: str, threshold: float = 0.85) -> bool:
    """True when at least *threshold* of the letters in *text* are in the target script."""
    ratio = target_ratio(text, tgt)
    return ratio >= threshold
def split_text(text: str, max_chars: int = 1200):
    """Split *text* into chunks of at most *max_chars* characters.

    Splits on runs of newlines and on sentence-ending punctuation; the
    separators are kept in the token stream (capturing group) so that
    ``"".join(result) == text``.

    Fixes two issues in the original:
    - a single token longer than *max_chars* was emitted as an oversized
      chunk; it is now hard-split so every chunk honors the limit;
    - the ``tok is None`` check was dead code (``re.split`` with a group
      that always matches never yields ``None``) and is removed.
    """
    parts, buf = [], ""
    for tok in re.split(r'(\n+|[.!?。!?])', text):
        if not tok:  # re.split can emit empty strings at the edges
            continue
        if len(buf) + len(tok) <= max_chars:
            buf += tok
            continue
        if buf:
            parts.append(buf)
            buf = ""
        # Hard-split a token that is itself longer than the limit.
        while len(tok) > max_chars:
            parts.append(tok[:max_chars])
            tok = tok[max_chars:]
        buf = tok
    if buf:
        parts.append(buf)
    return parts
def build_system(src: str, tgt: str) -> str:
    """Compose the system prompt: task statement, hard constraints, few-shot examples."""
    # Few-shot examples nudge the model away from echoing the source language.
    examples = "\n".join([
        "EXAMPLES:",
        "Input (English): Hello!",
        "Output (Thai): สวัสดี!",
        "",
        "Input (English): The quick brown fox jumps over the lazy dog.",
        "Output (Thai): สุนัขจิ้งจอกสีน้ำตาลกระโดดข้ามสุนัขขี้เกียจ",
        "",
        "Input (Thai): ยินดีที่ได้รู้จัก",
        "Output (English): Nice to meet you",
        "",
    ])
    header = (
        "You are a professional MT engine for Thai↔English.\n"
        f"TASK: Translate {src} → {tgt}.\n"
        "REQUIREMENTS:\n"
        f"- Output MUST be 100% in {LANG_NAME[tgt]}.\n"
        "- Do NOT echo the source language. Do NOT mix languages.\n"
        "- Preserve meaning, punctuation, numbers, line breaks.\n"
        "- Translate pangrams naturally (do not keep them in source language).\n\n"
    )
    return header + examples
async def ask_ollama(system: str, content: str):
    """Send one chat-completion request to the local Ollama server.

    Args:
        system: System prompt (translation instructions + few-shot examples).
        content: User message containing the text to translate.

    Returns:
        The raw chat-completion response from the OpenAI-compatible API.
    """
    return await client.chat.completions.create(
        model=MODEL,
        temperature=0,  # deterministic decoding for MT
        messages=[{"role": "system", "content": system},
                  {"role": "user", "content": content}],
        # Ollama-specific options tunneled through the OpenAI-compatible API:
        # num_ctx = context window size; num_predict = -1 means no output cap.
        extra_body={"options": {"num_ctx": 4096, "num_predict": -1}}
    )
async def translate_chunk(chunk: str, src: str, tgt: str) -> str:
    """Translate one chunk, retrying up to three times with escalating prompts.

    Each attempt's output is accepted as soon as it is (almost) purely in the
    target script; otherwise the next, stricter prompt is tried.  If all three
    attempts fail the script check, the most recent non-empty output is
    returned so the caller always gets something back.
    """
    system = build_system(src, tgt)
    other = "Thai" if tgt == "en" else "English"
    # Final attempt phrases the instruction in the target language itself
    # (helps squeeze out a pure-Thai answer).
    if tgt == "th":
        native = f"โปรดแปลเป็นภาษาไทยเท่านั้น ห้ามมีอักษรอังกฤษ:\n{chunk}"
    else:
        native = f"Translate into English only. No Thai letters:\n{chunk}"
    prompts = [
        # Attempt 1 — normal.
        f"Translate strictly into {LANG_NAME[tgt]} ONLY:\n\n{chunk}",
        # Attempt 2 — stricter.
        (
            f"STRICT MODE: Output MUST be in {LANG_NAME[tgt]} ONLY.\n"
            f"NO {other} letters.\n"
            f"Translate the following:\n{chunk}"
        ),
        # Attempt 3 — instruction restated in the target language.
        native,
    ]
    attempts = []
    for prompt in prompts:
        resp = await ask_ollama(system, prompt)
        candidate = resp.choices[0].message.content.strip()
        if is_target_strict(candidate, tgt):
            return candidate
        attempts.append(candidate)
    # No attempt passed the script check — prefer the latest non-empty output.
    for candidate in reversed(attempts):
        if candidate:
            return candidate
    return attempts[0]
@app.post("/translate")
async def translate(req: TranslateReq):
    """Translate ``req.text`` into ``req.target``, chunking long input.

    Fix: the handler was never registered with the FastAPI app (no route
    decorator anywhere in the file), so the endpoint was unreachable; the
    ``@app.post("/translate")`` decorator restores registration without
    changing the function itself.

    Returns a dict with the joined translation plus metadata (model,
    resolved source language, target, chunk count).
    """
    src = req.source
    if src == "auto":
        src = guess_lang(req.text)
        # Detection failed, or it "detected" the target language (common for
        # short or mixed text): fall back to a simple script heuristic.
        # NOTE(review): the original indentation was lost; this re-check is
        # applied only in the auto branch so an explicit source is respected.
        if src == "auto" or src == req.target:
            src = "en" if re.search(r"[A-Za-z]", req.text) else "th"
    chunks = split_text(req.text, max_chars=1200)
    sem = asyncio.Semaphore(2)  # raise to 3–4 if the machine can handle it

    async def run(c):
        # Bound concurrency so the local Ollama server is not overwhelmed.
        async with sem:
            return await translate_chunk(c, src, req.target)

    outs = await asyncio.gather(*[run(c) for c in chunks])
    return {
        "translation": "".join(outs),
        "model": MODEL,
        "source": src,
        "target": req.target,
        "chunks": len(chunks),
    }