import asyncio

import torch
from fastapi import FastAPI, HTTPException
from langdetect import detect, LangDetectException
from pydantic import BaseModel
from transformers import MarianMTModel, MarianTokenizer
| |
|
| | |
# ASGI application object; the route handler in this module is registered on it.
app = FastAPI(
    title="Helsinki-NLP Translation API",
)
| |
|
| | |
# Source-language code -> Helsinki-NLP checkpoint name for that language's
# "-en" model. Insertion order (th, ja, zh, vi) is kept because it shows up in
# user-facing error messages that list the supported codes.
MODEL_MAPPING = {
    code: f"Helsinki-NLP/opus-mt-{code}-en"
    for code in ("th", "ja", "zh", "vi")
}
| |
|
| | |
# Per-language registries keyed by source-language code; they start empty and
# are populated by the loading loop that follows in this module.
tokenizers = {}
models = {}

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
| |
|
# Load every tokenizer/model pair listed in MODEL_MAPPING at import time, so a
# missing or broken checkpoint fails at startup rather than on a request.
try:
    for lang, model_name in MODEL_MAPPING.items():
        tokenizers[lang] = MarianTokenizer.from_pretrained(model_name)
        model = MarianMTModel.from_pretrained(model_name).to(device)
        model.eval()  # put the module in evaluation mode before serving
        models[lang] = model
except Exception as e:
    # Raise a specific exception type and chain the original explicitly so the
    # traceback shows the underlying load error as the direct cause.
    raise RuntimeError(f"Gagal memuat model: {e}") from e
| |
|
| | |
# Response schema for the /translate endpoint. Every field is optional and
# defaults to None, so a response carries only the fields that apply.
# (Documented with comments rather than a docstring so the generated schema
# description is left unchanged.)
class TranslationResponse(BaseModel):
    # English output (or the input itself when it was already English).
    translated_text: str | None = None
    # Language code the translation was performed from.
    source_lang: str | None = None
    # Informational note, e.g. when no translation was necessary.
    message: str | None = None
    # Error description; declared in the schema for failure reporting.
    error: str | None = None
| |
|
| | |
| | def translate_text(text: str, source_lang: str = None): |
| | try: |
| | |
| | if not text.strip(): |
| | return {"error": "Teks tidak boleh kosong"}, None |
| |
|
| | |
| | if not source_lang: |
| | try: |
| | detected_lang = detect(text) |
| | if detected_lang == "en": |
| | return {"translated_text": text, "message": "Teks sudah dalam bahasa Inggris"}, detected_lang |
| | if detected_lang not in MODEL_MAPPING: |
| | return {"error": f"Bahasa terdeteksi '{detected_lang}' tidak didukung. Hanya mendukung: {list(MODEL_MAPPING.keys())}"}, detected_lang |
| | source_lang = detected_lang |
| | except LangDetectException: |
| | return {"error": "Gagal mendeteksi bahasa. Harap masukkan kode bahasa sumber (th, ja, zh, vi)"}, None |
| | else: |
| | if source_lang == "en": |
| | return {"translated_text": text, "message": "Teks sudah dalam bahasa Inggris"}, source_lang |
| | if source_lang not in MODEL_MAPPING: |
| | return {"error": f"Kode bahasa '{source_lang}' tidak didukung. Hanya mendukung: {list(MODEL_MAPPING.keys())}"}, None |
| |
|
| | |
| | tokenizer = tokenizers[source_lang] |
| | model = models[source_lang] |
| |
|
| | |
| | encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True).to(device) |
| | generated_tokens = model.generate(**encoded) |
| | translated_text = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0] |
| |
|
| | return {"translated_text": translated_text}, source_lang |
| |
|
| | except Exception as e: |
| | return {"error": f"Terjemahan gagal: {str(e)}"}, None |
| |
|
| | |
@app.get("/translate", response_model=TranslationResponse)
async def translate(text: str, lang: str | None = None):
    """Translate the given text into English.

    - **text**: text to translate.
    - **lang**: optional source-language code; auto-detected when omitted.

    Responds with HTTP 400 and the error description when the text cannot
    be translated.
    """
    # Tokenization and model.generate() are blocking; running them directly in
    # this coroutine would stall the event loop for every other request, so
    # the work is handed to a worker thread instead.
    result, detected_lang = await asyncio.to_thread(translate_text, text, lang)
    if "error" in result:
        raise HTTPException(status_code=400, detail=result["error"])
    return {
        "translated_text": result.get("translated_text"),
        "source_lang": detected_lang,
        "message": result.get("message"),
    }
| |
|
| | |
if __name__ == "__main__":
    # Script entry point: uvicorn is imported here so it is only required
    # when this file is executed directly.
    import uvicorn

    uvicorn.run(
        app,
        host="0.0.0.0",  # bind on all network interfaces
        port=8000,
    )