# Author: TalhaUsuf
# (fixed output) — commit b57f9c8
#%%
from enum import Enum
from fastapi import FastAPI, Query
from rich.console import Console
import ctranslate2
import transformers
from time import perf_counter
import gradio as gr
from pydantic import BaseModel, Field
from lang_codes import flores_codes
import logging
from typing import Union
#%%
# global variables
translator = None   # ctranslate2.Translator — populated by load_model() at startup
tokenizer = None    # transformers tokenizer for facebook/nllb-200-3.3B — set at startup
human_readable_languages = sorted(list(flores_codes.keys()))  # names shown in the docs/enum dropdowns
delta_load_model = None  # seconds spent loading the model (set once at startup)
delta_translate = None   # seconds spent on the most recent translate_batch call
# api docs. 📜
# The supported-languages section is generated from ``flores_codes`` so the
# rendered docs can never drift out of sync with what the endpoint actually
# accepts (the previous hand-maintained list duplicated the mapping's keys).
_supported_languages = "\n".join(human_readable_languages)
description = f"""
🚀 This API will take a text from user, src language and target language and then translate 📜 ➡️ 🗒️ it.
Backend is C-translate2 which is C++ accelerate backend for transformers.
## Languages supported
{_supported_languages}
"""
# OpenAPI tag descriptions surfaced in the generated /docs page.
tags_metadata = [
    {"name": "translate", "description": "endpoints related to translation"},
]
# FastAPI application instance; the metadata below feeds the generated
# OpenAPI/Swagger documentation.
app = FastAPI(
    title="Translator-txt2txt",
    description=description,
    version="0.0.1",
    terms_of_service="http://example.com/terms/",
    contact={
        "name": "chooch.ai",
        "url": "https://chooch.ai/",
    },
    license_info={
        "name": "Apache 2.0",
        # Fixed: previously pointed at chooch.ai instead of the Apache-2.0 licence text.
        "url": "https://www.apache.org/licenses/LICENSE-2.0.html",
    },
    openapi_tags=tags_metadata,
)
class MetaData(BaseModel):
    # Echo of the caller's request parameters, nested inside TranslationResponse.
    text: str
    source_language : str
    # NOTE(review): "target_langauge" is a typo, but it is part of the public
    # response schema and must match the dict key returned by translate(),
    # so fixing it is a coordinated, breaking change — do not rename in isolation.
    target_langauge : str
class TranslationResponse(BaseModel):
    # Response envelope for POST /translate/.
    metadata : MetaData          # echo of the request (text, source, target)
    translated_text : str        # model output in the target language
    time_load_model_sec : float  # startup model-load duration, in seconds
    time_translate_sec : float   # translate_batch wall time for this request, in seconds
@app.on_event("startup")
async def load_model():
    """Load the CTranslate2 NLLB model and its tokenizer at API startup.

    Populates the module-level ``translator`` and ``tokenizer`` handles and
    records the total load time (seconds, including the log call) in the
    module-level ``delta_load_model``.
    """
    global translator, tokenizer, delta_load_model
    t0 = perf_counter()
    # GPU-backed CTranslate2 engine; float16 roughly halves GPU memory use.
    translator = ctranslate2.Translator("ct2_model_nllb", device="cuda", device_index=0, compute_type='float16')
    # assumes this tokenizer matches the converted ct2_model_nllb weights — TODO confirm
    tokenizer = transformers.AutoTokenizer.from_pretrained("facebook/nllb-200-3.3B")
    Console().log("model has been loaded", style='red on green')
    t1 = perf_counter()
    delta_load_model = t1 - t0
@app.post("/translate/", tags=["translate"], response_model=TranslationResponse)
async def translate(
    text : str = Query(default='', description="text that needs to be translated from src to tgt language"),
    src: str = Query(default='English', enum=human_readable_languages, description="source language to translate text From"),
    tgt: str = Query(default='Turkish', enum=human_readable_languages, description="target language to translate text To"),
):
    '''
    🔴 translate the text from src language to target language

    Looks up the FLORES codes for ``src``/``tgt``, tokenizes ``text``,
    runs CTranslate2 batch translation primed with the target-language
    prefix token, and returns the detokenized best hypothesis together
    with timing metadata.
    '''
    # Only delta_translate is assigned here; reading module globals
    # (tokenizer/translator/flores_codes) needs no ``global`` statement.
    global delta_translate
    console = Console()  # one console instead of three throwaway instances
    source = flores_codes[src]
    target = flores_codes[tgt]
    console.log(f"Source language is {source}", style='bold red')
    console.log(f"Destination language is {target}", style='bold red')
    # NOTE(review): mutating the shared tokenizer per-request is not safe
    # under concurrent requests — confirm single-worker deployment.
    tokenizer.src_lang = source
    # Inference: encode to ids, then back to subword tokens for CTranslate2.
    source_tokens = tokenizer.convert_ids_to_tokens(tokenizer.encode(text))
    # NLLB decoding is primed with the target-language code as a prefix token.
    target_prefix = [target]
    t2 = perf_counter()
    results = translator.translate_batch([source_tokens], target_prefix=[target_prefix])
    t3 = perf_counter()
    delta_translate = t3 - t2
    # Drop the leading language-code token, then detokenize the best hypothesis.
    target_ = results[0].hypotheses[0][1:]
    target_ = tokenizer.decode(tokenizer.convert_tokens_to_ids(target_))
    console.log(f"hypotheses are {target_}")
    return {'translated_text' : target_,
            'metadata' : {
                'text' : text,
                'source_language' : src,
                # key spelling must match the (typo'd) MetaData.target_langauge field
                'target_langauge' : tgt
                },
            'time_load_model_sec' : delta_load_model,
            'time_translate_sec' : delta_translate
            }
if __name__ == '__main__':
    # Launch a local development server when this module is run directly.
    import uvicorn

    uvicorn.run(app, host='0.0.0.0', port=8124)
# class Language(str, Enum):
# english = "English"
# spanish = "Spanish"
# french = "French"
# @app.get("/translate/")
# async def translate_text(language: Union[str, None] = Query(default=None, enum=LANGUAGE_CODES),):
# return {"language": language}