#%%
from fastapi import FastAPI, Query
from rich.console import Console
import ctranslate2
import transformers
from time import perf_counter
from pydantic import BaseModel

from lang_codes import flores_codes

#%%
# global state
translator = None
tokenizer = None
human_readable_languages = sorted(flores_codes.keys())
delta_load_model = None
delta_translate = None

# API docs. 📜
description = """
🚀 This API takes a text, a source language and a target language from the user and then translates 📜 ➡️ 🗒️ it.
The backend is CTranslate2, a C++ inference engine that accelerates Transformer models.

## Languages supported

Acehnese (Arabic script), Acehnese (Latin script), Afrikaans, Akan, Amharic, Armenian, Assamese, Asturian,
Awadhi, Ayacucho Quechua, Balinese, Bambara, Banjar (Arabic script), Banjar (Latin script), Bashkir, Basque,
Belarusian, Bemba, Bengali, Bhojpuri, Bosnian, Buginese, Bulgarian, Burmese, Catalan, Cebuano,
Central Atlas Tamazight, Central Aymara, Central Kanuri (Arabic script), Central Kanuri (Latin script),
Central Kurdish, Chhattisgarhi, Chinese (Simplified), Chinese (Traditional), Chokwe, Crimean Tatar, Croatian,
Czech, Danish, Dari, Dutch, Dyula, Dzongkha, Eastern Panjabi, Eastern Yiddish, Egyptian Arabic, English,
Esperanto, Estonian, Ewe, Faroese, Fijian, Finnish, Fon, French, Friulian, Galician, Ganda, Georgian, German,
Greek, Guarani, Gujarati, Haitian Creole, Halh Mongolian, Hausa, Hebrew, Hindi, Hungarian, Icelandic, Igbo,
Ilocano, Indonesian, Irish, Italian, Japanese, Javanese, Jingpho, Kabiyè, Kabuverdianu, Kabyle, Kamba,
Kannada, Kashmiri (Arabic script), Kashmiri (Devanagari script), Kazakh, Khmer, Kikongo, Kikuyu, Kimbundu,
Kinyarwanda, Korean, Kyrgyz, Lao, Latgalian, Ligurian, Limburgish, Lingala, Lithuanian, Lombard, Luba-Kasai,
Luo, Luxembourgish, Macedonian, Magahi, Maithili, Malayalam, Maltese, Maori, Marathi, Meitei (Bengali script),
Mesopotamian Arabic, Minangkabau (Arabic script), Minangkabau (Latin script), Mizo, Modern Standard Arabic,
Modern Standard Arabic (Romanized), Moroccan Arabic, Mossi, Najdi Arabic, Nepali, Nigerian Fulfulde,
North Azerbaijani, North Levantine Arabic, Northern Kurdish, Northern Sotho, Northern Uzbek, Norwegian Bokmål,
Norwegian Nynorsk, Nuer, Nyanja, Occitan, Odia, Pangasinan, Papiamento, Plateau Malagasy, Polish, Portuguese,
Romanian, Rundi, Russian, Samoan, Sango, Sanskrit, Santali, Sardinian, Scottish Gaelic, Serbian, Shan, Shona,
Sicilian, Silesian, Sindhi, Sinhala, Slovak, Slovenian, Somali, South Azerbaijani, South Levantine Arabic,
Southern Pashto, Southern Sotho, Southwestern Dinka, Spanish, Standard Latvian, Standard Malay,
Standard Tibetan, Sundanese, Swahili, Swati, Swedish, Tagalog, Tajik, Tamasheq (Latin script),
Tamasheq (Tifinagh script), Tamil, Tatar, Ta’izzi-Adeni Arabic, Telugu, Thai, Tigrinya, Tok Pisin,
Tosk Albanian, Tsonga, Tswana, Tumbuka, Tunisian Arabic, Turkish, Turkmen, Twi, Ukrainian, Umbundu, Urdu,
Uyghur, Venetian, Vietnamese, Waray, Welsh, West Central Oromo, Western Persian, Wolof, Xhosa, Yoruba,
Yue Chinese, Zulu
"""

tags_metadata = [
    {
        "name": "translate",
        "description": "endpoints related to translation",
    },
]

app = FastAPI(
    title="Translator-txt2txt",
    description=description,
    version="0.0.1",
    terms_of_service="http://example.com/terms/",
    contact={
        "name": "chooch.ai",
        "url": "https://chooch.ai/",
    },
    license_info={
        "name": "Apache 2.0",
        "url": "https://chooch.ai/",
    },
    openapi_tags=tags_metadata,
)


class MetaData(BaseModel):
    text: str
    source_language: str
    target_language: str


class TranslationResponse(BaseModel):
    metadata: MetaData
    translated_text: str
    time_load_model_sec: float
    time_translate_sec: float
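
#%%
# NOTE: the startup hook below expects a converted CTranslate2 model in the local
# directory "ct2_model_nllb". A minimal sketch of how that directory could be
# produced, assuming the ct2-transformers-converter CLI that ships with ctranslate2
# and the same facebook/nllb-200-3.3B checkpoint the tokenizer uses (run once, offline):
#
#   ct2-transformers-converter --model facebook/nllb-200-3.3B \
#       --output_dir ct2_model_nllb --quantization float16
#
# float16 quantization matches the compute_type requested when the Translator is created.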
@app.on_event("startup")
async def load_model():
    # Load the translator and tokenizer once, during API startup
    t0 = perf_counter()
    global translator, tokenizer
    translator = ctranslate2.Translator(
        "ct2_model_nllb", device="cuda", device_index=0, compute_type="float16"
    )
    tokenizer = transformers.AutoTokenizer.from_pretrained("facebook/nllb-200-3.3B")
    Console().log("model has been loaded", style="red on green")
    t1 = perf_counter()
    global delta_load_model
    delta_load_model = t1 - t0


@app.post("/translate/", tags=["translate"], response_model=TranslationResponse)
async def translate(
    text: str = Query(default="", description="text that needs to be translated from the src to the tgt language"),
    src: str = Query(default="English", enum=human_readable_languages, description="source language to translate text from"),
    tgt: str = Query(default="Turkish", enum=human_readable_languages, description="target language to translate text to"),
):
    """
    🔴 Translate the text from the source language to the target language.
    """
    source = flores_codes[src]
    target = flores_codes[tgt]

    Console().log(f"Source language is {source}", style="bold red")
    Console().log(f"Target language is {target}", style="bold red")

    # Tell the tokenizer which language the input text is written in
    tokenizer.src_lang = source

    # Inference: tokenize, then translate with the target language as a decoder prefix
    source_tokens = tokenizer.convert_ids_to_tokens(tokenizer.encode(text))
    target_prefix = [target]
    t2 = perf_counter()
    results = translator.translate_batch([source_tokens], target_prefix=[target_prefix])
    t3 = perf_counter()

    # Drop the leading target-language token from the hypothesis, then decode
    target_tokens = results[0].hypotheses[0][1:]
    translated_text = tokenizer.decode(tokenizer.convert_tokens_to_ids(target_tokens))

    global delta_translate
    delta_translate = t3 - t2
    Console().log(f"hypothesis is {translated_text}")

    return {
        "translated_text": translated_text,
        "metadata": {
            "text": text,
            "source_language": src,
            "target_language": tgt,
        },
        "time_load_model_sec": delta_load_model,
        "time_translate_sec": delta_translate,
    }


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8124)
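
#%%
# Example client call (a sketch, assuming the server above is running locally on
# port 8124). The endpoint declares its arguments with Query, so they are passed
# as URL query parameters even though the route is a POST:
#
#   import requests
#
#   resp = requests.post(
#       "http://localhost:8124/translate/",
#       params={"text": "Hello, world!", "src": "English", "tgt": "Turkish"},
#   )
#   print(resp.json()["translated_text"])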