Spaces:
Sleeping
Sleeping
| from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, BitsAndBytesConfig | |
| from sacremoses import MosesPunctNormalizer | |
| from flores import code_mapping | |
| import gradio as gr | |
| import platform | |
# Inference is pinned to the CPU. The earlier platform-based choice
# ("cpu" on Darwin, "cuda" elsewhere) was immediately overwritten by this
# value, so it was dead code and has been folded into one assignment.
device = "cpu"

# Model identifier handed to both `from_pretrained` calls below.
MODEL_DIR = "RohanAi/nllb_quantized"

# # 8-bit quantization for GPU
# bnb_config = BitsAndBytesConfig(load_in_8bit=True)

# Load tokenizer + model once at import time; `translate` reuses them on
# every call. The model is not moved to another device after loading.
tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_DIR)

# Punctuation normalizer applied to the input text before tokenization.
punct_normalizer = MosesPunctNormalizer(lang="en")
| # Language mapping | |
| # langs = { | |
| # "Hindi": "hin_Deva", | |
| # "French": "fra_Latn", | |
| # "Spanish": "spa_Latn", | |
| # "German": "deu_Latn", | |
| # "Arabic": "arb_Arab" | |
| # } | |
def translate(text: str, src_lang: str, tgt_lang: str) -> str:
    """Translate ``text`` from ``src_lang`` into ``tgt_lang``.

    Args:
        text: Text to translate.
        src_lang: Source language name; must be a key of ``code_mapping``
            (e.g. "English" -> "eng_Latn").
        tgt_lang: Target language name; must be a key of ``code_mapping``
            (e.g. "Hindi" -> "hin_Deva").

    Returns:
        The decoded translation with special tokens stripped, or ``""`` when
        ``text`` is empty, whitespace-only, or ``None``.

    Raises:
        KeyError: If either language name is missing from ``code_mapping``.
    """
    # Nothing to translate (e.g. the form was submitted with an empty box):
    # skip the model call rather than decoding output for a blank prompt.
    if not text or not text.strip():
        return ""

    src_code = code_mapping[src_lang]  # e.g. "English" -> "eng_Latn"
    tgt_code = code_mapping[tgt_lang]  # e.g. "Hindi" -> "hin_Deva"
    print('source lang code ',src_code)

    # The language codes are set on the shared module-level tokenizer.
    tokenizer.src_lang = src_code
    tokenizer.tgt_lang = tgt_code

    # Normalize punctuation
    text = punct_normalizer.normalize(text)

    # Encode the text and place the tensors on whatever device the model
    # actually lives on, so inputs and weights can never disagree.
    inputs = tokenizer(text, return_tensors="pt").to(model.device)

    # No explicit length limit is passed here, so the output length is
    # governed by the model's own generation settings.
    outputs = model.generate(
        **inputs,
        # Force the first generated token to be the target-language code.
        forced_bos_token_id=tokenizer.convert_tokens_to_ids(tgt_code),
        num_beams=3,  # beam search with a small beam (not greedy decoding)
        no_repeat_ngram_size=2,  # small repetition control
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Every language name in the code mapping is offered in both dropdowns.
langs = list(code_mapping)

# Single-page UI: free-text input plus source/target language pickers,
# wired straight to `translate`.
iface = gr.Interface(
    fn=translate,
    inputs=[
        gr.Textbox(label="Input Text", lines=10),
        gr.Dropdown(choices=langs, label="Source Language"),
        gr.Dropdown(choices=langs, label="Target Language"),
    ],
    outputs=gr.Textbox(label="Translated Text", lines=30),
    title="🌍 Language Translation (CPU-friendly)",
)

# Start the app; share=True also requests a public share link.
iface.launch(share=True)