"""Gradio front end for Angika translation with Aya-101 PEFT adapters.

The app exposes a dropdown of translation directions, each backed by its own
PEFT adapter on top of the shared ``CohereForAI/aya-101`` base model, and runs
inference inside a ZeroGPU (``spaces.GPU``) call.
"""

import gc

import gradio as gr
import spaces
import torch
from peft import PeftModel
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# --- Configuration ---
BASE_MODEL_ID = "CohereForAI/aya-101"

# Map the dropdown options to the three Hugging Face adapter model IDs.
MODEL_MAP = {
    "English to Angika": "snjev310/aya-101-english-angika",
    "Hindi to Angika": "snjev310/aya-101-hindi-angika",
    "Angika to English": "snjev310/aya-101-angika-english",
}

# Load the tokenizer once at import time; it is shared by every request.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)


@spaces.GPU(duration=180)  # 3 minutes to allow for 13B model loading + inference
def _translate_on_gpu(text, model_choice):
    """Load the base model plus the selected adapter and generate a translation.

    Args:
        text: Non-empty source text to translate.
        model_choice: A key of ``MODEL_MAP`` naming the translation direction.

    Returns:
        The decoded model output with special tokens removed.

    Raises:
        KeyError: If ``model_choice`` is not a key of ``MODEL_MAP``.
    """
    adapter_id = MODEL_MAP[model_choice]

    # Bind everything up front so the ``finally`` clause can always ``del``
    # these names, even when loading fails half-way through.
    base_model = model = inputs = outputs = None
    try:
        # 1. Load the base model in bfloat16 without 4-bit quantization.
        #    NOTE(review): the original author assumed ~70GB of VRAM on Pro
        #    ZeroGPU -- confirm for the hardware tier actually in use.
        base_model = AutoModelForSeq2SeqLM.from_pretrained(
            BASE_MODEL_ID,
            torch_dtype=torch.bfloat16,
            low_cpu_mem_usage=True,
            device_map="auto",
        )

        # 2. Attach the PEFT adapter for the requested direction.
        model = PeftModel.from_pretrained(base_model, adapter_id)
        model.eval()

        # 3. Prefix the text with the direction label as a task prompt.
        prompt = f"{model_choice}: {text}"
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

        # 4. Generate with low-temperature nucleus sampling.
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=256,
                do_sample=True,
                temperature=0.3,
                top_p=0.9,
            )

        return tokenizer.decode(outputs[0], skip_special_tokens=True)
    finally:
        # 5. Cleanup runs on success AND on failure so an exception during
        #    loading or generation does not leave the model referenced.
        del outputs, inputs, model, base_model
        gc.collect()
        torch.cuda.empty_cache()


def translate(text, model_choice):
    """Translate ``text`` in the direction selected by ``model_choice``.

    Blank input is rejected here, before the GPU-decorated helper is called,
    so an empty request never triggers a model load.

    Args:
        text: Text entered by the user; may be empty or ``None``.
        model_choice: A key of ``MODEL_MAP`` naming the translation direction.

    Returns:
        The translated text, or a prompt asking the user to enter text when
        the input is empty or whitespace-only.
    """
    if not text or not text.strip():
        return "Please enter text to translate."
    return _translate_on_gpu(text, model_choice)


# --- Gradio UI ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🗣️ Angika Multi-Translator")
    gr.Markdown(
        "Powered by **Aya-101** and **ZeroGPU**. "
        "Select your translation direction below."
    )

    with gr.Row():
        with gr.Column():
            model_dropdown = gr.Dropdown(
                choices=list(MODEL_MAP.keys()),
                value="English to Angika",
                label="Select Translation Mode",
            )
            input_text = gr.Textbox(
                label="Input Text",
                placeholder="Type here...",
                lines=5,
            )
            submit_btn = gr.Button("Translate", variant="primary")

        with gr.Column():
            output_text = gr.Textbox(
                label="Translated Text",
                lines=5,
                interactive=False,
            )

    submit_btn.click(
        fn=translate,
        inputs=[input_text, model_dropdown],
        outputs=output_text,
    )

    # Clickable sample inputs that pre-fill the text box and the dropdown.
    gr.Examples(
        examples=[
            ["How are you doing today?", "English to Angika"],
            ["आप कैसे हैं?", "Hindi to Angika"],
        ],
        inputs=[input_text, model_dropdown],
    )

demo.launch()