# snjev310 - "Update app.py" (commit a6311ff, verified)
import gradio as gr
import torch
import spaces
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from peft import PeftModel
# --- Configuration ---
# Hub ID of the base checkpoint; translate() loads it and the tokenizer is
# loaded from the same ID.
BASE_MODEL_ID = "CohereForAI/aya-101"

# Dropdown label -> Hugging Face model ID for that translation direction.
# translate() loads the selected ID as a PEFT adapter on top of BASE_MODEL_ID.
# Insertion order is the order in which the choices appear in the dropdown.
MODEL_MAP = {
    "English to Angika": "snjev310/aya-101-english-angika",
    "Hindi to Angika": "snjev310/aya-101-hindi-angika",
    "Angika to English": "snjev310/aya-101-angika-english",
}
# Load Tokenizer globally (it's small and stays in CPU RAM)
# Created once at import time from the base model's repo; translate() reuses
# this single instance both to encode the prompt and to decode the output.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
@spaces.GPU(duration=180)  # 3 minutes to allow for 13B model loading + inference
def translate(text: str, model_choice: str) -> str:
    """Translate ``text`` in the direction selected by ``model_choice``.

    On every call the base model is loaded, the adapter for the chosen
    direction is applied on top of it, one sampled generation is run, and
    all model/tensor references are dropped again before returning, even
    when loading or generation fails.

    Args:
        text: The text to translate.
        model_choice: A key of ``MODEL_MAP`` (the dropdown label); it is also
            used as the prefix of the prompt sent to the model.

    Returns:
        The decoded generation with special tokens stripped, or a short
        user-facing message when ``text`` is empty/whitespace or
        ``model_choice`` is not a key of ``MODEL_MAP``.
    """
    # Guard against None as well as empty/whitespace-only input.
    if not text or not text.strip():
        return "Please enter text to translate."

    # Validate the direction before any expensive model loading happens.
    adapter_id = MODEL_MAP.get(model_choice)
    if adapter_id is None:
        return f"Unknown translation mode: {model_choice!r}"

    # Pre-bind every name released in ``finally`` so the cleanup is valid no
    # matter which step raised.
    base_model = model = inputs = outputs = None
    try:
        # 1. Load Base Model in bfloat16 (Standard for Aya-101)
        # Pro ZeroGPU has ~70GB VRAM, so we don't need 4-bit quantization
        base_model = AutoModelForSeq2SeqLM.from_pretrained(
            BASE_MODEL_ID,
            torch_dtype=torch.bfloat16,
            low_cpu_mem_usage=True,
            device_map="auto",
        )

        # 2. Load the specific PEFT Adapter
        model = PeftModel.from_pretrained(base_model, adapter_id)
        model.eval()

        # 3. Prepare Input
        # Using a prompt format helps the model understand the task
        prompt = f"{model_choice}: {text}"
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

        # 4. Generate (sampling is enabled, so output is not deterministic)
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=256,
                do_sample=True,
                temperature=0.3,
                top_p=0.9,
            )

        return tokenizer.decode(outputs[0], skip_special_tokens=True)
    finally:
        # 5. Cleanup (CRITICAL for ZeroGPU to release resources)
        # Runs on success and on failure. The input/output tensors are
        # dropped too, so nothing created here is still referenced when the
        # CUDA cache is emptied.
        del model, base_model, inputs, outputs
        torch.cuda.empty_cache()
# --- Gradio UI ---
# Sample (input text, translation mode) pairs offered below the form; the
# column order matches the `inputs` list passed to gr.Examples.
example_rows = [
    ["How are you doing today?", "English to Angika"],
    ["आप कैसे हैं?", "Hindi to Angika"],
]

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Page header.
    gr.Markdown("# 🗣️ Angika Multi-Translator")
    gr.Markdown("Powered by **Aya-101** and **ZeroGPU**. Select your translation direction below.")

    with gr.Row():
        # Left column: translation mode, source text and the trigger button.
        with gr.Column():
            model_dropdown = gr.Dropdown(
                label="Select Translation Mode",
                choices=list(MODEL_MAP),
                value="English to Angika",
            )
            input_text = gr.Textbox(
                label="Input Text",
                lines=5,
                placeholder="Type here...",
            )
            submit_btn = gr.Button("Translate", variant="primary")

        # Right column: read-only box that receives the translation.
        with gr.Column():
            output_text = gr.Textbox(
                label="Translated Text",
                interactive=False,
                lines=5,
            )

    # Clicking the button calls translate(text, model_choice) and writes the
    # returned string into the output box.
    submit_btn.click(
        fn=translate,
        inputs=[input_text, model_dropdown],
        outputs=output_text,
    )

    # Selecting an example fills the text box and the mode dropdown.
    gr.Examples(
        examples=example_rows,
        inputs=[input_text, model_dropdown],
    )

demo.launch()