snjev310 committed on
Commit
a6311ff
·
verified ·
1 Parent(s): 7a44fae

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -32
app.py CHANGED
@@ -1,65 +1,104 @@
1
  import gradio as gr
2
  import torch
3
  import spaces
4
- from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, BitsAndBytesConfig
5
  from peft import PeftModel
6
 
7
- # Configuration
8
  BASE_MODEL_ID = "CohereForAI/aya-101"
9
- ADAPTER_ID = "snjev310/aya-101-english-angika"
10
 
11
- # Tokenizer can stay global (it's small)
 
 
 
 
 
 
 
12
  tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
13
 
14
- @spaces.GPU(duration=60) # Increased duration for the large model load
15
- def translate(text):
16
  if not text.strip():
17
- return ""
18
 
19
- # 1. Use 4-bit quantization to fit the model easily
20
- quant_config = BitsAndBytesConfig(
21
- load_in_4bit=True,
22
- bnb_4bit_compute_dtype=torch.float16,
23
- bnb_4bit_use_double_quant=True,
24
- bnb_4bit_quant_type="nf4"
25
- )
26
-
27
- # 2. Load model INSIDE the function for ZeroGPU
28
  base_model = AutoModelForSeq2SeqLM.from_pretrained(
29
  BASE_MODEL_ID,
30
- quantization_config=quant_config,
 
31
  device_map="auto"
32
  )
33
 
34
- model = PeftModel.from_pretrained(base_model, ADAPTER_ID)
 
 
35
 
36
- # 3. Inference
37
- input_text = f"translate English to Angika: {text}"
38
- inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
 
39
 
 
40
  with torch.no_grad():
41
  outputs = model.generate(
42
  **inputs,
43
- max_new_tokens=128,
 
44
  temperature=0.3,
45
- do_sample=True
46
  )
47
 
48
- response = tokenizer.decode(outputs[0], skip_special_tokens=True)
49
 
50
- # Optional: Clean up to free memory immediately
51
  del model
52
  del base_model
53
  torch.cuda.empty_cache()
54
 
55
- return response
56
 
57
- # UI remains the same
58
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
59
- gr.Markdown("# 🗣️ Angika Translator (Aya-101)")
60
- input_box = gr.Textbox(label="English Input")
61
- output_box = gr.Textbox(label="Angika Translation")
62
- btn = gr.Button("Translate", variant="primary")
63
- btn.click(fn=translate, inputs=input_box, outputs=output_box)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
  demo.launch()
 
1
  import gradio as gr
2
  import torch
3
  import spaces
4
+ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
5
  from peft import PeftModel
6
 
7
# --- Configuration ---
# Base checkpoint that every translation direction is built on.
BASE_MODEL_ID = "CohereForAI/aya-101"

# Dropdown label -> Hugging Face repository ID of the adapter used for that
# translation direction.
MODEL_MAP = {
    "English to Angika": "snjev310/aya-101-english-angika",
    "Hindi to Angika": "snjev310/aya-101-hindi-angika",
    "Angika to English": "snjev310/aya-101-angika-english",
}

# The tokenizer is created once at module load and shared by every request.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
19
 
20
@spaces.GPU(duration=180)  # 3 minutes to allow for 13B model loading + inference
def translate(text, model_choice):
    """Translate ``text`` using the adapter selected by ``model_choice``.

    The base model and the chosen PEFT adapter are loaded on every call and
    released again before the function returns, whether or not the call
    succeeded.

    Args:
        text: The text to translate.
        model_choice: A key of ``MODEL_MAP`` naming the translation
            direction. It is also used as the prompt prefix.

    Returns:
        The decoded model output, or a short user-facing message when the
        input text is empty or the translation mode is not recognised.
    """
    # Validate before touching the model so bad input never pays for a load.
    if not text or not text.strip():
        return "Please enter text to translate."

    adapter_id = MODEL_MAP.get(model_choice)
    if adapter_id is None:
        # A cleared dropdown or an unexpected API value lands here instead of
        # surfacing as a KeyError.
        return "Please select a valid translation mode."

    # Pre-bind every name the cleanup touches so the ``finally`` block is
    # safe even when loading fails part-way through.
    base_model = None
    model = None
    inputs = None
    outputs = None
    try:
        # 1. Load the base model in bfloat16 (no 4-bit quantization).
        base_model = AutoModelForSeq2SeqLM.from_pretrained(
            BASE_MODEL_ID,
            torch_dtype=torch.bfloat16,
            low_cpu_mem_usage=True,
            device_map="auto",
        )

        # 2. Attach the adapter for the requested direction.
        model = PeftModel.from_pretrained(base_model, adapter_id)
        model.eval()

        # 3. Build the prompt: the dropdown label is the task prefix.
        prompt = f"{model_choice}: {text}"
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

        # 4. Generate with low-temperature nucleus sampling.
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=256,
                do_sample=True,
                temperature=0.3,
                top_p=0.9,
            )

        return tokenizer.decode(outputs[0], skip_special_tokens=True)
    finally:
        # 5. Cleanup on success AND failure: drop every reference to the
        # model and tensors, then hand cached CUDA memory back.
        outputs = None
        inputs = None
        model = None
        base_model = None
        torch.cuda.empty_cache()
63
 
64
# --- Gradio UI ---
# Two-column layout: translation mode, input and button on the left, the
# read-only result on the right, with clickable examples underneath.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🗣️ Angika Multi-Translator")
    gr.Markdown("Powered by **Aya-101** and **ZeroGPU**. Select your translation direction below.")

    with gr.Row():
        with gr.Column():
            # The dropdown offers exactly the directions defined in MODEL_MAP.
            mode_selector = gr.Dropdown(
                choices=list(MODEL_MAP),
                value="English to Angika",
                label="Select Translation Mode",
            )
            source_box = gr.Textbox(label="Input Text", placeholder="Type here...", lines=5)
            translate_button = gr.Button("Translate", variant="primary")

        with gr.Column():
            result_box = gr.Textbox(label="Translated Text", lines=5, interactive=False)

    # Both the button and the examples feed the same (text, mode) input pair.
    translate_inputs = [source_box, mode_selector]

    translate_button.click(fn=translate, inputs=translate_inputs, outputs=result_box)

    sample_requests = [
        ["How are you doing today?", "English to Angika"],
        ["आप कैसे हैं?", "Hindi to Angika"],
    ]
    gr.Examples(examples=sample_requests, inputs=translate_inputs)

demo.launch()