legolasyiu committed on
Commit
fa1257a
·
verified ·
1 Parent(s): 34f1613

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -25
app.py CHANGED
@@ -1,48 +1,53 @@
1
  import gradio as gr
2
  import torch
3
  from transformers import AutoProcessor, AutoModelForImageTextToText
4
- import nest_asyncio
5
- nest_asyncio.apply()
6
 
7
  # ---------------- MODEL SETUP ---------------- #
8
  MODEL_ID = "EpistemeAI/Audiogemma-3N-finetune"
9
 
 
10
  processor = AutoProcessor.from_pretrained(MODEL_ID)
 
 
11
  model = AutoModelForImageTextToText.from_pretrained(
12
  MODEL_ID,
13
- torch_dtype=torch.bfloat16,
14
  device_map="auto"
15
  )
16
 
17
- # ---------------- TRANSLATION FUNCTION ---------------- #
18
 
19
- def transcribe_and_translate(audio_input, target_language):
20
 
 
21
  messages = [
22
  {
23
  "role": "user",
24
  "content": [
25
- {"type": "audio", "audio": audio_input},
26
  {
27
  "type": "text",
28
- "text": f"Transcribe this audio into English, and then translate it into {target_language}."
29
  },
30
  ]
31
  }
32
  ]
33
 
34
- input_ids = processor.apply_chat_template(
35
  messages,
36
  add_generation_prompt=True,
37
  tokenize=True,
38
- return_dict=True,
39
- return_tensors="pt",
40
  )
41
 
42
- input_ids = input_ids.to(model.device, dtype=model.dtype)
43
 
44
  with torch.no_grad():
45
- outputs = model.generate(**input_ids, max_new_tokens=256)
 
 
 
 
46
 
47
  text = processor.batch_decode(
48
  outputs,
@@ -63,23 +68,14 @@ LANGUAGES = [
63
 
64
  with gr.Blocks() as demo:
65
  gr.Markdown("## 🎙️ Multilingual Audio Translator")
66
- gr.Markdown("Speak English. The model will transcribe and translate into your chosen language.")
67
 
68
  with gr.Row():
69
  audio_input = gr.Audio(type="filepath", label="Upload or Record Audio")
70
- language_dropdown = gr.Dropdown(
71
- choices=LANGUAGES,
72
- value="French",
73
- label="Target Language"
74
- )
75
 
76
  translate_btn = gr.Button("Translate")
77
-
78
- output_text = gr.Textbox(
79
- label="Translation Output",
80
- lines=10,
81
- interactive=False
82
- )
83
 
84
  translate_btn.click(
85
  fn=transcribe_and_translate,
@@ -87,4 +83,9 @@ with gr.Blocks() as demo:
87
  outputs=output_text
88
  )
89
 
90
- demo.launch(debug=True)
 
 
 
 
 
 
1
  import gradio as gr
2
  import torch
3
  from transformers import AutoProcessor, AutoModelForImageTextToText
 
 
4
 
5
  # ---------------- MODEL SETUP ---------------- #
6
  MODEL_ID = "EpistemeAI/Audiogemma-3N-finetune"
7
 
8
+ print("Loading processor...")
9
  processor = AutoProcessor.from_pretrained(MODEL_ID)
10
+
11
+ print("Loading model...")
12
  model = AutoModelForImageTextToText.from_pretrained(
13
  MODEL_ID,
14
+ torch_dtype=torch.float16, # safer than bfloat16 on most GPUs
15
  device_map="auto"
16
  )
17
 
18
+ model.eval()
19
 
20
+ # ---------------- TRANSLATION FUNCTION ---------------- #
21
 
22
+ def transcribe_and_translate(audio_path, target_language):
23
  messages = [
24
  {
25
  "role": "user",
26
  "content": [
27
+ {"type": "audio", "audio": audio_path},
28
  {
29
  "type": "text",
30
+ "text": f"Transcribe this audio into English, then translate it into {target_language}."
31
  },
32
  ]
33
  }
34
  ]
35
 
36
+ inputs = processor.apply_chat_template(
37
  messages,
38
  add_generation_prompt=True,
39
  tokenize=True,
40
+ return_tensors="pt"
 
41
  )
42
 
43
+ inputs = inputs.to(model.device)
44
 
45
  with torch.no_grad():
46
+ outputs = model.generate(
47
+ **inputs,
48
+ max_new_tokens=256,
49
+ do_sample=False
50
+ )
51
 
52
  text = processor.batch_decode(
53
  outputs,
 
68
 
69
  with gr.Blocks() as demo:
70
  gr.Markdown("## 🎙️ Multilingual Audio Translator")
71
+ gr.Markdown("Upload or record English audio. The model will transcribe and translate it.")
72
 
73
  with gr.Row():
74
  audio_input = gr.Audio(type="filepath", label="Upload or Record Audio")
75
+ language_dropdown = gr.Dropdown(choices=LANGUAGES, value="French", label="Target Language")
 
 
 
 
76
 
77
  translate_btn = gr.Button("Translate")
78
+ output_text = gr.Textbox(label="Translation Output", lines=10)
 
 
 
 
 
79
 
80
  translate_btn.click(
81
  fn=transcribe_and_translate,
 
83
  outputs=output_text
84
  )
85
 
86
+ # ---------------- LAUNCH ---------------- #
87
+
88
+ demo.launch(
89
+ server_port=7861, # avoid stuck 7860
90
+ debug=True
91
+ )