Spaces:

DHEIVER
/

SeamlessTranslator

Sleeping

App Files Files Community

DHEIVER committed on Jan 18, 2025

Commit

a58afc9

verified ·

1 Parent(s): 89d8261

Update app.py

Browse files

Files changed (1) hide show

app.py +154 -267

app.py CHANGED Viewed

@@ -3,326 +3,213 @@ import torch
 import torchaudio
 import numpy as np
 from transformers import AutoProcessor, SeamlessM4Tv2Model
-from datetime import datetime
-import time
-class ARISTranslator:
-    """Wrapper around the SeamlessM4T v2 processor and model that returns translated speech.
-    ``language_codes`` maps the display names shown in the UI dropdowns to the
-    language codes passed to the processor and to ``generate``.
-    """
-    def __init__(self, model_name: str = "facebook/seamless-m4t-v2-large"):
-        """Load the processor and model for ``model_name`` and record the model's sampling rate."""
-        self.processor = AutoProcessor.from_pretrained(model_name)
-        self.model = SeamlessM4Tv2Model.from_pretrained(model_name)
         self.sample_rate = self.model.config.sampling_rate
-        self.language_codes = {
-            "English (US)": "eng",
-            "Spanish (ES)": "spa",
-            "French (FR)": "fra",
-            "German (DE)": "deu",
-            "Italian (IT)": "ita",
-            "Portuguese (BR)": "por",
-            "Russian (RU)": "rus",
-            "Chinese (CN)": "cmn",
-            "Japanese (JP)": "jpn",
-            "Korean (KR)": "kor",
-            "Hindi (IN)": "hin",
-            "Arabic (AR)": "ara"
         }
-    def process_audio(self, audio_path: str, tgt_lang: str) -> tuple[int, np.ndarray]:
-        """Translate the audio file at ``audio_path`` into speech in ``tgt_lang``.
-        ``tgt_lang`` is a key of ``language_codes``. Returns
-        ``(self.sample_rate, audio_array)``. Failures are re-raised as
-        ``gr.Error`` with the "Audio processing failed: " prefix.
-        """
         try:
-            # NOTE(review): this raise sits inside the try, so it is presumably caught by the
-            # handler below and re-wrapped with the "Audio processing failed: " prefix - confirm.
-            if audio_path is None:
-                raise gr.Error("No audio input provided")
-            # Load the audio and resample it
             audio, orig_freq = torchaudio.load(audio_path)
             audio = torchaudio.functional.resample(audio, orig_freq=orig_freq, new_freq=16000)
-            # Run it through the model
             inputs = self.processor(audios=audio, return_tensors="pt")
-            audio_array = self.model.generate(**inputs, tgt_lang=self.language_codes[tgt_lang])[0].cpu().numpy().squeeze()
-            return self.sample_rate, audio_array
-        except Exception as e:
-            raise gr.Error(f"Audio processing failed: {str(e)}")
-    def translate_text(self, text: str, src_lang: str, tgt_lang: str) -> tuple[int, np.ndarray]:
-        """Translate ``text`` from ``src_lang`` into speech in ``tgt_lang``.
-        Both languages are keys of ``language_codes``. Returns
-        ``(self.sample_rate, audio_array)``. Failures are re-raised as
-        ``gr.Error`` with the "Translation failed: " prefix.
-        """
-        try:
-            # Whitespace-only input is rejected before the processor is called
-            if not text.strip():
-                raise gr.Error("No text input provided")
-            inputs = self.processor(text=text, src_lang=self.language_codes[src_lang], return_tensors="pt")
-            audio_array = self.model.generate(**inputs, tgt_lang=self.language_codes[tgt_lang])[0].cpu().numpy().squeeze()
-            return self.sample_rate, audio_array
         except Exception as e:
-            raise gr.Error(f"Translation failed: {str(e)}")
-# Stylesheet handed to gr.Blocks(css=...) in create_interface: color variables on
-# :root plus rules for the ids/classes used by that function's HTML and elem_classes.
 css = """
-:root {
-    --primary: #00ffff;
-    --secondary: #0066cc;
-    --accent: #ff3366;
-    --background: #000000;
-    --text: #ffffff;
-}
-#aris-interface {
-    background-color: var(--background);
-    background-image:
-        radial-gradient(circle at 20% 20%, rgba(0, 102, 204, 0.1) 0%, transparent 50%),
-        radial-gradient(circle at 80% 80%, rgba(0, 255, 255, 0.1) 0%, transparent 50%);
-    min-height: 100vh;
-    font-family: 'Courier New', monospace;
     padding: 20px;
 }
-.title-container {
-    text-align: center;
-    color: var(--primary);
-    margin-bottom: 30px;
-    position: relative;
-}
-.title-container h1 {
-    font-size: 3em;
-    letter-spacing: 10px;
-    margin: 0;
-    text-shadow: 0 0 10px var(--primary);
-}
-.title-container h3 {
-    font-size: 1.2em;
-    letter-spacing: 3px;
-    opacity: 0.8;
-    margin: 5px 0;
 }
-#status-ring {
-    width: 400px;
-    height: 400px;
-    border: 4px solid var(--primary);
-    border-radius: 50%;
-    margin: 20px auto;
-    position: relative;
-    animation: pulse 2s infinite;
-    display: flex;
-    align-items: center;
-    justify-content: center;
-    background:
-        radial-gradient(circle at center, rgba(0, 255, 255, 0.1) 0%, transparent 70%),
-        conic-gradient(from 0deg, transparent 0%, rgba(0, 255, 255, 0.1) 50%, transparent 100%);
 }
-#outer-ring-decoration {
-    position: absolute;
-    width: 420px;
-    height: 420px;
-    border-radius: 50%;
-    border: 1px solid rgba(0, 255, 255, 0.3);
-    animation: rotate 20s linear infinite;
 }
-@keyframes rotate {
-    from { transform: rotate(0deg); }
-    to { transform: rotate(360deg); }
 }
-@keyframes pulse {
-    0% { box-shadow: 0 0 0 0 rgba(0, 255, 255, 0.4); }
-    70% { box-shadow: 0 0 0 20px rgba(0, 255, 255, 0); }
-    100% { box-shadow: 0 0 0 0 rgba(0, 255, 255, 0); }
 }
-.aris-textbox {
-    background-color: rgba(0, 0, 0, 0.8) !important;
-    border: 2px solid var(--primary) !important;
-    color: var(--primary) !important;
-    font-family: 'Courier New', monospace !important;
-    border-radius: 5px !important;
-    padding: 10px !important;
 }
-.aris-button {
-    background-color: transparent !important;
-    border: 2px solid var(--primary) !important;
-    color: var(--primary) !important;
-    font-family: 'Courier New', monospace !important;
-    text-transform: uppercase !important;
-    letter-spacing: 2px !important;
-    padding: 12px 24px !important;
-    border-radius: 5px !important;
-    transition: all 0.3s ease !important;
 }
-.aris-button:hover {
-    background-color: rgba(0, 255, 255, 0.1) !important;
-    box-shadow: 0 0 15px rgba(0, 255, 255, 0.3) !important;
-    transform: translateY(-2px) !important;
 }
-.status-box {
-    background-color: rgba(0, 0, 0, 0.8) !important;
-    border: 2px solid var(--primary) !important;
-    color: var(--primary) !important;
-    padding: 15px !important;
-    border-radius: 5px !important;
-    margin: 5px !important;
-    text-align: center !important;
-    text-transform: uppercase !important;
-    letter-spacing: 1px !important;
-    transition: all 0.3s ease !important;
-    position: relative;
-    overflow: hidden;
 }
-.status-box::before {
-    content: '';
-    position: absolute;
-    top: 0;
-    left: -100%;
-    width: 100%;
-    height: 2px;
-    background: linear-gradient(90deg, transparent, var(--primary));
-    animation: scan-line 2s linear infinite;
 }
-@keyframes scan-line {
-    0% { left: -100%; }
-    100% { left: 100%; }
 }
 """
-def create_interface():
-    """Build the A.R.I.S. Gradio Blocks interface and return it without launching.
-    Creates an ``ARISTranslator``, lays out a text tab and an audio tab next to a
-    shared audio output and four static status boxes, and wires the two buttons
-    to the translator's methods.
-    """
-    translator = ARISTranslator()
-    # NOTE(review): update_status is defined but never called or bound to an event in this function.
-    def update_status():
-        return (
-            f"A.R.I.S. CORE v2.0.0\n"
-            f"Time: {datetime.now().strftime('%H:%M:%S')}\n"
-            f"Neural Engine: ACTIVE\n"
-            f"Translation Matrix: OPERATIONAL"
         )
-    with gr.Blocks(css=css, title="A.R.I.S. - Advanced Real-time Interpretation System") as demo:
-        # Static banner; the "mode-indicator" class used here has no rule in the css string
-        gr.HTML('''
-            <div class="title-container">
-                <h1>A.R.I.S.</h1>
-                <h3>Advanced Real-time Interpretation System</h3>
-                <div class="mode-indicator">QUANTUM CORE ACTIVE</div>
-            </div>
-        ''')
-        with gr.Column(elem_id="aris-interface"):
-            # Decorative ring; #inner-ring, #core and .system-version have no rule in the css string
-            gr.HTML("""
-                <div id="status-ring">
-                    <div id="outer-ring-decoration"></div>
-                    <div id="inner-ring">
-                        <div id="core">
-                            <div>A.R.I.S.</div>
-                            <div>QUANTUM CORE</div>
-                            <div>v2.0.0</div>
-                            <div class="system-version">NEURAL ENGINE ACTIVE</div>
-                        </div>
-                    </div>
-                </div>
-            """)
-            with gr.Row():
-                # Left column: the two input tabs
-                with gr.Column():
-                    with gr.Tab("Text Translation"):
                         text_input = gr.Textbox(
-                            label="INPUT TEXT",
-                            placeholder="Enter text for translation...",
-                            elem_classes=["aris-textbox"],
-                            lines=3
                         )
                         with gr.Row():
-                            src_lang_text = gr.Dropdown(
-                                choices=list(translator.language_codes.keys()),
-                                value="English (US)",
-                                label="SOURCE LANGUAGE",
-                                elem_classes=["aris-textbox"]
                             )
-                            tgt_lang_text = gr.Dropdown(
-                                choices=list(translator.language_codes.keys()),
-                                value="Spanish (ES)",
-                                label="TARGET LANGUAGE",
-                                elem_classes=["aris-textbox"]
                             )
-                        translate_btn = gr.Button("▶ TRANSLATE TEXT", elem_classes=["aris-button"])
-                    with gr.Tab("Audio Translation"):
                         audio_input = gr.Audio(
-                            label="AUDIO INPUT",
                             type="filepath"
                         )
                         tgt_lang_audio = gr.Dropdown(
-                            choices=list(translator.language_codes.keys()),
-                            value="English (US)",
-                            label="TARGET LANGUAGE",
-                            elem_classes=["aris-textbox"]
                         )
-                        translate_audio_btn = gr.Button("▶ TRANSLATE AUDIO", elem_classes=["aris-button"])
-                # Right column: the single audio output shared by both tabs, then static status boxes
-                with gr.Column():
-                    audio_output = gr.Audio(
-                        label="TRANSLATION OUTPUT",
-                        type="numpy"
-                    )
-                    with gr.Row():
-                        with gr.Column(min_width=200):
-                            gr.HTML(
-                                """
-                                <div class="status-box">
-                                    NEURAL CORE<br>
-                                    <strong>OPERATIONAL</strong>
-                                </div>
-                                """
-                            )
-                        with gr.Column(min_width=200):
-                            gr.HTML(
-                                """
-                                <div class="status-box">
-                                    QUANTUM ENGINE<br>
-                                    <strong>ACTIVE</strong>
-                                </div>
-                                """
-                            )
-                    with gr.Row():
-                        with gr.Column(min_width=200):
-                            gr.HTML(
-                                """
-                                <div class="status-box">
-                                    TRANSLATION MATRIX<br>
-                                    <strong>CALIBRATED</strong>
-                                </div>
-                                """
-                            )
-                        with gr.Column(min_width=200):
-                            gr.HTML(
-                                """
-                                <div class="status-box">
-                                    VOICE SYNTHESIS<br>
-                                    <strong>READY</strong>
-                                </div>
-                                """
-                            )
-            # Event handlers: both buttons write their result to the same audio_output component
-            translate_btn.click(
-                fn=translator.translate_text,
-                inputs=[text_input, src_lang_text, tgt_lang_text],
-                outputs=audio_output
-            )
-            translate_audio_btn.click(
-                fn=translator.process_audio,
-                inputs=[audio_input, tgt_lang_audio],
-                outputs=audio_output
-            )
     return demo
-# Script entry point: build the interface, then call queue() and launch() on it
 if __name__ == "__main__":
-    demo = create_interface()
     demo.queue()
     demo.launch()

 import torchaudio
 import numpy as np
 from transformers import AutoProcessor, SeamlessM4Tv2Model
+class SeamlessTranslator:
+    """Text-to-speech and speech-to-speech translation on top of SeamlessM4T v2.
+    The processor and model are loaded once in ``__init__``. ``languages`` maps
+    the display names offered in the UI to the language codes passed to the
+    processor and to ``generate``. Both public methods return
+    ``(self.sample_rate, audio_array)`` and report failures as ``gr.Error``.
+    """
+    def __init__(self):
+        """Load the processor and model and record the model's sampling rate."""
+        self.model_name = "facebook/seamless-m4t-v2-large"
+        print("Loading model...")
+        self.processor = AutoProcessor.from_pretrained(self.model_name)
+        self.model = SeamlessM4Tv2Model.from_pretrained(self.model_name)
         self.sample_rate = self.model.config.sampling_rate
+        self.languages = {
+            "English": "eng",
+            "Spanish": "spa",
+            "French": "fra",
+            "German": "deu",
+            "Italian": "ita",
+            "Portuguese": "por",
+            "Russian": "rus",
+            "Chinese": "cmn",
+            "Japanese": "jpn",
+            "Korean": "kor"
         }
+    def _language_code(self, language):
+        """Return the code for a display name; raise ``gr.Error`` for an unknown name."""
+        try:
+            return self.languages[language]
+        except KeyError:
+            # A bare KeyError would reach the user as just the quoted key, so name the problem
+            raise gr.Error(f"Unsupported language: {language!r}") from None
+    def translate_text(self, text, src_lang, tgt_lang, progress=gr.Progress()):
+        """Translate ``text`` from ``src_lang`` into speech in ``tgt_lang``.
+        Args:
+            text: Text to translate; must contain non-whitespace characters.
+            src_lang: Display name of the source language (a key of ``languages``).
+            tgt_lang: Display name of the target language (a key of ``languages``).
+            progress: Gradio progress tracker, called with the step fractions below.
+        Returns:
+            ``(self.sample_rate, audio_array)`` where ``audio_array`` is the first
+            element returned by ``generate``, converted to NumPy and squeezed.
+        Raises:
+            gr.Error: On empty text, an unknown language, or any processing failure.
+        """
+        # Validate before touching the model so the user gets a specific message
+        if not text or not text.strip():
+            raise gr.Error("No text input provided")
+        src_code = self._language_code(src_lang)
+        tgt_code = self._language_code(tgt_lang)
+        progress(0.3, desc="Processing input...")
+        try:
+            inputs = self.processor(text=text, src_lang=src_code, return_tensors="pt")
+            progress(0.6, desc="Generating audio...")
+            audio_array = self.model.generate(**inputs, tgt_lang=tgt_code)[0].cpu().numpy().squeeze()
+            progress(1.0, desc="Done!")
+            return (self.sample_rate, audio_array)
+        except Exception as e:
+            # Chain the cause so the original traceback stays in the server log
+            raise gr.Error(str(e)) from e
+    def translate_audio(self, audio_path, tgt_lang, progress=gr.Progress()):
+        """Translate the audio file at ``audio_path`` into speech in ``tgt_lang``.
+        Args:
+            audio_path: Path of the input audio file; ``None`` or empty when nothing was provided.
+            tgt_lang: Display name of the target language (a key of ``languages``).
+            progress: Gradio progress tracker, called with the step fractions below.
+        Returns:
+            ``(self.sample_rate, audio_array)`` where ``audio_array`` is the first
+            element returned by ``generate``, converted to NumPy and squeezed.
+        Raises:
+            gr.Error: On missing audio, an unknown language, or any processing failure.
+        """
+        # Without this guard a missing upload reaches torchaudio.load and fails with an opaque error
+        if not audio_path:
+            raise gr.Error("No audio input provided")
+        tgt_code = self._language_code(tgt_lang)
+        progress(0.3, desc="Loading audio...")
         try:
             audio, orig_freq = torchaudio.load(audio_path)
+            # The input is always resampled to 16 kHz before it is handed to the processor
             audio = torchaudio.functional.resample(audio, orig_freq=orig_freq, new_freq=16000)
+            progress(0.6, desc="Translating...")
             inputs = self.processor(audios=audio, return_tensors="pt")
+            audio_array = self.model.generate(**inputs, tgt_lang=tgt_code)[0].cpu().numpy().squeeze()
+            progress(1.0, desc="Done!")
+            return (self.sample_rate, audio_array)
         except Exception as e:
+            # Chain the cause so the original traceback stays in the server log
+            raise gr.Error(str(e)) from e
+# Stylesheet handed to gr.Blocks(css=...) in create_ui. The .title, .subtitle,
+# .output-label and .footer rules match classes in create_ui's HTML snippets.
+# NOTE(review): #component-0, .container, .tab-nav and the .gr-* selectors are not
+# referenced by any markup in this file; presumably they target Gradio-generated
+# elements - verify against the Gradio version in use.
 css = """
+#component-0 {
+    max-width: 1200px;
+    margin: auto;
     padding: 20px;
 }
+.container {
+    border-radius: 12px;
+    padding: 20px;
 }
+.gr-form {
+    border-color: #e5e7eb !important;
 }
+.gr-button {
+    border-radius: 8px !important;
+    background: linear-gradient(to right, #2563eb, #4f46e5) !important;
+    color: white !important;
+    font-weight: 600 !important;
 }
+.gr-button:hover {
+    box-shadow: 0 4px 6px -1px rgb(0 0 0 / 0.1) !important;
+    transform: translateY(-1px);
 }
+.gr-input, .gr-select {
+    border-radius: 8px !important;
 }
+.gr-panel {
+    border-radius: 12px !important;
 }
+.title {
+    text-align: center;
+    font-size: 2.5rem;
+    font-weight: bold;
+    margin: 1rem 0;
+    background: linear-gradient(to right, #2563eb, #4f46e5);
+    -webkit-background-clip: text;
+    -webkit-text-fill-color: transparent;
 }
+.subtitle {
+    text-align: center;
+    color: #6b7280;
+    margin-bottom: 2rem;
 }
+.tab-nav {
+    border-bottom: 2px solid #e5e7eb;
+    margin-bottom: 1rem;
 }
+.output-label {
+    font-weight: 600;
+    color: #374151;
+    margin-bottom: 0.5rem;
 }
+.footer {
+    text-align: center;
+    margin-top: 2rem;
+    padding-top: 1rem;
+    border-top: 1px solid #e5e7eb;
+    color: #6b7280;
+    font-size: 0.875rem;
 }
 """
+def create_ui():
+    """Build the translator's Gradio Blocks app and return it without launching.
+    The layout is a title banner, a "Text Translation" tab, an "Audio
+    Translation" tab and a footer. Each tab has its own inputs, button and
+    audio player; each button is wired to the matching ``SeamlessTranslator``
+    method.
+    """
+    translator = SeamlessTranslator()
+    def language_dropdown(label, default):
+        # Every dropdown lists the translator's display names; only label and default differ
+        return gr.Dropdown(
+            choices=list(translator.languages.keys()),
+            value=default,
+            label=label
+        )
+    with gr.Blocks(css=css, title="A.R.I.S. Translator") as demo:
+        gr.HTML(
+            """
+            <div class="title">A.R.I.S. Translator</div>
+            <div class="subtitle">Advanced Real-time Interpretation System</div>
+            """
         )
+        with gr.Tabs():
+            # Tab 1: typed text in, translated speech out
+            with gr.Tab("Text Translation", id=1):
+                with gr.Row():
+                    with gr.Column():
+                        text_input = gr.Textbox(label="Text to Translate", placeholder="Enter your text here...", lines=5)
                         with gr.Row():
+                            src_lang = language_dropdown("Source Language", "English")
+                            tgt_lang = language_dropdown("Target Language", "Spanish")
+                        translate_btn = gr.Button("Translate", variant="primary")
+                    with gr.Column():
+                        gr.HTML('<div class="output-label">Translation Output</div>')
+                        text_tab_player = gr.Audio(label="Translated Audio", type="numpy")
+            # Tab 2: uploaded audio in, translated speech out
+            with gr.Tab("Audio Translation", id=2):
+                with gr.Row():
+                    with gr.Column():
+                        audio_input = gr.Audio(label="Upload Audio", type="filepath")
+                        tgt_lang_audio = language_dropdown("Target Language", "English")
+                        translate_audio_btn = gr.Button("Translate Audio", variant="primary")
+                    with gr.Column():
+                        gr.HTML('<div class="output-label">Translation Output</div>')
+                        audio_tab_player = gr.Audio(label="Translated Audio", type="numpy")
+        gr.HTML(
+            """
+            <div class="footer">
+                Powered by Meta's SeamlessM4T model | Built with Gradio
+            </div>
+            """
+        )
+        # Wire each button to its translator method and to its own tab's player
+        translate_btn.click(fn=translator.translate_text, inputs=[text_input, src_lang, tgt_lang], outputs=text_tab_player)
+        translate_audio_btn.click(fn=translator.translate_audio, inputs=[audio_input, tgt_lang_audio], outputs=audio_tab_player)
     return demo
+# Script entry point: build the UI, then call queue() and launch() on it
 if __name__ == "__main__":
+    demo = create_ui()
     demo.queue()
     demo.launch()