Spaces:

sanjitaa
/

TranslationFastAPI

Runtime error

App Files Files Community

sanjitaa committed on Oct 3, 2023

Commit

04cf650

1 Parent(s): ce2f3c8

upload app files

Browse files

Files changed (3) hide show

Dockerfile.txt +28 -0
main.py +88 -0
requirements.txt +10 -0

Dockerfile.txt ADDED Viewed

	@@ -0,0 +1,28 @@

+# Use the official Python 3.10 image
+FROM python:3.10
+# Set the working directory to /code
+WORKDIR /code
+# Copy only requirements.txt into the container at /code; the application
+# sources are copied later, after the dependencies are installed
+COPY ./requirements.txt /code/requirements.txt
+# Install the Python dependencies listed in requirements.txt (no pip cache kept)
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+# Set up a new user named "user" with user ID 1000
+RUN useradd -m -u 1000 user
+# Switch to the "user" user so the remaining steps and the app do not run as root
+USER user
+# Set HOME to the user's home directory and put the user's local bin
+# directory first on PATH
+ENV HOME=/home/user \
+	PATH=/home/user/.local/bin:$PATH
+# Set the working directory to the app directory inside the user's home
+WORKDIR $HOME/app
+# Copy the current directory contents into the container at $HOME/app setting the owner to the user
+COPY --chown=user . $HOME/app
+# Start the FastAPI application object "app" from main.py with uvicorn,
+# listening on all interfaces on port 7860
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]

main.py ADDED Viewed

	@@ -0,0 +1,88 @@

+# Run locally with: uvicorn main:app --host 0.0.0.0 --port 8000 --reload
+import io
+import json
+import os
+import librosa
+import requests
+import textwrap3
+import whisper
+from fastapi import FastAPI, Form, HTTPException, UploadFile
+from fastapi.responses import HTMLResponse
+# Load the Whisper "medium" checkpoint once at import time; the /transcribe
+# handler reuses this single module-level model for every request.
+model = whisper.load_model("medium")
+# ASGI application object; the Dockerfile's CMD serves it as "main:app".
+app = FastAPI()
+# Alternative backend kept for reference only (inactive): faster-whisper
+# configured for CPU with int8 compute.
+# from faster_whisper import WhisperModel
+# model_size = "medium"
+# ts_model = WhisperModel(model_size, device="cpu", compute_type="int8")
+@app.get("/")
+def read_root():
+    html_form = """
+    <html>
+        <body>
+            <h2>Audio Transcription</h2>
+            <form action="/transcribe" method="post" enctype="multipart/form-data">
+                <label for="audio_file">Upload an audio file (MP3 or WAV):</label>
+                <input type="file" id="audio_file" name="audio_file" accept=".mp3, .wav" required><br><br>
+                <label for="language_select">Select Target Language:</label>
+                <select id="language_select" name="tgt_lang">
+                    <option value="fr_XX">French</option>
+                    <option value="es_XX">Spanish</option>
+                    <option value="de_DE">German</option>
+                    <option value="hi_IN">Hindi</option>
+                    <option value="en_XX">English</option>
+                    <option value="ja_XX">Japanese</option>
+                    <option value="ne_NP">Nepali</option>
+                    <option value="zh_CN">Chinese</option>
+                    <option value="pt_XX">Portuguese</option>
+                    <!-- Add more language options here -->
+                </select><br><br>
+                <input type="submit" value="Transcribe">
+                <input type="hidden" id="tgt_lang" name="tgt_lang" value="fr_XX">
+            </form>
+        </body>
+    </html>
+    <script>
+        document.getElementById("language_select").addEventListener("change", function () {
+            var selectedLanguage = this.value;
+            document.getElementById("tgt_lang").value = selectedLanguage;
+        });
+    </script>
+    """
+    return HTMLResponse(content=html_form, status_code=200)
+@app.post("/transcribe")
+async def transcribe_audio(audio_file: UploadFile, tgt_lang: str = Form(...)):
+    audio_data = await audio_file.read()
+    audio_data, _ = librosa.load(io.BytesIO(audio_data), sr=16000)
+    result = model.transcribe(audio_data, task = "translate")
+    transcribed_text = result['text']
+    if tgt_lang == 'en_XX':
+        return transcribed_text
+    else:
+        chunks = textwrap3.wrap(transcribed_text, 100)
+        #segments, _ = ts_model.transcribe(audio_data, task="translate")
+        # lst = []
+        # for segment in segments:
+        #     lst.append(segment.text)
+        headers = {"Authorization": f"Bearer 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"}
+        API_URL = "https://api-inference.huggingface.co/pipeline/translation/facebook/mbart-large-50-many-to-many-mmt"
+        def query(payload):
+            data = json.dumps(payload)
+            response = requests.request("POST", API_URL, headers=headers, data=data)
+            return json.loads(response.content.decode("utf-8"))
+        translated_text = ''
+        for i in chunks:
+            result = query({"inputs": i, "parameters": {"src_lang": "en_XX", "tgt_lang": tgt_lang}})
+            translated_text = translated_text + result[0]['translation_text']
+        return translated_text

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+librosa
+soundfile
+fastapi
+uvicorn
+transformers
+torch
+python-multipart
+sentencepiece
+textwrap3
+# main.py imports requests directly, so list it explicitly rather than
+# relying on another package to pull it in.
+requests
+git+https://github.com/openai/whisper.git