Spaces:
Sleeping
Sleeping
Upload 11 files
Browse files
- Dockerfile.txt +17 -0
- app.py +34 -0
- models/Flashcardd.py +10 -0
- models/__init__.py +0 -0
- requirements.txt +9 -0
- translations/__init__.py +0 -0
- translations/__pycache__/__init__.cpython-39.pyc +0 -0
- translations/__pycache__/model_name_mapping.cpython-39.pyc +0 -0
- translations/__pycache__/translate.cpython-39.pyc +0 -0
- translations/model_name_mapping.py +47 -0
- translations/translate.py +45 -0
Dockerfile.txt
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Dockerfile — container image for the flashcard translation API.
FROM python:3.10-slim

WORKDIR /app

# Install dependencies in their own layer so code edits don't bust the cache.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Bring in the application source.
COPY . .

# Port FastAPI listens on (Hugging Face Spaces convention).
EXPOSE 7860

# Launch the ASGI app defined in app.py.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Standard library
import os

# Third-party
from fastapi import FastAPI, Body, File, UploadFile, Request
from fastapi.responses import HTMLResponse, RedirectResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates

# Local application
from models.Flashcardd import Flashcard
from translations.translate import load_model_and_tokenizer, translate


# ASGI application instance; uvicorn serves this as "app:app".
app = FastAPI()
@app.get("/")
async def home():
    """Root health-check endpoint; returns a static greeting payload."""
    return {"message": "hola-mondo"}
| 17 |
+
|
| 18 |
+
|
@app.post("/flashcards/create_flashcard")
async def create_flashcard(new_flashcard=Body()) -> Flashcard:
    """Create a flashcard by translating the submitted word.

    Expects a JSON body with keys:
        word: the text to translate.
        from: source language code (e.g. "en").
        to:   target language code (e.g. "es").

    Returns:
        A Flashcard holding the original word and its translation.
        Missing keys surface as a KeyError (HTTP 500) — TODO: validate the
        body with a pydantic request model for a proper 422 response.
    """
    print(f"new flashcard: {new_flashcard}")
    from_lang = new_flashcard["from"]
    to_lang = new_flashcard["to"]
    # Model weights are loaded per request; acceptable for a demo, but the
    # loaded (model, tokenizer) pair should be cached under real traffic.
    model, tokenizer = load_model_and_tokenizer(from_lang=from_lang, to_lang=to_lang)
    translation = translate(new_flashcard["word"], model, tokenizer)

    return Flashcard(
        name=new_flashcard["word"],
        translation=translation,
        sample_sentence="notes..",
    )
models/Flashcardd.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from pydantic import BaseModel, constr
from typing import Union, Optional


class Flashcard(BaseModel):
    """A vocabulary flashcard: a word, its translation, and an example."""

    # Word being learned; length is bounded to keep cards readable.
    name: constr(min_length=5, max_length=100)  # type: ignore
    translation: Optional[str] = ""
    sample_sentence: Optional[str] = ""
models/__init__.py
ADDED
|
File without changes
|
requirements.txt
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Web framework / UI
gradio
fastapi==0.112.0
# Translation stack (MarianMT via transformers)
sentencepiece==0.2.0
numpy==1.23.1
sacremoses==0.1.1
tokenizers==0.19.1
transformers==4.43.3
huggingface-hub==0.24.5
torch
translations/__init__.py
ADDED
|
File without changes
|
translations/__pycache__/__init__.cpython-39.pyc
ADDED
|
Binary file (163 Bytes). View file
|
|
|
translations/__pycache__/model_name_mapping.cpython-39.pyc
ADDED
|
Binary file (1.84 kB). View file
|
|
|
translations/__pycache__/translate.cpython-39.pyc
ADDED
|
Binary file (1.59 kB). View file
|
|
|
translations/model_name_mapping.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from dataclasses import dataclass, field
from typing import Dict, Literal, Tuple
from pydantic import BaseModel, ValidationError
from enum import Enum


class ModelNameMapping(BaseModel):
    """Read-only lookup from a (source, target) language pair to a HF model name."""

    # Plain {} default: pydantic deep-copies defaults per instance, so this is
    # safe. The previous `field(default_factory=dict)` used dataclasses.field,
    # which pydantic does not interpret — ModelNameMapping() would have held a
    # dataclasses.Field sentinel instead of an empty dict.
    data: Dict[Tuple[str, str], str] = {}

    def __getitem__(self, key: Tuple[str, str]) -> str:
        """Return the model name for *key*; raises KeyError when unmapped."""
        return self.data[key]

    def get(self, key: Tuple[str, str]) -> str:
        """Return the model name for *key*, or "" when unmapped."""
        return self.data.get(key, "")


class LanguagesEnum(Enum):
    """Supported ISO 639-1 language codes."""

    es = "es"
    en = "en"
    ca = "ca"
    pt = "pt"


# (from, to) language-pair keys. NOTE: the "po" names actually map Portuguese
# ("pt") values — kept for backward compatibility with existing callers.
es_to_en: Tuple = (LanguagesEnum.es.value, LanguagesEnum.en.value)
es_to_ca: Tuple = (LanguagesEnum.es.value, LanguagesEnum.ca.value)
es_to_po: Tuple = (LanguagesEnum.es.value, LanguagesEnum.pt.value)

en_to_es: Tuple = (LanguagesEnum.en.value, LanguagesEnum.es.value)
en_to_po: Tuple = (LanguagesEnum.en.value, LanguagesEnum.pt.value)

ca_to_es: Tuple = (LanguagesEnum.ca.value, LanguagesEnum.es.value)

po_to_en: Tuple = (LanguagesEnum.pt.value, LanguagesEnum.en.value)
po_to_es: Tuple = (LanguagesEnum.pt.value, LanguagesEnum.es.value)

# Helsinki-NLP OPUS-MT checkpoints per language pair. es_to_po has no entry,
# so (es -> pt) currently resolves to "" via ModelNameMapping.get.
models = {
    es_to_en: "Helsinki-NLP/opus-mt-es-en",
    es_to_ca: "Helsinki-NLP/opus-mt-es-ca",
    en_to_es: "Helsinki-NLP/opus-mt-en-es",
    en_to_po: "Helsinki-NLP/opus-mt-tc-big-en-pt",
    ca_to_es: "Helsinki-NLP/opus-mt-ca-es",
    po_to_en: "Helsinki-NLP/opus-mt-pt-en",
    po_to_es: "Helsinki-NLP/opus-mt-pt-es",
}


MODEL_NAME_MAPPING = ModelNameMapping(data=models)
translations/translate.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from transformers import MarianMTModel, MarianTokenizer
|
| 2 |
+
from translations.model_name_mapping import MODEL_NAME_MAPPING
|
| 3 |
+
|
| 4 |
+
|
def load_model_and_tokenizer(from_lang: str, to_lang: str):
    """Load the MarianMT model and tokenizer for a language pair.

    Args:
        from_lang: source language code (e.g. "en").
        to_lang: target language code (e.g. "es").

    Returns:
        A (model, tokenizer) tuple.

    Raises:
        ValueError: when no model is registered for the pair —
            MODEL_NAME_MAPPING.get returns "" for unknown pairs, and passing
            "" to from_pretrained would fail with a cryptic hub error.
    """
    print(f"load_model_and_tokenizer from: {from_lang}, to: {to_lang}")

    model_name = MODEL_NAME_MAPPING.get((from_lang, to_lang))
    if not model_name:
        raise ValueError(
            f"No translation model registered for pair ({from_lang!r}, {to_lang!r})"
        )
    print(f"model_name: {model_name}")
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    return model, tokenizer
def translate(text, model, tokenizer):
    """Translate *text* with a MarianMT model; returns the first decoded string."""
    batch = tokenizer(text, return_tensors="pt", padding=True)
    outputs = model.generate(**batch)
    decoded = [tokenizer.decode(ids, skip_special_tokens=True) for ids in outputs]
    return decoded[0]
def test_translations(from_lang, to_lang, text_to_translate):
    """Smoke-test helper: translate one string and print the result."""
    model, tokenizer = load_model_and_tokenizer(from_lang=from_lang, to_lang=to_lang)
    result = translate(text_to_translate, model, tokenizer)
    print(f"Translated text from: {from_lang}, to: {to_lang}, translation: {result}")
| 28 |
+
|
| 29 |
+
|
| 30 |
+
if __name__ == "__main__":
|
| 31 |
+
# text_to_translate = "hola amigos, tengo hambre"
|
| 32 |
+
|
| 33 |
+
# test_translations(from_lang="es", to_lang="en", text_to_translate=text_to_translate)
|
| 34 |
+
# test_translations(from_lang="es", to_lang="ca", text_to_translate=text_to_translate)
|
| 35 |
+
# test_translations(from_lang="es", to_lang="po", text_to_translate=text_to_translate)
|
| 36 |
+
|
| 37 |
+
text_to_translate = "hello friends, who's hungry?"
|
| 38 |
+
test_translations(from_lang="en", to_lang="es", text_to_translate=text_to_translate)
|
| 39 |
+
# test_translations(from_lang="en", to_lang="ca", text_to_translate=text_to_translate)
|
| 40 |
+
test_translations(from_lang="en", to_lang="pt", text_to_translate=text_to_translate)
|
| 41 |
+
# test_translations(from_lang="es", to_lang="po", text_to_translate=text_to_translate)
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
text_to_translate = "un cafè sense sucre i amb llet, si us plau"
|
| 45 |
+
test_translations(from_lang="ca", to_lang="es", text_to_translate=text_to_translate)
|