Upload 7 files
- .gitignore +24 -0
- Dockerfile +12 -0
- README.md +95 -0
- app.py +5 -0
- app/main.py +34 -0
- app/model.py +44 -0
- requirements.txt +5 -0
.gitignore
ADDED
@@ -0,0 +1,24 @@
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
.env
.venv
venv/
ENV/

Dockerfile
ADDED
@@ -0,0 +1,12 @@
FROM python:3.9-slim

WORKDIR /app

COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

EXPOSE 7860

CMD ["python", "app.py"]

README.md
ADDED
@@ -0,0 +1,95 @@
# NLLB Translation API

A FastAPI-based translation service for multiple languages, built on the NLLB (No Language Left Behind) model and deployed on Hugging Face Spaces.

## Features

- Translation between multiple languages using the Nova35/nllb-mbart-indic-distilled model
- FastAPI-based REST API
- Docker containerization support
- Deployable on Hugging Face Spaces

## API Endpoints

### POST /translate

Translate text from one language to another. The `source_lang` and `target_lang` fields take language names (see Supported Languages below).

Request body:
```json
{
  "text": "Your text to translate",
  "source_lang": "English",
  "target_lang": "Hindi"
}
```

Response:
```json
{
  "translation": "Translated text"
}
```
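
For a quick test, a minimal Python client could look like the sketch below. It assumes the API is running locally on port 7860 (see Local Development) and that the `requests` package is installed; `requests` is not part of requirements.txt.

```python
# Sketch: call the /translate endpoint of a locally running instance.
# Assumes http://localhost:7860 and the third-party `requests` package.
import requests

payload = {
    "text": "Hello, how are you?",
    "source_lang": "English",
    "target_lang": "Hindi",
}

resp = requests.post("http://localhost:7860/translate", json=payload, timeout=120)
resp.raise_for_status()
print(resp.json()["translation"])
```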

## Supported Languages

The model supports the following languages:
- English (eng_Latn)
- Hindi (hin_Deva)
- Tamil (tam_Taml)
- Telugu (tel_Telu)
- Kannada (kan_Knda)
- Malayalam (mal_Mlym)
- French (fra_Latn)
- German (deu_Latn)
- Spanish (spa_Latn)
- Japanese (jpn_Jpan)

## Deployment on Hugging Face Spaces

1. Create a new Space on Hugging Face:
   - Go to https://huggingface.co/spaces
   - Click "Create new Space"
   - Choose "Docker" as the SDK
   - Name your Space (e.g., "nllb-translator")

2. Push your code to the Space:
```bash
git clone https://huggingface.co/spaces/your-username/nllb-translator
cd nllb-translator
# Copy your files to this directory
git add .
git commit -m "Initial commit"
git push
```

3. Your Space will build and deploy automatically. Once complete, it will be available at:
   `https://huggingface.co/spaces/your-username/nllb-translator`

## Local Development

1. Install dependencies:
```bash
pip install -r requirements.txt
```

2. Run the application:
```bash
python app.py
```

The API will be available at `http://localhost:7860`.

## Project Structure

```
nllb-translator-app/
├── app/
│   ├── main.py
│   └── model.py
├── app.py
├── requirements.txt
├── Dockerfile
├── README.md
└── .gitignore
```

app.py
ADDED
@@ -0,0 +1,5 @@
from app.main import app

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)

app/main.py
ADDED
@@ -0,0 +1,34 @@
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from .model import load_model, translate_text

app = FastAPI(title="NLLB Translation API")

# Load model and tokenizer once at startup
model, tokenizer = load_model()

class TranslationRequest(BaseModel):
    text: str
    source_lang: str
    target_lang: str

class TranslationResponse(BaseModel):
    translation: str

@app.get("/")
async def root():
    return {"message": "Welcome to NLLB Translation API"}

@app.post("/translate", response_model=TranslationResponse)
async def translate(request: TranslationRequest):
    try:
        translation = translate_text(
            request.text,
            request.source_lang,
            request.target_lang,
            model,
            tokenizer
        )
        return TranslationResponse(translation=translation)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
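
A minimal in-process test sketch for these endpoints using FastAPI's TestClient (illustration only: TestClient relies on the `httpx` package, which is not in requirements.txt, and importing `app.main` calls `load_model()`, so the model weights must be available):

```python
# Sketch: exercise the API in-process with FastAPI's TestClient.
# Importing app.main triggers load_model(), so the model is loaded first.
from fastapi.testclient import TestClient
from app.main import app

client = TestClient(app)

# Root endpoint
print(client.get("/").json())  # {"message": "Welcome to NLLB Translation API"}

# Translation endpoint
resp = client.post("/translate", json={
    "text": "Good morning",
    "source_lang": "English",
    "target_lang": "Tamil",
})
print(resp.status_code, resp.json())
```
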
app/model.py
ADDED
@@ -0,0 +1,44 @@
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Mapping from language names to NLLB (FLORES-200) language codes
LANGS = {
    "English": "eng_Latn",
    "Hindi": "hin_Deva",
    "Tamil": "tam_Taml",
    "Telugu": "tel_Telu",
    "Kannada": "kan_Knda",
    "Malayalam": "mal_Mlym",
    "French": "fra_Latn",
    "German": "deu_Latn",
    "Spanish": "spa_Latn",
    "Japanese": "jpn_Jpan",
}

def load_model():
    model_name = "Nova35/nllb-mbart-indic-distilled"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    return model, tokenizer

def translate_text(text, source_lang, target_lang, model, tokenizer):
    # Look up the language codes for the requested language names
    src_lang_code = LANGS.get(source_lang)
    tgt_lang_code = LANGS.get(target_lang)

    if not src_lang_code or not tgt_lang_code:
        raise ValueError(f"Unsupported language. Supported languages are: {list(LANGS.keys())}")

    # Tell the tokenizer which language the input text is in
    tokenizer.src_lang = src_lang_code

    # Tokenize and generate the translation, forcing the target-language
    # token as the first generated token
    inputs = tokenizer(text, return_tensors="pt", padding=True)
    translated = model.generate(
        **inputs,
        forced_bos_token_id=tokenizer.lang_code_to_id[tgt_lang_code],
        max_length=128
    )

    # Decode the translation
    translation = tokenizer.batch_decode(translated, skip_special_tokens=True)[0]
    return translation
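
These helpers can also be used directly, without the API layer; a minimal sketch (the model is fetched from the Hugging Face Hub on first use):

```python
# Sketch: call the translation helpers directly, bypassing FastAPI.
from app.model import load_model, translate_text

model, tokenizer = load_model()  # downloads weights on first run
print(translate_text("How are you?", "English", "Hindi", model, tokenizer))

# Language names outside LANGS raise a ValueError listing the supported names.
try:
    translate_text("Hello", "Portuguese", "English", model, tokenizer)
except ValueError as e:
    print(e)
```
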
requirements.txt
ADDED
@@ -0,0 +1,5 @@
fastapi==0.104.1
uvicorn==0.24.0
transformers==4.35.2
torch==2.1.1
pydantic==2.5.2