Nova35 committed on
Commit
59a2335
·
verified ·
1 Parent(s): ae46fcf

Upload 7 files

Browse files
Files changed (7) hide show
  1. .gitignore +24 -0
  2. Dockerfile +12 -0
  3. README.md +95 -0
  4. app.py +5 -0
  5. app/main.py +34 -0
  6. app/model.py +44 -0
  7. requirements.txt +5 -0
.gitignore ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+ *.so
5
+ .Python
6
+ env/
7
+ build/
8
+ develop-eggs/
9
+ dist/
10
+ downloads/
11
+ eggs/
12
+ .eggs/
13
+ lib/
14
+ lib64/
15
+ parts/
16
+ sdist/
17
+ var/
18
+ *.egg-info/
19
+ .installed.cfg
20
+ *.egg
21
+ .env
22
+ .venv
23
+ venv/
24
+ ENV/
Dockerfile ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.9-slim

# Hugging Face Docker Spaces run the container as UID 1000. Create that user
# with a real home directory so libraries that cache under $HOME (such as the
# Hugging Face model cache) have a writable location at runtime.
RUN useradd -m -u 1000 user

WORKDIR /app

# Install dependencies first so this layer is reused when only the
# application code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application and hand ownership to the runtime user.
COPY --chown=user . .

# Drop root for the running service.
USER user
ENV HOME=/home/user

# Port that app.py binds uvicorn to.
EXPOSE 7860

CMD ["python", "app.py"]
README.md ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # NLLB Translation API
2
+
3
+ A FastAPI-based translation service using the NLLB (No Language Left Behind) model for multiple languages, deployed on Hugging Face Spaces.
4
+
5
+ ## Features
6
+
7
+ - Translation between multiple languages using the Nova35/nllb-mbart-indic-distilled model
8
+ - FastAPI-based REST API
9
+ - Docker containerization support
10
+ - Deployable on Hugging Face Spaces
11
+
12
+ ## API Endpoints
13
+
14
+ ### POST /translate
15
+
16
+ Translate text from one language to another.
17
+
18
+ Request body (`source_lang` and `target_lang` are the source and target language names, e.g. "English" or "Hindi"):
19
+ ```json
20
+ {
21
+ "text": "Your text to translate",
22
+ "source_lang": "English",
23
+ "target_lang": "Hindi"
24
+ }
25
+ ```
26
+
27
+ Response:
28
+ ```json
29
+ {
30
+ "translation": "Translated text"
31
+ }
32
+ ```
33
+
34
+ ## Supported Languages
35
+
36
+ The model supports the following languages:
37
+ - English (eng_Latn)
38
+ - Hindi (hin_Deva)
39
+ - Tamil (tam_Taml)
40
+ - Telugu (tel_Telu)
41
+ - Kannada (kan_Knda)
42
+ - Malayalam (mal_Mlym)
43
+ - French (fra_Latn)
44
+ - German (deu_Latn)
45
+ - Spanish (spa_Latn)
46
+ - Japanese (jpn_Jpan)
47
+
48
+ ## Deployment on Hugging Face Spaces
49
+
50
+ 1. Create a new Space on Hugging Face:
51
+ - Go to https://huggingface.co/spaces
52
+ - Click "Create new Space"
53
+ - Choose "Docker" as the SDK
54
+ - Name your space (e.g., "nllb-translator")
55
+
56
+ 2. Push your code to the Space:
57
+ ```bash
58
+ git clone https://huggingface.co/spaces/your-username/nllb-translator
59
+ cd nllb-translator
60
+ # Copy your files to this directory
61
+ git add .
62
+ git commit -m "Initial commit"
63
+ git push
64
+ ```
65
+
66
+ 3. Your Space will automatically build and deploy. Once complete, it will be available at:
67
+ `https://huggingface.co/spaces/your-username/nllb-translator`
68
+
69
+ ## Local Development
70
+
71
+ 1. Install dependencies:
72
+ ```bash
73
+ pip install -r requirements.txt
74
+ ```
75
+
76
+ 2. Run the application:
77
+ ```bash
78
+ python app.py
79
+ ```
80
+
81
+ The API will be available at `http://localhost:7860`
82
+
83
+ ## Project Structure
84
+
85
+ ```
86
+ nllb-translator-app/
87
+ ├── app/
88
+ │ ├── main.py
89
+ │ └── model.py
90
+ ├── app.py
91
+ ├── requirements.txt
92
+ ├── Dockerfile
93
+ ├── README.md
94
+ └── .gitignore
95
+ ```
app.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
# Launcher script: serves the FastAPI application object defined in
# app/main.py with uvicorn when this file is executed directly.
#
# NOTE(review): this file shares its name with the `app/` directory. Python
# prefers a plain module file over a directory that lacks `__init__.py`, so
# `from app.main import app` only works if `app/__init__.py` exists -- confirm
# that the package marker is present in the repository.
from app.main import app

if __name__ == "__main__":
    # Imported here so uvicorn is only needed when running as a script.
    from uvicorn import run as serve

    # Listen on every interface, on port 7860.
    serve(app, host="0.0.0.0", port=7860)
app/main.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from pydantic import BaseModel
3
+ from .model import load_model, translate_text
4
+
5
# Application object; the route decorators below register their handlers on it.
app = FastAPI(title="NLLB Translation API")

# Load the model and tokenizer once, at import time. The /translate handler
# passes these same module-level objects to translate_text on every request.
model, tokenizer = load_model()
9
+
10
# Request payload accepted by POST /translate.
class TranslationRequest(BaseModel):
    # Text to be translated.
    text: str
    # Name of the language `text` is written in; forwarded unchanged to
    # translate_text as its source_lang argument.
    source_lang: str
    # Name of the language to translate into; forwarded unchanged to
    # translate_text as its target_lang argument.
    target_lang: str
14
+
15
# Response payload returned by POST /translate.
class TranslationResponse(BaseModel):
    # The translated text produced by translate_text.
    translation: str
17
+
18
# GET /: return a fixed welcome message.
@app.get("/")
async def root():
    greeting = {"message": "Welcome to NLLB Translation API"}
    return greeting
21
+
22
# POST /translate: translate `request.text` from `request.source_lang` to
# `request.target_lang` and return the result as a TranslationResponse.
#
# Error mapping:
#   * ValueError from translate_text (raised for language names it does not
#     support) -> HTTP 400, because the request itself is at fault.
#   * Any other exception -> HTTP 500.
# In both cases the exception text is returned as the error detail.
#
# NOTE(review): translate_text is called synchronously inside this coroutine,
# so the event loop is blocked for the duration of each translation and
# requests are effectively processed one at a time.
@app.post("/translate", response_model=TranslationResponse)
async def translate(request: TranslationRequest):
    # Keep the try body limited to the call that is expected to fail.
    try:
        translation = translate_text(
            request.text,
            request.source_lang,
            request.target_lang,
            model,
            tokenizer,
        )
    except ValueError as exc:
        # Bad input from the client, not a server fault.
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    except Exception as exc:
        # Top-level boundary: surface unexpected failures as a 500.
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    return TranslationResponse(translation=translation)
app/model.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
2
+
3
# Maps the human-readable language names accepted by translate_text to the
# language codes handed to the tokenizer and model.
LANGS = dict(
    English="eng_Latn",
    Hindi="hin_Deva",
    Tamil="tam_Taml",
    Telugu="tel_Telu",
    Kannada="kan_Knda",
    Malayalam="mal_Mlym",
    French="fra_Latn",
    German="deu_Latn",
    Spanish="spa_Latn",
    Japanese="jpn_Jpan",
)
16
+
17
def load_model(model_name="Nova35/nllb-mbart-indic-distilled"):
    """Load the translation model and its tokenizer.

    Args:
        model_name: Identifier passed to ``from_pretrained`` for both the
            tokenizer and the model. Defaults to the checkpoint this service
            was written for, so existing zero-argument calls are unchanged.

    Returns:
        A ``(model, tokenizer)`` tuple, in that order.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    return model, tokenizer
22
+
23
def translate_text(text, source_lang, target_lang, model, tokenizer, max_length=128):
    """Translate ``text`` between two languages listed in ``LANGS``.

    Args:
        text: The text to translate.
        source_lang: Name of the source language; must be a key of ``LANGS``.
        target_lang: Name of the target language; must be a key of ``LANGS``.
        model: Sequence-to-sequence model used for generation.
        tokenizer: Tokenizer matching ``model``.
        max_length: Upper bound forwarded to ``model.generate``. Defaults to
            128, the value that was previously hard-coded.

    Returns:
        The decoded translation, or an empty string when ``text`` is empty or
        contains only whitespace.

    Raises:
        ValueError: If either language name is not a key of ``LANGS``.
    """
    # Resolve the human-readable names to language codes.
    src_lang_code = LANGS.get(source_lang)
    tgt_lang_code = LANGS.get(target_lang)

    if not src_lang_code or not tgt_lang_code:
        raise ValueError(f"Unsupported language. Supported languages are: {list(LANGS.keys())}")

    # Nothing to translate: skip the model instead of generating from an
    # input that holds only the language prefix.
    if not text or not text.strip():
        return ""

    # NOTE(review): the source language is signalled by prefixing its code to
    # the text. NLLB tokenizers normally take it through `tokenizer.src_lang`
    # instead; confirm which convention this checkpoint was trained with
    # before changing it.
    input_text = f"{src_lang_code} {text}"

    # truncation=True keeps over-long inputs within the tokenizer's configured
    # maximum length instead of passing them to the model unbounded.
    inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True)

    # Look the target-language token up through the generic vocabulary API;
    # the `lang_code_to_id` mapping is not available in every transformers
    # release.
    forced_bos_token_id = tokenizer.convert_tokens_to_ids(tgt_lang_code)

    translated = model.generate(
        **inputs,
        forced_bos_token_id=forced_bos_token_id,
        max_length=max_length,
    )

    # A single input was encoded, so the first decoded sequence is the result.
    return tokenizer.batch_decode(translated, skip_special_tokens=True)[0]
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ fastapi==0.104.1
2
+ uvicorn==0.24.0
3
+ transformers==4.35.2
4
+ torch==2.1.1
5
+ pydantic==2.5.2