devusman commited on
Commit
3f9181a
·
1 Parent(s): 05e7f60
Files changed (4) hide show
  1. Dockerfile +18 -12
  2. app.py +52 -34
  3. convert_model.py +24 -0
  4. requirements.txt +4 -4
Dockerfile CHANGED
@@ -1,17 +1,23 @@
1
- # Use an official Python runtime as a parent image
2
- FROM python:3.9-slim
3
 
4
- # Set the working directory in the container
5
- WORKDIR /code
6
 
7
- # Copy the dependencies file to the working directory
8
- COPY requirements.txt .
9
 
10
- # Install any needed packages specified in requirements.txt
11
- RUN pip install --no-cache-dir --upgrade -r requirements.txt
12
 
13
- # Copy the rest of the application code to the working directory
14
- COPY . .
15
 
16
- # Command to run the API
17
- CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

WORKDIR /app

# System deps for sentencepiece & building wheels.
# --no-install-recommends and the apt-list cleanup keep the image small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends git build-essential \
    && rm -rf /var/lib/apt/lists/*

# Install Python deps BEFORE copying the rest of the code so this
# expensive layer is cached unless requirements.txt itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt

# Pre-cache the HF model at build time to speed up cold starts.
RUN python -c "from transformers import AutoTokenizer, AutoModelForSeq2SeqLM; AutoTokenizer.from_pretrained('Helsinki-NLP/opus-mt-en-ar'); AutoModelForSeq2SeqLM.from_pretrained('Helsinki-NLP/opus-mt-en-ar')"

# Copy application code last so code edits don't invalidate the layers above.
COPY . /app

# Use gunicorn for production inside the Space.
CMD ["gunicorn", "--bind", "0.0.0.0:7860", "app:app", "--workers", "1", "--threads", "4", "--timeout", "300"]
app.py CHANGED
@@ -1,46 +1,64 @@
1
- from fastapi import FastAPI, HTTPException
2
- from pydantic import BaseModel
3
- import ctranslate2
4
- import sentencepiece as spm
5
  import os
6
 
7
- app = FastAPI()
8
 
9
- # Load the CTranslate2 model and the SentencePiece tokenizer
10
- try:
11
- model_path = "en_ar_ct2_model/"
12
- sp_model_path = os.path.join(model_path, "source.spm")
13
 
14
- translator = ctranslate2.Translator(model_path, device="cpu") # Use "cuda" if on a GPU Space
15
- sp = spm.SentencePieceProcessor()
16
- sp.load(sp_model_path)
17
- except Exception as e:
18
- # This helps in debugging if the model files are not found
19
- raise RuntimeError(f"Error loading model: {e}")
20
 
 
 
 
 
 
 
 
 
21
 
22
- class TranslationRequest(BaseModel):
23
- text: list[str] # Expect a list of strings for batching
 
24
 
25
- class TranslationResponse(BaseModel):
26
- translations: list[str]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
- @app.get("/")
29
- def read_root():
30
- return {"message": "English to Arabic Translation API is running."}
 
31
 
32
- @app.post("/translate", response_model=TranslationResponse)
33
- def translate_text(request: TranslationRequest):
34
- if not request.text:
35
- raise HTTPException(status_code=400, detail="Input text list cannot be empty.")
36
 
37
- source_sentences = request.text
38
- source_tokenized = [sp.encode(sentence, out_type=str) for sentence in source_sentences]
39
 
40
- # Translate the batch of sentences
41
- translations_tokenized = translator.translate_batch(source_tokenized)
 
 
 
 
42
 
43
- # Decode the translated sentences
44
- translations = [sp.decode(translation.hypotheses[0]) for translation in translations_tokenized]
45
-
46
- return {"translations": translations}
 
1
+ # app.py
2
+ from flask import Flask, request, jsonify
3
+ from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
 
4
  import os
5
 
6
+ app = Flask(__name__)
7
 
8
+ MODEL_ID = os.environ.get("MODEL_ID", "Helsinki-NLP/opus-mt-en-ar")
 
 
 
9
 
10
+ # Lazy load on first request (avoid heavy imports on cold boot if you prefer)
11
+ translator = None
 
 
 
 
12
 
13
def get_translator():
    """Return the translation pipeline, creating it lazily on first use.

    Loading is deferred so importing this module stays cheap on cold boot;
    subsequent calls reuse the cached module-level ``translator``.
    """
    global translator
    if translator is None:
        # Load tokenizer + model explicitly to control device/kwargs if needed.
        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
        model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID)
        # NOTE: `src=`/`tgt=` are not valid pipeline() arguments (the
        # documented names are `src_lang`/`tgt_lang`) — and neither is
        # needed here, since opus-mt-en-ar is a dedicated en->ar model.
        translator = pipeline("translation", model=model, tokenizer=tokenizer)
    return translator
21
 
22
@app.route("/health", methods=["GET"])
def health():
    """Liveness probe: report that the service is up."""
    body = {"status": "ok"}
    return jsonify(body), 200
25
 
26
@app.route("/translate", methods=["POST"])
def translate():
    """
    Translate English text to Arabic.

    Accepts JSON:
    {
      "texts": ["Hello", "How are you?"],   # or a single string as "text"
      "max_length": 256,                    # optional
      "batch_size": 8                       # optional
    }
    Returns:
    {
      "translations": ["مرحبا", "كيف حالك؟"]
    }
    """
    # silent=True makes get_json return None on malformed JSON instead of
    # raising BadRequest (with force=True alone, the None check below was
    # unreachable: a parse failure aborted the request with an HTML 400).
    payload = request.get_json(force=True, silent=True)
    if payload is None:
        return jsonify({"error": "invalid json"}), 400

    # Allow a single string ("text") or a list of strings ("texts").
    texts = payload.get("texts") or payload.get("text")
    if texts is None:
        return jsonify({"error": "provide 'text' or 'texts' in JSON"}), 400

    if isinstance(texts, str):
        texts = [texts]

    # Reject non-string items up front with a clear 400 instead of letting
    # the pipeline raise an opaque 500.
    if not isinstance(texts, list) or not all(isinstance(t, str) for t in texts):
        return jsonify({"error": "'text'/'texts' must be a string or a list of strings"}), 400

    max_length = payload.get("max_length", 256)
    batch_size = payload.get("batch_size", 8)

    pipe = get_translator()
    # The pipeline supports batched translation and returns a list of
    # dicts like {"translation_text": "..."}.
    translated = pipe(texts, max_length=max_length, batch_size=batch_size)
    out = [t["translation_text"] for t in translated]
    return jsonify({"translations": out}), 200
61
 
62
if __name__ == "__main__":
    # Local debugging entry point only; in the Space, gunicorn serves the
    # app (see the Dockerfile CMD).
    port = int(os.environ.get("PORT", 8080))
    app.run(host="0.0.0.0", port=port)
 
convert_model.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""One-off script: convert the OPUS-MT en->ar model to CTranslate2 format.

Run locally, then upload the resulting folder to the Hugging Face Space.
"""
# The converter class lives in ctranslate2.converters — there is no
# `ct2_transformers` module, and the class is TransformersConverter
# (plural "Transformers"), so the original import could never resolve.
from ctranslate2.converters import TransformersConverter

# 1. Define the source Hugging Face model
model_name = "Helsinki-NLP/opus-mt-tc-big-en-ar"

# 2. Define the output path for the converted model.
#    Do NOT pre-create the directory: Converter.convert() refuses to write
#    into an existing directory unless force=True, so the old
#    os.makedirs() call made the script fail on its own output folder.
output_dir = "en_ar_ct2_model"

print(f"Starting conversion of model: {model_name}")
print("This may take a few moments...")

# 3. Initialize the converter
converter = TransformersConverter(model_name)

# 4. Run the conversion with int8 quantization for speed/size.
#    force=True makes re-runs idempotent (overwrites a previous output).
converter.convert(output_dir, quantization="int8", force=True)

print(f"\nModel successfully converted and saved to the '{output_dir}' folder.")
print("You are now ready to upload this folder to your Hugging Face Space.")
requirements.txt CHANGED
@@ -1,5 +1,5 @@
1
- fastapi
2
- uvicorn
3
- ctranslate2
4
  sentencepiece
5
- transformers
 
1
+ flask
2
+ transformers>=4.30
3
+ torch  # CPU-only builds: there is no "torch-cpu" package; install from the PyTorch CPU wheel index (--index-url https://download.pytorch.org/whl/cpu)
4
  sentencepiece
5
+ gunicorn