moslem committed on
Commit
f9a718e
·
verified ·
1 Parent(s): cc8c904

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +41 -0
  2. app.py +185 -0
  3. requirements.txt +13 -0
Dockerfile ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Dockerfile for FastAPI Hugging Face app
FROM python:3.10-slim

ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED=1

# Install essential system dependencies
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
    build-essential git curl libgl1 libglib2.0-0 libsndfile1 ffmpeg \
    && rm -rf /var/lib/apt/lists/*

# Create a non-root user (required for Spaces)
RUN useradd -m -u 1000 user

ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

WORKDIR $HOME/app

# Copy dependency list first so code-only changes reuse the cached pip layer
COPY --chown=user:user requirements.txt .

RUN python -m pip install --upgrade pip \
    && python -m pip install --no-cache-dir -r requirements.txt

# Copy source code already owned by the non-root user.
# NOTE: a separate `RUN chown -R user:user $HOME` would duplicate every
# copied file into an extra layer, doubling that part of the image;
# COPY --chown sets ownership in the same layer instead.
COPY --chown=user:user . .

# Switch to non-root user
USER user

# Expose port
EXPOSE 7860
ENV PORT=7860

# Run FastAPI app (sh -c so ${PORT} is expanded at container start)
CMD ["sh", "-c", "uvicorn app:app --host 0.0.0.0 --port ${PORT} --workers 1"]
app.py ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
app.py
FastAPI application for serving either:
- a text-generation LLM, or
- a visual-language model (VLM) for image captioning.

Environment variables:
MODEL_ID — Hugging Face model repo id (default: "gpt2")
MODEL_TYPE — "llm" or "vlm" (default: "llm")
TRUST_REMOTE_CODE — "true"/"false" for custom model code
"""

import os
import io
import asyncio
import logging
from typing import Optional

import torch
from PIL import Image
from fastapi import FastAPI, UploadFile, File, HTTPException
from pydantic import BaseModel
from transformers import pipeline
from transformers.pipelines import Pipeline


# -------------------------------------------------------------------------
# Configuration
# -------------------------------------------------------------------------
MODEL_ID = os.environ.get("MODEL_ID", "gpt2")
MODEL_TYPE = os.environ.get("MODEL_TYPE", "llm").lower()  # "llm" or "vlm"
# Only these lowercase spellings enable trust_remote_code; anything else is False.
TRUST_REMOTE_CODE = os.environ.get("TRUST_REMOTE_CODE", "false").lower() in (
    "1",
    "true",
    "yes",
)

# Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("hf-fastapi")

# FastAPI instance
app = FastAPI(title="Hugging Face FastAPI LLM/VLM Demo")

# Model pipeline: None until the startup hook populates it.
pipe: Optional[Pipeline] = None
# Startup failure message (if any); surfaced by / and /health.
load_error: Optional[str] = None
50
+ # -------------------------------------------------------------------------
51
+ # Helper functions
52
+ # -------------------------------------------------------------------------
53
def get_device() -> int:
    """Pick the pipeline device: CUDA device index 0 when a GPU is present, else -1 (CPU)."""
    if torch.cuda.is_available():
        return 0
    return -1
56
+
57
+
58
async def run_blocking(func, *args, **kwargs):
    """Run a blocking callable in the default thread pool without blocking the event loop.

    Args:
        func: Blocking callable (e.g. a transformers pipeline).
        *args, **kwargs: Forwarded to ``func``.

    Returns:
        Whatever ``func`` returns.
    """
    # get_running_loop() (not the deprecated get_event_loop()) — we are always
    # awaited from inside a running loop, and get_event_loop() emits a
    # DeprecationWarning / misbehaves in coroutines on Python 3.10+.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, lambda: func(*args, **kwargs))
62
+
63
+
64
+ # -------------------------------------------------------------------------
65
+ # Model loading
66
+ # -------------------------------------------------------------------------
67
@app.on_event("startup")
def load_model():
    """Build the transformers pipeline once at startup.

    On failure the exception text is stored in the module-level ``load_error``
    instead of crashing the server, so /health can report it.
    """
    global pipe, load_error

    device = get_device()
    # Both branches differ only in the pipeline task name.
    task = "image-to-text" if MODEL_TYPE == "vlm" else "text-generation"
    try:
        logger.info(f"Loading model '{MODEL_ID}' ({MODEL_TYPE}) on device {device}...")
        pipe = pipeline(
            task,
            model=MODEL_ID,
            device=device,
            trust_remote_code=TRUST_REMOTE_CODE,
        )
        logger.info("✅ Model loaded successfully.")
    except Exception as e:
        load_error = str(e)
        logger.exception("❌ Failed to load model: %s", e)
95
+
96
+
97
+ # -------------------------------------------------------------------------
98
+ # API models
99
+ # -------------------------------------------------------------------------
100
class TextRequest(BaseModel):
    """Request body for POST /generate-text."""

    # The input prompt fed to the text-generation pipeline.
    prompt: str
    # Generation controls forwarded verbatim to the transformers pipeline.
    max_new_tokens: Optional[int] = 64
    do_sample: Optional[bool] = False
    temperature: Optional[float] = 0.7
105
+
106
+
107
+ # -------------------------------------------------------------------------
108
+ # Routes
109
+ # -------------------------------------------------------------------------
110
@app.get("/", tags=["health"])
def root():
    """Report service status, the configured model, and the active device."""
    has_cuda = torch.cuda.is_available()
    info = {
        "status": "ok",
        "model_id": MODEL_ID,
        "model_type": MODEL_TYPE,
        "device": "cuda" if has_cuda else "cpu",
        "model_loaded": pipe is not None,
        "load_error": load_error,
    }
    return info
121
+
122
+
123
@app.get("/health", tags=["health"])
def health():
    """Liveness probe: report the startup load error when one was recorded."""
    if not load_error:
        return {"status": "healthy"}
    return {"status": "error", "detail": load_error}
129
+
130
+
131
@app.post("/generate-text", tags=["text"])
async def generate_text(req: TextRequest):
    """Generate text with the loaded LLM pipeline.

    Rejects the call when the server is configured as a VLM (400) or the
    model has not finished loading (503); pipeline errors become 500s.
    """
    if MODEL_TYPE == "vlm":
        raise HTTPException(status_code=400, detail="Model is VLM. Use /image-caption.")
    if pipe is None:
        raise HTTPException(status_code=503, detail=f"Model not loaded: {load_error or 'loading...'}")

    gen_kwargs = {
        "max_new_tokens": req.max_new_tokens,
        "do_sample": req.do_sample,
        "temperature": req.temperature,
        "return_full_text": False,
    }
    try:
        # The pipeline call is CPU/GPU-bound, so run it off the event loop.
        outputs = await run_blocking(pipe, req.prompt, **gen_kwargs)
    except Exception as e:
        logger.exception("Generation failed: %s", e)
        raise HTTPException(status_code=500, detail=f"Generation failed: {e}")

    # Pipelines normally return [{"generated_text": ...}]; fall back to str().
    text_out = str(outputs)
    if isinstance(outputs, list) and outputs:
        first = outputs[0]
        text_out = first.get("generated_text") or first.get("text") or str(first)

    return {"generated_text": text_out}
158
+
159
+
160
@app.post("/image-caption", tags=["image"])
async def image_caption(file: UploadFile = File(...)):
    """Caption an uploaded image with the loaded VLM pipeline.

    Rejects the call when the server is configured as an LLM (400), the model
    has not finished loading (503), or the upload is not a decodable image (400).
    """
    if MODEL_TYPE != "vlm":
        raise HTTPException(status_code=400, detail="Model is LLM. Set MODEL_TYPE=vlm.")
    if pipe is None:
        raise HTTPException(status_code=503, detail=f"Model not loaded: {load_error or 'loading...'}")

    try:
        raw = await file.read()
        img = Image.open(io.BytesIO(raw)).convert("RGB")
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Invalid image file: {e}")

    try:
        # The pipeline call is CPU/GPU-bound, so run it off the event loop.
        outputs = await run_blocking(pipe, img)
    except Exception as e:
        logger.exception("Captioning failed: %s", e)
        raise HTTPException(status_code=500, detail=f"Captioning failed: {e}")

    # Pipelines normally return [{"generated_text": ...}]; fall back to str().
    caption = str(outputs)
    if isinstance(outputs, list) and outputs:
        head = outputs[0]
        caption = head.get("generated_text") or head.get("caption") or str(head)

    return {"caption": caption}
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Core dependencies
2
+ fastapi>=0.95.0
3
+ uvicorn[standard]>=0.18.0
4
+ transformers>=4.30.0
5
+ torch>=2.0.0
6
+ pillow>=9.0.0
7
+ python-multipart>=0.0.5
8
+
9
+ # Optional (uncomment if needed)
10
+ accelerate>=0.20.3
11
+ diffusers>=0.11.0
12
+ sentencepiece>=0.1.98
13
+ safetensors>=0.3.0