Leon4gr45 committed on
Commit
36fcbf8
·
verified ·
1 Parent(s): 8c345e4

Upload folder using huggingface_hub

Browse files
Files changed (5) hide show
  1. Dockerfile +16 -0
  2. README.md +8 -5
  3. app.py +52 -0
  4. list_models.py +5 -0
  5. requirements.txt +13 -0
Dockerfile ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Lightweight CPU-only base image; the app does no GPU work.
FROM python:3.9-slim

WORKDIR /app

# Copy and install dependencies first so this layer is cached
# independently of application-code changes.
COPY requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# transformers/huggingface_hub cache model downloads under HF_HOME.
# Point it at a writable path and hand ownership to uid/gid 1000
# (the non-root user Hugging Face Spaces runs containers as).
ENV HF_HOME=/app/.cache
RUN mkdir -p $HF_HOME && chown -R 1000:1000 $HF_HOME

COPY . .

# Hugging Face Spaces routes traffic to port 7860 by convention.
EXPOSE 7860

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,10 +1,13 @@
1
  ---
2
- title: Phi Inference
3
- emoji: 🔥
4
- colorFrom: red
5
- colorTo: yellow
6
- sdk: docker
 
 
7
  pinned: false
 
8
  ---
9
 
10
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Phi 3.5 & Chatbot & Vision App
3
+ emoji: 🦋🦋🦋
4
+ colorFrom: purple
5
+ colorTo: indigo
6
+ sdk: docker
7
+ app_port: 7860
8
+ app_file: app.py
9
  pinned: false
10
+ license: mit
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import torch
from fastapi import FastAPI, UploadFile, File, Depends
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModel
from PIL import Image
from typing import Optional
import io

app = FastAPI()

# Inference is pinned to CPU (the hosting environment provides no GPU).
device = "cpu"

# Vision-language model. trust_remote_code=True is required because the
# checkpoint ships its own modeling code; NOTE(review): that remote code is
# executed at load time, so the model id should be pinned and trusted.
model_id = "OpenGVLab/InternVL2_5-2B"
# torch_dtype="auto" defers the dtype choice to the checkpoint's config.
# NOTE(review): on CPU this may select bfloat16 — confirm the target CPU
# handles it acceptably, otherwise pass torch.float32 explicitly.
# The model is loaded eagerly at import time, so container start-up blocks
# on the full model download.
model = AutoModel.from_pretrained(model_id, trust_remote_code=True, torch_dtype="auto").to(device).eval()
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
21
class GenerateRequest(BaseModel):
    """Parameters for the /generate endpoint.

    Used via FastAPI's ``Depends()`` alongside a file upload, so these
    fields arrive as query parameters rather than a JSON body.
    """

    # Optional user prompt; when omitted the endpoint falls back to a
    # generic "Describe the image." instruction.
    text_input: Optional[str] = None
    # Upper bound on the number of tokens to generate.
    max_new_tokens: int = 1024
    # Greedy decoding by default; set True to enable sampling.
    do_sample: bool = False
26
# ImageNet normalisation constants used by InternVL's image pipeline, and
# the square input resolution its vision encoder expects.
_IMAGENET_MEAN = (0.485, 0.456, 0.406)
_IMAGENET_STD = (0.229, 0.224, 0.225)
_IMAGE_SIZE = 448


def _preprocess_image(pil_image):
    """Convert an RGB PIL image to a normalised (1, 3, H, W) float tensor.

    Resizes to the model's fixed input resolution and applies ImageNet
    mean/std normalisation, matching InternVL's documented preprocessing.
    """
    resized = pil_image.resize((_IMAGE_SIZE, _IMAGE_SIZE))
    # Raw RGB bytes -> uint8 tensor (H, W, 3) -> float (3, H, W) in [0, 1].
    raw = torch.frombuffer(bytearray(resized.tobytes()), dtype=torch.uint8)
    tensor = raw.view(_IMAGE_SIZE, _IMAGE_SIZE, 3).permute(2, 0, 1).float().div_(255.0)
    mean = torch.tensor(_IMAGENET_MEAN).view(3, 1, 1)
    std = torch.tensor(_IMAGENET_STD).view(3, 1, 1)
    tensor = (tensor - mean) / std
    # Match the model's dtype (torch_dtype="auto" may not be float32).
    return tensor.unsqueeze(0).to(device=device, dtype=model.dtype)


@app.post("/generate")
async def generate(image: UploadFile = File(...), request: GenerateRequest = Depends()):
    """Answer a question about (or describe) an uploaded image.

    Accepts a multipart image upload plus GenerateRequest query params and
    returns ``{"generated_text": ...}``.
    """
    # Read the upload fully into memory and decode it as an RGB image.
    image_bytes = await image.read()
    pil_image = Image.open(io.BytesIO(image_bytes)).convert("RGB")

    # InternVL's chat() expects the question text to carry an <image>
    # placeholder marking where the image tokens are inserted.
    if request.text_input:
        question = f"<image>\n{request.text_input}"
    else:
        question = "<image>\nDescribe the image."

    pixel_values = _preprocess_image(pil_image)

    generation_config = {
        "max_new_tokens": request.max_new_tokens,
        "do_sample": request.do_sample,
    }

    # Bug fix: the previous code called tokenizer(prompt, pil_image, ...),
    # passing the PIL image as the tokenizer's text_pair argument — text
    # tokenizers cannot consume images, so every request raised at runtime.
    # InternVL's remote code exposes model.chat(tokenizer, pixel_values,
    # question, generation_config) for single-image inference.
    with torch.no_grad():
        response = model.chat(tokenizer, pixel_values, question, generation_config)

    return {"generated_text": response}
50
@app.get("/")
async def read_root():
    """Health/landing route: directs clients to the interactive API docs."""
    welcome = "InternVL2_5-2B API. Go to /docs for API documentation."
    return {"message": welcome}
list_models.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
from huggingface_hub import HfApi


def main():
    """Print the id of every model published under the OpenGVLab org."""
    api = HfApi()
    # list_models returns an iterator of ModelInfo objects; iteration
    # pages through the Hub API lazily.
    for model in api.list_models(author="OpenGVLab"):
        # Fix: ModelInfo.modelId is a deprecated alias — use .id.
        print(model.id)


# Guard the network call so importing this module has no side effects.
if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.112.4
2
+ uvicorn==0.40.0
3
+ torch
4
+ transformers==4.43.0
5
+ accelerate==0.30.0
6
+ sentencepiece
7
+ python-dotenv
8
+ pydantic==2.12.5
9
+ einops
10
+ numpy==1.24.4
11
+ Pillow==10.3.0
12
+ Requests==2.31.0
13
+ timm