Leon4gr45 committed on
Commit
36fcbf8
·
verified ·
1 Parent(s): 8c345e4

Upload folder using huggingface_hub

Browse files
Files changed (5) hide show
  1. Dockerfile +16 -0
  2. README.md +8 -5
  3. app.py +52 -0
  4. list_models.py +5 -0
  5. requirements.txt +13 -0
Dockerfile ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Lightweight CPU-only base image; the app does no GPU work.
FROM python:3.9-slim

WORKDIR /app

# Copy and install dependencies first so this layer is cached
# independently of application-code changes.
COPY requirements.txt requirements.txt
RUN pip install --no-cache-dir -r requirements.txt

# transformers/huggingface_hub cache model downloads under HF_HOME.
# Point it at a writable path and hand ownership to uid/gid 1000
# (the non-root user Hugging Face Spaces runs containers as).
ENV HF_HOME=/app/.cache
RUN mkdir -p $HF_HOME && chown -R 1000:1000 $HF_HOME

COPY . .

# Hugging Face Spaces routes traffic to port 7860 by convention.
EXPOSE 7860

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,10 +1,13 @@
1
  ---
2
- title: Phi Inference
3
- emoji: 🔥
4
- colorFrom: red
5
- colorTo: yellow
6
- sdk: docker
 
 
7
  pinned: false
 
8
  ---
9
 
10
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Phi 3.5 & Chatbot & Vision App
3
+ emoji: 🦋🦋🦋
4
+ colorFrom: purple
5
+ colorTo: indigo
6
+ sdk: docker
7
+ app_port: 7860
8
+ app_file: app.py
9
  pinned: false
10
+ license: mit
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import torch
from fastapi import FastAPI, UploadFile, File, Depends
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModel
from PIL import Image
from typing import Optional
import io

app = FastAPI()

# Inference is pinned to CPU (the hosting environment provides no GPU).
device = "cpu"

# Vision-language model. trust_remote_code=True is required because the
# checkpoint ships its own modeling code; NOTE(review): that remote code is
# executed at load time, so the model id should be pinned and trusted.
model_id = "OpenGVLab/InternVL2_5-2B"
# torch_dtype="auto" defers the dtype choice to the checkpoint's config.
# NOTE(review): on CPU this may select bfloat16 — confirm the target CPU
# handles it acceptably, otherwise pass torch.float32 explicitly.
# The model is loaded eagerly at import time, so container start-up blocks
# on the full model download.
model = AutoModel.from_pretrained(model_id, trust_remote_code=True, torch_dtype="auto").to(device).eval()
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
21
class GenerateRequest(BaseModel):
    """Parameters for the /generate endpoint.

    Used via FastAPI's ``Depends()`` alongside a file upload, so these
    fields arrive as query parameters rather than a JSON body.
    """

    # Optional user prompt; when omitted the endpoint falls back to a
    # generic "Describe the image." instruction.
    text_input: Optional[str] = None
    # Upper bound on the number of tokens to generate.
    max_new_tokens: int = 1024
    # Greedy decoding by default; set True to enable sampling.
    do_sample: bool = False
26
# ImageNet normalisation constants used by InternVL's image pipeline, and
# the square input resolution its vision encoder expects.
_IMAGENET_MEAN = (0.485, 0.456, 0.406)
_IMAGENET_STD = (0.229, 0.224, 0.225)
_IMAGE_SIZE = 448


def _preprocess_image(pil_image):
    """Convert an RGB PIL image to a normalised (1, 3, H, W) float tensor.

    Resizes to the model's fixed input resolution and applies ImageNet
    mean/std normalisation, matching InternVL's documented preprocessing.
    """
    resized = pil_image.resize((_IMAGE_SIZE, _IMAGE_SIZE))
    # Raw RGB bytes -> uint8 tensor (H, W, 3) -> float (3, H, W) in [0, 1].
    raw = torch.frombuffer(bytearray(resized.tobytes()), dtype=torch.uint8)
    tensor = raw.view(_IMAGE_SIZE, _IMAGE_SIZE, 3).permute(2, 0, 1).float().div_(255.0)
    mean = torch.tensor(_IMAGENET_MEAN).view(3, 1, 1)
    std = torch.tensor(_IMAGENET_STD).view(3, 1, 1)
    tensor = (tensor - mean) / std
    # Match the model's dtype (torch_dtype="auto" may not be float32).
    return tensor.unsqueeze(0).to(device=device, dtype=model.dtype)


@app.post("/generate")
async def generate(image: UploadFile = File(...), request: GenerateRequest = Depends()):
    """Answer a question about (or describe) an uploaded image.

    Accepts a multipart image upload plus GenerateRequest query params and
    returns ``{"generated_text": ...}``.
    """
    # Read the upload fully into memory and decode it as an RGB image.
    image_bytes = await image.read()
    pil_image = Image.open(io.BytesIO(image_bytes)).convert("RGB")

    # InternVL's chat() expects the question text to carry an <image>
    # placeholder marking where the image tokens are inserted.
    if request.text_input:
        question = f"<image>\n{request.text_input}"
    else:
        question = "<image>\nDescribe the image."

    pixel_values = _preprocess_image(pil_image)

    generation_config = {
        "max_new_tokens": request.max_new_tokens,
        "do_sample": request.do_sample,
    }

    # Bug fix: the previous code called tokenizer(prompt, pil_image, ...),
    # passing the PIL image as the tokenizer's text_pair argument — text
    # tokenizers cannot consume images, so every request raised at runtime.
    # InternVL's remote code exposes model.chat(tokenizer, pixel_values,
    # question, generation_config) for single-image inference.
    with torch.no_grad():
        response = model.chat(tokenizer, pixel_values, question, generation_config)

    return {"generated_text": response}
50
@app.get("/")
async def read_root():
    """Health/landing route: directs clients to the interactive API docs."""
    welcome = "InternVL2_5-2B API. Go to /docs for API documentation."
    return {"message": welcome}
list_models.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
from huggingface_hub import HfApi


def main():
    """Print the id of every model published under the OpenGVLab org."""
    api = HfApi()
    # list_models returns an iterator of ModelInfo objects; iteration
    # pages through the Hub API lazily.
    for model in api.list_models(author="OpenGVLab"):
        # Fix: ModelInfo.modelId is a deprecated alias — use .id.
        print(model.id)


# Guard the network call so importing this module has no side effects.
if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.112.4
2
+ uvicorn==0.40.0
3
+ torch
4
+ transformers==4.43.0
5
+ accelerate==0.30.0
6
+ sentencepiece
7
+ python-dotenv
8
+ pydantic==2.12.5
9
+ einops
10
+ numpy==1.24.4
11
+ Pillow==10.3.0
12
+ Requests==2.31.0
13
+ timm