Fabio Massimo Ercoli committed on
Commit
a397ef0
·
1 Parent(s): 4eb9590
Files changed (6) hide show
  1. Dockerfile +3 -0
  2. app.py +3 -5
  3. caption_service.py +31 -0
  4. images.jpeg +0 -0
  5. roses.avif +0 -0
  6. test.py +3 -0
Dockerfile CHANGED
@@ -24,4 +24,7 @@ WORKDIR $HOME/app
24
  # Copy the current directory contents into the container at $HOME/app setting the owner to the user
25
  COPY --chown=user . $HOME/app
26
 
 
 
 
27
  CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
 
24
  # Copy the current directory contents into the container at $HOME/app setting the owner to the user
25
  COPY --chown=user . $HOME/app
26
 
27
+ # Warm up the ML model at build time (downloads and caches the weights)
28
+ RUN python $HOME/app/test.py
29
+
30
  CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py CHANGED
@@ -1,13 +1,11 @@
1
  from fastapi import FastAPI
2
- from transformers import pipeline
3
 
4
  # NOTE - we configure docs_url to serve the interactive Docs at the root path
5
  # of the app. This way, we can use the docs as a landing page for the app on Spaces.
6
  app = FastAPI(docs_url="/")
7
 
8
- pipe = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
9
-
10
  @app.get("/image")
11
- def generate(text: str):
12
- output = pipe("https://ankur3107.github.io/assets/images/image-captioning-example.png")
13
  return {"output": output[0]["generated_text"]}
 
1
  from fastapi import FastAPI
2
+ from caption_service import openAndGenerate
3
 
4
  # NOTE - we configure docs_url to serve the interactive Docs at the root path
5
  # of the app. This way, we can use the docs as a landing page for the app on Spaces.
6
  app = FastAPI(docs_url="/")
7
 
 
 
8
@app.get("/image")
def image(text: str):
    """Caption the bundled demo image and return the caption text.

    NOTE(review): `text` is currently unused — the image path is
    hard-coded; kept in the signature for backward compatibility.
    """
    # openAndGenerate returns the caption as a plain string
    # (caption_service.generate returns preds[0]).  The previous
    # `output[0]["generated_text"]` was left over from the old
    # transformers-pipeline API (list of dicts) and would raise a
    # TypeError by indexing the first character of the string and then
    # subscripting it with a string key.
    output = openAndGenerate('images.jpeg')
    return {"output": output}
caption_service.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
2
+ import torch
3
+ from PIL import Image
4
+
5
# Load the pretrained ViT-GPT2 image-captioning model once at import time,
# so both the web app and the build-time warm-up (test.py) reuse the same
# in-memory model.
model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
feature_extractor = ViTImageProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
tokenizer = AutoTokenizer.from_pretrained("nlpconnect/vit-gpt2-image-captioning")

# Run on GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Beam-search decoding settings forwarded to model.generate() in generate().
max_length = 16  # maximum caption length, in tokens
num_beams = 4
gen_kwargs = {'max_length': max_length, 'num_beams': num_beams}
15
+
16
def generate(image):
    """Generate a caption for a PIL image and return it as a string.

    Uses the module-level model/feature_extractor/tokenizer and the
    beam-search settings in gen_kwargs.
    """
    # The ViT feature extractor expects RGB input.
    rgb = image if image.mode == "RGB" else image.convert(mode="RGB")

    inputs = feature_extractor(images=[rgb], return_tensors='pt')
    batch = inputs.pixel_values.to(device)

    output_ids = model.generate(batch, **gen_kwargs)

    captions = [
        text.strip()
        for text in tokenizer.batch_decode(output_ids, skip_special_tokens=True)
    ]
    return captions[0]
28
+
29
def openAndGenerate(image_path):
    """Open the image at `image_path` and return its generated caption.

    PIL's Image.open is lazy and keeps the underlying file handle open;
    the context manager guarantees it is closed even if captioning fails
    (the original version leaked the handle).
    """
    with Image.open(image_path) as img:
        # generate() reads the pixel data while the file is still open.
        return generate(img)
31
+
images.jpeg ADDED
roses.avif ADDED
test.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
# Build-time warm-up script, run from the Dockerfile via
# `RUN python $HOME/app/test.py`: importing caption_service triggers
# from_pretrained (fetching/caching the model weights in the image), and
# the call below exercises the full captioning pipeline once so the first
# request at runtime is fast.
from caption_service import openAndGenerate

openAndGenerate('images.jpeg')