Fabio Massimo Ercoli committed on
Commit
a397ef0
·
1 Parent(s): 4eb9590
Files changed (6) hide show
  1. Dockerfile +3 -0
  2. app.py +3 -5
  3. caption_service.py +31 -0
  4. images.jpeg +0 -0
  5. roses.avif +0 -0
  6. test.py +3 -0
Dockerfile CHANGED
@@ -24,4 +24,7 @@ WORKDIR $HOME/app
24
  # Copy the current directory contents into the container at $HOME/app setting the owner to the user
25
  COPY --chown=user . $HOME/app
26
 
 
 
 
27
  CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
 
24
  # Copy the current directory contents into the container at $HOME/app setting the owner to the user
25
  COPY --chown=user . $HOME/app
26
 
27
+ # Warm up the ML model at build time (downloads and caches the weights)
28
+ RUN python $HOME/app/test.py
29
+
30
  CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py CHANGED
@@ -1,13 +1,11 @@
1
  from fastapi import FastAPI
2
- from transformers import pipeline
3
 
4
  # NOTE - we configure docs_url to serve the interactive Docs at the root path
5
  # of the app. This way, we can use the docs as a landing page for the app on Spaces.
6
  app = FastAPI(docs_url="/")
7
 
8
- pipe = pipeline("image-to-text", model="nlpconnect/vit-gpt2-image-captioning")
9
-
10
  @app.get("/image")
11
- def generate(text: str):
12
- output = pipe("https://ankur3107.github.io/assets/images/image-captioning-example.png")
13
  return {"output": output[0]["generated_text"]}
 
1
  from fastapi import FastAPI
2
+ from caption_service import openAndGenerate
3
 
4
  # NOTE - we configure docs_url to serve the interactive Docs at the root path
5
  # of the app. This way, we can use the docs as a landing page for the app on Spaces.
6
  app = FastAPI(docs_url="/")
7
 
 
 
8
@app.get("/image")
def image(text: str):
    """Caption the bundled demo image and return the caption text.

    NOTE(review): `text` is currently unused — the image path is
    hard-coded; kept in the signature for backward compatibility.
    """
    # openAndGenerate returns the caption as a plain string
    # (caption_service.generate returns preds[0]).  The previous
    # `output[0]["generated_text"]` was left over from the old
    # transformers-pipeline API (list of dicts) and would raise a
    # TypeError by indexing the first character of the string and then
    # subscripting it with a string key.
    output = openAndGenerate('images.jpeg')
    return {"output": output}
caption_service.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
2
+ import torch
3
+ from PIL import Image
4
+
5
# Load the pretrained ViT-GPT2 image-captioning model once at import time,
# so both the web app and the build-time warm-up (test.py) reuse the same
# in-memory model.
model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
feature_extractor = ViTImageProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
tokenizer = AutoTokenizer.from_pretrained("nlpconnect/vit-gpt2-image-captioning")

# Run on GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Beam-search decoding settings forwarded to model.generate() in generate().
max_length = 16  # maximum caption length, in tokens
num_beams = 4
gen_kwargs = {'max_length': max_length, 'num_beams': num_beams}
15
+
16
def generate(image):
    """Generate a caption for a PIL image and return it as a string.

    Uses the module-level model/feature_extractor/tokenizer and the
    beam-search settings in gen_kwargs.
    """
    # The ViT feature extractor expects RGB input.
    rgb = image if image.mode == "RGB" else image.convert(mode="RGB")

    inputs = feature_extractor(images=[rgb], return_tensors='pt')
    batch = inputs.pixel_values.to(device)

    output_ids = model.generate(batch, **gen_kwargs)

    captions = [
        text.strip()
        for text in tokenizer.batch_decode(output_ids, skip_special_tokens=True)
    ]
    return captions[0]
28
+
29
def openAndGenerate(image_path):
    """Open the image at `image_path` and return its generated caption.

    PIL's Image.open is lazy and keeps the underlying file handle open;
    the context manager guarantees it is closed even if captioning fails
    (the original version leaked the handle).
    """
    with Image.open(image_path) as img:
        # generate() reads the pixel data while the file is still open.
        return generate(img)
31
+
images.jpeg ADDED
roses.avif ADDED
test.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
# Build-time warm-up script, run from the Dockerfile via
# `RUN python $HOME/app/test.py`: importing caption_service triggers
# from_pretrained (fetching/caching the model weights in the image), and
# the call below exercises the full captioning pipeline once so the first
# request at runtime is fast.
from caption_service import openAndGenerate

openAndGenerate('images.jpeg')