Uzaiir committed
Commit cd0ce91 · verified · 1 Parent(s): 3649d2e

Update src/models/imageCaptioning.py

Files changed (1)
  1. src/models/imageCaptioning.py +34 -17
src/models/imageCaptioning.py CHANGED
@@ -1,24 +1,41 @@

- import requests
- import base64
- import os


- hf_token = os.environ.get("HUGGINGFACE_API_TOKEN")

- API_URL = "https://api-inference.huggingface.co/models/nlpconnect/vit-gpt2-image-captioning"
- headers = {
-     "Authorization": f"Bearer {hf_token}"
- }

- def generateCaption(image_path):
-     with open(image_path, "rb") as image_file:
-         image_bytes = image_file.read()

-     response = requests.post(API_URL, headers=headers, files={"file": image_bytes})

-     if response.status_code == 200:
-         result = response.json()
-         return result[0]['generated_text']
-     else:
-         return f"Error generating caption: {response.text}"

+ # import requests
+ # import base64
+ # import os


+ # hf_token = os.environ.get("HUGGINGFACE_API_TOKEN")

+ # API_URL = "https://api-inference.huggingface.co/models/nlpconnect/vit-gpt2-image-captioning"
+ # headers = {
+ #     "Authorization": f"Bearer {hf_token}"
+ # }

+ # def generateCaption(image_path):
+ #     with open(image_path, "rb") as image_file:
+ #         image_bytes = image_file.read()

+ #     response = requests.post(API_URL, headers=headers, files={"file": image_bytes})

+ #     if response.status_code == 200:
+ #         result = response.json()
+ #         return result[0]['generated_text']
+ #     else:
+ #         return f"Error generating caption: {response.text}"
+
+
+ from PIL import Image
+ from transformers import BlipProcessor, BlipForConditionalGeneration
+ import torch
+
+
+ processor = BlipProcessor.from_pretrained("./models/Caption")
+ model = BlipForConditionalGeneration.from_pretrained("./models/Caption")
+
+
+ def generateCaption(image_path):
+     image = Image.open(image_path).convert("RGB")
+     inputs = processor(images=image, return_tensors="pt")
+     output = model.generate(**inputs)
+     caption = processor.decode(output[0], skip_special_tokens=True)
+     return caption
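
The new version runs BLIP captioning locally instead of calling the hosted Inference API, so ./models/Caption must already contain saved processor and model weights. A minimal sketch of how that directory might be populated and the function called, assuming Salesforce/blip-image-captioning-base is the intended checkpoint (not confirmed by the commit) and example.jpg is a placeholder image path:

# One-time setup (assumption): download a BLIP checkpoint and save it where the
# module expects it; the exact checkpoint used by the author is not stated in the commit.
from transformers import BlipProcessor, BlipForConditionalGeneration

BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base").save_pretrained("./models/Caption")
BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").save_pretrained("./models/Caption")

# Usage: generate a caption for a local image ("example.jpg" is a placeholder path).
from src.models.imageCaptioning import generateCaption

print(generateCaption("example.jpg"))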