aya369 committed on
Commit
b1ea222
·
verified ·
1 Parent(s): 136297a

Update captioning/url_caption.py

Browse files
Files changed (1) hide show
  1. captioning/url_caption.py +36 -0
captioning/url_caption.py CHANGED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from PIL import Image
2
+ import requests
3
+ from io import BytesIO
4
+ from bs4 import BeautifulSoup
5
+ from .image_caption import processor, model
6
+
7
def caption_from_url(url: str, *, timeout: float = 10.0, min_pixels: int = 400) -> str:
    """Caption every usable ``<img>`` found on the web page at *url*.

    The page is downloaded and parsed, each image's ``src`` is resolved to an
    absolute http(s) URL, the image is downloaded, and a caption is generated
    with the ``processor`` / ``model`` pair imported from ``.image_caption``.

    Args:
        url: Address of the web page to scan for images.
        timeout: Seconds to wait on each HTTP request (page and images)
            before giving up, so a stalled server cannot hang the caller.
        min_pixels: Images whose ``width * height`` is below this value are
            skipped (e.g. tracking pixels and tiny icons).

    Returns:
        One ``"<image url>: <caption>"`` line per captioned image, joined
        with newlines; or a human-readable message when the page cannot be
        fetched or no image could be captioned. This function does not raise
        for network or per-image failures.
    """
    # Function-scope imports keep this block self-contained without touching
    # the module's import section.
    import logging
    from urllib.parse import urljoin

    logger = logging.getLogger(__name__)

    # Only the page download is reported as a fetch failure; later errors are
    # handled per image so one bad image cannot mask the real cause.
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException:
        logger.warning("Could not fetch page %s", url, exc_info=True)
        return "Failed to fetch the webpage. Check the URL."

    soup = BeautifulSoup(response.text, 'html.parser')
    results = []
    for img_element in soup.find_all('img'):
        src = img_element.get('src')
        if not src:
            continue

        # Resolve relative and protocol-relative sources against the final
        # page URL (after redirects) instead of discarding them.
        img_url = urljoin(response.url, src.strip())
        # Skip anything that cannot be downloaded over HTTP (data:, blob:, ...).
        if not img_url.startswith(('http://', 'https://')):
            continue

        try:
            resp = requests.get(img_url, timeout=timeout)
            resp.raise_for_status()
            with Image.open(BytesIO(resp.content)) as img:
                # Check the size before converting so tiny images cost nothing.
                if img.width * img.height < min_pixels:
                    continue
                raw_image = img.convert('RGB')
            inputs = processor(raw_image, return_tensors="pt")
            out = model.generate(**inputs, max_new_tokens=50)
            caption = processor.decode(out[0], skip_special_tokens=True)
        except Exception:
            # Best effort: a broken download, an undecodable image or a model
            # error skips this image only. Unlike a bare ``except``, this lets
            # KeyboardInterrupt / SystemExit propagate.
            logger.debug("Skipping image %s", img_url, exc_info=True)
            continue

        results.append(f"{img_url}: {caption}")

    if not results:
        return "No suitable images found or failed to process images."
    return "\n".join(results)