Upload 3 files
Browse files- utils/caption_utils.py +26 -0
- utils/image_utils.py +21 -0
- utils/topic_generation.py +31 -0
utils/caption_utils.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from transformers import BlipProcessor, BlipForConditionalGeneration
|
| 3 |
+
from utils.image_utils import load_image
|
| 4 |
+
|
| 5 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class ImageCaptioning:
|
| 9 |
+
|
| 10 |
+
def __int__(self):
|
| 11 |
+
self.processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
|
| 12 |
+
self.model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)
|
| 13 |
+
|
| 14 |
+
def get_caption(self, image_path):
|
| 15 |
+
image = load_image(image_path)
|
| 16 |
+
|
| 17 |
+
# Preprocessing the Image
|
| 18 |
+
img = self.processor(image, return_tensors="pt").to(device)
|
| 19 |
+
|
| 20 |
+
# Generating captions
|
| 21 |
+
output = self.model.generate(**img)
|
| 22 |
+
|
| 23 |
+
# decode the output
|
| 24 |
+
caption = self.processor.batch_decode(output, skip_special_tokens=True)[0]
|
| 25 |
+
|
| 26 |
+
return caption
|
utils/image_utils.py
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
from PIL import Image
|
| 3 |
+
import urllib.parse as parse
|
| 4 |
+
import os
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
# Verify url
|
| 8 |
+
def check_url(string):
|
| 9 |
+
try:
|
| 10 |
+
result = parse.urlparse(string)
|
| 11 |
+
return all([result.scheme, result.netloc, result.path])
|
| 12 |
+
except:
|
| 13 |
+
return False
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
# Load an image
|
| 17 |
+
def load_image(image_path):
|
| 18 |
+
if check_url(image_path):
|
| 19 |
+
return Image.open(requests.get(image_path, stream=True).raw)
|
| 20 |
+
elif os.path.exists(image_path):
|
| 21 |
+
return Image.open(image_path)
|
utils/topic_generation.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from transformers import T5Tokenizer, T5ForConditionalGeneration
|
| 3 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class TopicGenerator:
|
| 7 |
+
|
| 8 |
+
def __init__(self):
|
| 9 |
+
# Initialize tokenizer and model upon class instantiation
|
| 10 |
+
self.tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-large")
|
| 11 |
+
self.model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-large").to(device) # assuming you have a GPU available
|
| 12 |
+
|
| 13 |
+
def generate_topics(self, user_input, num_topics=3):
|
| 14 |
+
"""
|
| 15 |
+
Generate topic sentences based on the user input.
|
| 16 |
+
|
| 17 |
+
Args:
|
| 18 |
+
- user_input (str): The input text provided by the user.
|
| 19 |
+
- num_topics (int, optional): Number of topics to generate. Defaults to 3.
|
| 20 |
+
|
| 21 |
+
Returns:
|
| 22 |
+
- list: A list of generated topic sentences.
|
| 23 |
+
"""
|
| 24 |
+
prompt_text = f"Generate a topic sentence based on the following input: {user_input}"
|
| 25 |
+
input_ids = self.tokenizer(prompt_text, return_tensors="pt").input_ids.to(device)
|
| 26 |
+
|
| 27 |
+
# Generate topics
|
| 28 |
+
outputs = self.model.generate(input_ids, do_sample=True, top_k=50, temperature=0.7, max_length=50, num_return_sequences=num_topics)
|
| 29 |
+
|
| 30 |
+
# Decode the outputs and return as a list of topic sentences
|
| 31 |
+
return [self.tokenizer.decode(output, skip_special_tokens=True) for output in outputs]
|