Allex21 committed on
Commit
5b0bbfc
·
verified ·
1 Parent(s): f5b3776

Update preprocess.py

Browse files
Files changed (1) hide show
  1. preprocess.py +20 -21
preprocess.py CHANGED
@@ -1,6 +1,7 @@
 
1
  import os
2
  from PIL import Image
3
- from transformers import AutoProcessor, BlipForConditionalGeneration
4
 
5
  def process_dataset(zip_path, output_dir, generate_captions=True):
6
  os.makedirs(output_dir, exist_ok=True)
@@ -10,29 +11,27 @@ def process_dataset(zip_path, output_dir, generate_captions=True):
10
  with zipfile.ZipFile(zip_path, 'r') as zip_ref:
11
  zip_ref.extractall(output_dir)
12
 
13
- # Gera captions com BLIP multilíngue
14
- if generate_captions:
15
- processor = AutoProcessor.from_pretrained("microsoft/blip-image-captioning-base")
16
- model = BlipForConditionalGeneration.from_pretrained("microsoft/blip-image-captioning-base")
17
-
18
- for img_name in os.listdir(output_dir):
19
- if img_name.lower().endswith(('.png', '.jpg', '.jpeg')):
20
- img_path = os.path.join(output_dir, img_name)
21
- image = Image.open(img_path).convert('RGB')
22
-
23
- inputs = processor(images=image, return_tensors="pt")
24
- outputs = model.generate(**inputs, max_new_tokens=50)
25
- caption = processor.decode(outputs[0], skip_special_tokens=True)
26
-
27
- with open(img_path.replace('.jpg', '.txt').replace('.png', '.txt'), 'w') as f:
28
- f.write(caption)
29
-
30
- # Redimensiona imagens
31
  for img_name in os.listdir(output_dir):
32
  if img_name.lower().endswith(('.png', '.jpg', '.jpeg')):
33
  img_path = os.path.join(output_dir, img_name)
34
  image = Image.open(img_path).convert('RGB')
35
- image = image.resize((512, 512), Image.LANCZOS)
36
- image.save(img_path)
 
 
 
 
 
 
 
 
 
 
 
37
 
38
  return output_dir
 
1
+ # preprocess.py
2
  import os
3
  from PIL import Image
4
+ from transformers import BlipProcessor, BlipForConditionalGeneration
5
 
6
  def process_dataset(zip_path, output_dir, generate_captions=True):
7
  os.makedirs(output_dir, exist_ok=True)
 
11
  with zipfile.ZipFile(zip_path, 'r') as zip_ref:
12
  zip_ref.extractall(output_dir)
13
 
14
+ # Carrega BLIP (em inglês — modelo oficial da Salesforce)
15
+ processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
16
+ model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
17
+
18
+ # Processa imagens
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  for img_name in os.listdir(output_dir):
20
  if img_name.lower().endswith(('.png', '.jpg', '.jpeg')):
21
  img_path = os.path.join(output_dir, img_name)
22
  image = Image.open(img_path).convert('RGB')
23
+
24
+ # Redimensiona para evitar erros de memória
25
+ image.thumbnail((512, 512), Image.LANCZOS)
26
+ image.save(img_path) # Salva imagem redimensionada
27
+
28
+ if generate_captions:
29
+ inputs = processor(image, return_tensors="pt")
30
+ outputs = model.generate(**inputs, max_new_tokens=50)
31
+ caption = processor.decode(outputs[0], skip_special_tokens=True)
32
+
33
+ txt_path = os.path.splitext(img_path)[0] + ".txt"
34
+ with open(txt_path, "w", encoding="utf-8") as f:
35
+ f.write(caption)
36
 
37
  return output_dir