James040 committed on
Commit
4fbafed
·
verified ·
1 Parent(s): 3c880ef

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -14
app.py CHANGED
@@ -1,29 +1,30 @@
1
  import gradio as gr
2
- from transformers import pipeline
3
  from PIL import Image
4
 
5
# Build the image-captioning pipeline once, at import time, so every request
# reuses the same loaded model. The device is pinned to the CPU explicitly
# (the original author's note: this is intended to speed up boot).
print("Loading AI Model into memory...")
get_prompt = pipeline(
    task="image-to-text",
    model="Salesforce/blip-image-captioning-base",
    device="cpu",
)
print("Model loaded successfully!")
13
 
14
  def generate_prompt(input_img):
15
  if input_img is None:
16
  return "Please upload an image."
17
  try:
18
- # OPTIMIZATION 2: Convert RGBA (Transparent PNGs) to RGB.
19
- # If we don't do this, transparent images will crash the AI!
20
  clean_image = input_img.convert('RGB')
21
 
22
- # OPTIMIZATION 3: Force the AI to write longer, detailed prompts
23
- # max_new_tokens prevents it from giving lazy 3-word answers
24
- result = get_prompt(clean_image, max_new_tokens=75)
25
 
26
- return result[0]['generated_text']
 
 
 
 
 
 
27
  except Exception as e:
28
  print(f"Error processing image: {e}")
29
  return f"System Error: {str(e)}"
 
1
  import gradio as gr
2
+ from transformers import BlipProcessor, BlipForConditionalGeneration
3
  from PIL import Image
4
 
5
# Single source of truth for the checkpoint, so the processor and the model
# are always loaded from the same repository.
BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"

print("Loading BLIP Processor and Model...")
# Load the processor and model classes directly instead of going through
# transformers.pipeline (the original author's note: this bypasses the buggy
# pipeline task names).
processor = BlipProcessor.from_pretrained(BLIP_CHECKPOINT)
model = BlipForConditionalGeneration.from_pretrained(BLIP_CHECKPOINT)
print("Model loaded successfully!")
10
 
11
  def generate_prompt(input_img):
12
  if input_img is None:
13
  return "Please upload an image."
14
  try:
15
+ # 2. Convert to RGB to prevent transparent PNG crashes
 
16
  clean_image = input_img.convert('RGB')
17
 
18
+ # 3. Process the image into numbers the AI understands
19
+ inputs = processor(clean_image, return_tensors="pt")
 
20
 
21
+ # 4. Generate the text (max_new_tokens forces a detailed description)
22
+ output = model.generate(**inputs, max_new_tokens=75)
23
+
24
+ # 5. Decode the numbers back into human-readable text
25
+ generated_text = processor.decode(output[0], skip_special_tokens=True)
26
+
27
+ return generated_text
28
  except Exception as e:
29
  print(f"Error processing image: {e}")
30
  return f"System Error: {str(e)}"