Spaces:

Natthathida
/

project-image-captioning

Runtime error

Natthathida committed on Feb 29, 2024

Commit

73cb49c

verified ·

1 Parent(s): 04f4242

update

Files changed (1) hide show

app.py CHANGED Viewed

@@ -4,8 +4,11 @@ import torchaudio
 from torchaudio.transforms import Resample
 import torch
 from io import BytesIO
 app = Flask(__name__)
 # Initialize TTS model from Hugging Face
 tts_model_name = "suno/bark"
@@ -16,9 +19,9 @@ model_id = "dblasko/blip-dalle3-img2prompt"
 blip_model = BlipForConditionalGeneration.from_pretrained(model_id)
 blip_processor = BlipProcessor.from_pretrained(model_id)
-def generate_caption(image):
     # Generate caption from image using Blip model
-    inputs = blip_processor(images=image, return_tensors="pt")
     pixel_values = inputs.pixel_values
     generated_ids = blip_model.generate(pixel_values=pixel_values, max_length=50)
     generated_caption = blip_processor.batch_decode(generated_ids, skip_special_tokens=True, temperature=0.8, top_k=40, top_p=0.9)[0]
@@ -32,13 +35,13 @@ def generate_caption(image):
 @app.route('/upload', methods=['POST'])
 def upload_image():
-    if 'image' not in request.files:
         return jsonify({'error': 'No image provided'}), 400
-    image_file = request.files['image'].read()
     generated_caption, audio_path = generate_caption(image_file)
     return jsonify({'generated_caption': generated_caption, 'audio_url': audio_path}), 200
 if __name__ == '__main__':
-    app.run(host='0.0.0.0', port=5000, debug=True)

 from torchaudio.transforms import Resample
 import torch
 from io import BytesIO
+from PIL import Image
+from flask_cors import CORS
 app = Flask(__name__)
+CORS(app)
 # Initialize TTS model from Hugging Face
 tts_model_name = "suno/bark"
 blip_model = BlipForConditionalGeneration.from_pretrained(model_id)
 blip_processor = BlipProcessor.from_pretrained(model_id)
+def generate_caption(file):
     # Generate caption from image using Blip model
+    inputs = blip_processor(files=file, return_tensors="pt")
     pixel_values = inputs.pixel_values
     generated_ids = blip_model.generate(pixel_values=pixel_values, max_length=50)
     generated_caption = blip_processor.batch_decode(generated_ids, skip_special_tokens=True, temperature=0.8, top_k=40, top_p=0.9)[0]
 @app.route('/upload', methods=['POST'])
 def upload_image():
+    if 'file' not in request.files:
         return jsonify({'error': 'No image provided'}), 400
+    image_file = request.files['file']
     generated_caption, audio_path = generate_caption(image_file)
     return jsonify({'generated_caption': generated_caption, 'audio_url': audio_path}), 200
 if __name__ == '__main__':
+    app.run(port=5000, debug=True)