Spaces:

aekpic877
/

gradio

Sleeping

App Files Files Community

aekpic877 committed on Jul 14, 2024

Commit

b2ed79b

verified ·

1 Parent(s): 8dd40fa

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -20

app.py CHANGED Viewed

@@ -1,18 +1,16 @@
 import torch
 from PIL import Image
-from transformers import AutoModel, AutoTokenizer
 import gradio as gr
-# Load a smaller model and tokenizer
-model_name = 'google/vit-base-patch16-224'  # Example of a smaller model, adjust as needed
 try:
-    model = AutoModel.from_pretrained(model_name, torch_dtype=torch.float16)
     model = model.to(device='cuda' if torch.cuda.is_available() else 'cpu')
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
     model.eval()
 except Exception as e:
-    print(f"Error loading model or tokenizer: {e}")
     exit()
 def process_image(image, question):
@@ -21,29 +19,23 @@ def process_image(image, question):
     # Convert Gradio image to PIL Image
     image = Image.fromarray(image).convert('RGB')
-    # Create message list
-    msgs = [{'role': 'user', 'content': question}]
     # Perform inference
     try:
         with torch.no_grad():
-            res = model.chat(
-                image=image,
-                msgs=msgs,
-                tokenizer=tokenizer,
-                sampling=True,  # if sampling=False, beam_search will be used by default
-                temperature=0.7,
-                stream=False  # Set to False for non-streaming output
-            )
-        return res
     except Exception as e:
         return f"Error during model inference: {e}"
 # Define the Gradio interface
 interface = gr.Interface(
     fn=process_image,
-    inputs=[gr.inputs.Image(type='numpy'), gr.inputs.Textbox(label="Question")],
-    outputs="text",
     title="Image Question Answering",
     description="Upload an image and ask a question about it. The model will provide an answer."
 )

 import torch
 from PIL import Image
+from transformers import BlipProcessor, BlipForConditionalGeneration
 import gradio as gr
+# Load the BLIP model and processor
 try:
     # Load the pretrained BLIP processor and model, move the model to the
     # GPU when one is available, and switch it to evaluation mode.
     processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
     model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
     model = model.to(device='cuda' if torch.cuda.is_available() else 'cpu')
     model.eval()
 except Exception as e:
     print(f"Error loading model or processor: {e}")
     # Exit with a non-zero status so the failure is visible to whatever
     # launched the app; the bare exit() helper is injected by `site` and
     # would report success (status 0).
     raise SystemExit(1) from e
 def process_image(image, question):
     """Run the BLIP model on ``image``, prompted with ``question``.

     Args:
         image: Image array as delivered by the Gradio image input, or
             ``None`` when the form was submitted without an upload.
         question: Text passed to the processor together with the image.

     Returns:
         The decoded model output, or a human-readable message when no
         image was supplied or inference failed.
     """
     # Without an upload there is nothing to convert; answer with a hint
     # instead of failing inside Image.fromarray.
     if image is None:
         return "Please upload an image."
     # Convert Gradio image to PIL Image
     image = Image.fromarray(image).convert('RGB')
     # Preprocess the image and question. The tensors must be on the same
     # device as the model; no `device` variable is defined in the visible
     # module code, so take the device from the model itself.
     inputs = processor(image, question, return_tensors="pt").to(model.device)
     # Perform inference
     try:
         with torch.no_grad():
             outputs = model.generate(**inputs)
         answer = processor.decode(outputs[0], skip_special_tokens=True)
         return answer
     except Exception as e:
         return f"Error during model inference: {e}"
 # Define the Gradio interface: an uploaded image (requested as a NumPy
 # array) and a "Question" textbox are passed to process_image, and the
 # value it returns is shown in an output textbox.
 interface = gr.Interface(
     fn=process_image,
+    inputs=[gr.Image(type='numpy'), gr.Textbox(label="Question")],
+    outputs=gr.Textbox(),
     title="Image Question Answering",
     description="Upload an image and ask a question about it. The model will provide an answer."
 )