Yatheshr committed on
Commit
c122b72
·
verified ·
1 Parent(s): ab6b7e3

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -0
app.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 1. Import Libraries
2
+ import gradio as gr
3
+ from transformers import CLIPProcessor, CLIPModel
4
+ from PIL import Image
5
+ import torch
6
+
7
# 2. Load the pre-trained CLIP checkpoint.
# Both objects are created once at import time and shared by every call
# to the prediction function below.
model_name = "openai/clip-vit-base-patch16"
model = CLIPModel.from_pretrained(model_name)
processor = CLIPProcessor.from_pretrained(model_name)
11
+
12
+ # 3. Define the Prediction Function
13
def classify_image_text(image, text):
    """Score how well a text description matches an image using CLIP.

    Args:
        image: The image to score (the interface supplies a PIL image), or
            ``None`` when no image has been provided yet.
        text: The description to compare against the image.

    Returns:
        A dict mapping ``text`` to a score in ``[0, 1]`` suitable for
        ``gr.Label``. An empty dict is returned when either input is
        missing or blank.
    """
    # The interface runs in live mode, so this callback fires while the user
    # is still filling in the form; bail out instead of crashing the processor.
    if image is None or text is None or not text.strip():
        return {}

    # truncation=True keeps over-long descriptions within CLIP's text context
    # window instead of raising inside the text encoder.
    inputs = processor(
        text=[text],
        images=image,
        return_tensors="pt",
        padding=True,
        truncation=True,
    )

    # Inference only: no gradients needed.
    with torch.no_grad():
        outputs = model(**inputs)

    # A softmax over a single candidate text is always exactly 1.0, so it
    # carries no information. Report the cosine similarity between the image
    # and text embeddings instead.
    similarity = torch.nn.functional.cosine_similarity(
        outputs.image_embeds, outputs.text_embeds
    ).item()

    # Cosine similarity lies in [-1, 1]; clamp to [0, 1] so the label
    # component can display it as a confidence.
    score = max(0.0, min(1.0, similarity))

    return {text: score}
27
+
28
# 4. Build the Gradio interface: an image plus a free-text description in,
# a label with the score out. live=True re-runs the prediction whenever an
# input changes.
image_input = gr.Image(type="pil")
description_input = gr.Textbox(label="Enter description")
score_output = gr.Label()

iface = gr.Interface(
    fn=classify_image_text,
    inputs=[image_input, description_input],
    outputs=score_output,
    live=True,
)

# 5. Start the web app.
iface.launch()