ProfRom committed
Commit 13a9703 · verified · 1 Parent(s): 00c0069

Poojary - Final Assignment submission

Files changed (2)
  1. app.txt +99 -0
  2. requirements.txt +2 -9
app.txt ADDED
@@ -0,0 +1,99 @@
+ import gradio as gr
+ import torch
+ import os
+ from huggingface_hub import login
+ from transformers import AutoProcessor, AutoModelForVisualQuestionAnswering, infer_device, PaliGemmaForConditionalGeneration
+
+ # Log in to Hugging Face (required for the gated PaliGemma weights)
+ login(token=os.getenv('HF_TOKEN'))
+
+ # Set the device
+ device = infer_device()
+
+ # MODEL 1: BLIP-VQA
+ processor = AutoProcessor.from_pretrained("Salesforce/blip-vqa-base")
+ model = AutoModelForVisualQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base").to(device)
+
+ # Define inference function for Model 1
+ def process_image(image, prompt):
+     # Keep inputs in the model's own dtype; casting them to float16 while the model stays in float32 raises a dtype mismatch
+     inputs = processor(image, text=prompt, return_tensors="pt").to(device)
+
+     try:
+         # Generate output from the model
+         output = model.generate(**inputs, max_new_tokens=10)
+
+         # Decode and return the output
+         decoded_output = processor.batch_decode(output, skip_special_tokens=True)[0].strip()
+
+         # Remove the prompt from the output if the model echoed it
+         if decoded_output.startswith(prompt):
+             return decoded_output[len(prompt):].strip()
+         return decoded_output
+     except Exception as e:
+         print(f"Error in Model 1: {e}")
+         return "An error occurred during processing for Model 1."
+
+
+ # MODEL 2: PaliGemma (processor and model must come from the same checkpoint)
+ processor2 = AutoProcessor.from_pretrained("google/paligemma-3b-mix-224")
+ model2 = PaliGemmaForConditionalGeneration.from_pretrained(
+     "google/paligemma-3b-mix-224",
+     torch_dtype=torch.bfloat16
+ ).to(device)
+
+
+ # Define inference function for Model 2
+ def process_image2(image, prompt):
+     inputs2 = processor2(
+         text=prompt,
+         images=image,
+         return_tensors="pt"
+     ).to(device, model2.dtype)
+
+     try:
+         output = model2.generate(**inputs2, max_new_tokens=10)
+         # Slice off the prompt tokens so only the newly generated answer is decoded
+         decoded_output = processor2.batch_decode(
+             output[:, inputs2["input_ids"].shape[1]:],
+             skip_special_tokens=True
+         )[0].strip()
+
+         return decoded_output
+     except Exception as e:
+         print(f"Error in Model 2: {e}")
+         return "An error occurred during processing for Model 2. Ensure your hardware supports bfloat16 or adjust the torch_dtype."
+
+
+ # GRADIO INTERFACE
+ inputs_model1 = [
+     gr.Image(type="pil"),
+     gr.Textbox(label="Prompt", placeholder="Enter your question")
+ ]
+ inputs_model2 = [
+     gr.Image(type="pil"),
+     gr.Textbox(label="Prompt", placeholder="Enter your question")
+ ]
+
+ outputs_model1 = gr.Textbox(label="Answer")
+ outputs_model2 = gr.Textbox(label="Answer")
+
+ # Create the Gradio apps for each model
+ model1_inf = gr.Interface(
+     fn=process_image,
+     inputs=inputs_model1,
+     outputs=outputs_model1,
+     title="Model 1: BLIP-VQA-Base",
+     description="Ask a question about the uploaded image using BLIP."
+ )
+
+ model2_inf = gr.Interface(
+     fn=process_image2,
+     inputs=inputs_model2,
+     outputs=outputs_model2,
+     title="Model 2: PaliGemma",
+     description="Ask a question about the uploaded image using PaliGemma."
+ )
+
+ demo = gr.TabbedInterface([model1_inf, model2_inf], ["Model 1 (BLIP)", "Model 2 (PaliGemma)"])
+ demo.launch(share=True)
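
As a quick sanity check, the two handlers can be called directly on a PIL image in the same session before demo.launch() runs (for example from a notebook, or with the launch line temporarily commented out). The sketch below assumes a placeholder image path, test.jpg, which is not part of the submission:

    from PIL import Image

    image = Image.open("test.jpg")  # placeholder path for illustration
    question = "What is in the picture?"

    print("BLIP:", process_image(image, question))
    print("PaliGemma:", process_image2(image, question))
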
requirements.txt CHANGED
@@ -1,11 +1,4 @@
- # requirements.txt
-
  transformers
  torch
- gradio
- pillow
- sentence-transformers
- evaluate
- rouge_score
- absl-py
- scikit-learn
+ peft
+ gradio
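
A note on the trimmed list: app.txt also imports huggingface_hub, which is now installed only as a transitive dependency of transformers (much as pillow comes in via gradio), while peft is added here but never imported by the script. A requirements.txt that names every direct import might instead look like:

    transformers
    torch
    gradio
    huggingface_hub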