kaitongg commited on
Commit
420d97e
·
verified ·
1 Parent(s): 895ca8c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +341 -104
app.py CHANGED
@@ -1,60 +1,94 @@
1
- import os
2
- import shutil
3
- import json
4
  from PIL import Image
5
  import torch
6
  import torchvision.transforms as T
7
- import timm
8
- import pandas as pd
9
- import gradio as gr
10
  import sentence_transformers
11
- from autogluon.tabular import TabularPredictor
12
- from huggingface_hub import hf_hub_download, snapshot_download
13
- from openai import OpenAI
14
-
15
- # ----------------------
16
- # Load CPU-only image model
17
- # ----------------------
18
- REPO_ID_IMAGE = "keerthikoganti/architecture-design-stages-compact-cnn"
19
- pkl_path = hf_hub_download(repo_id=REPO_ID_IMAGE, filename="model_bundle.pkl")
 
 
 
20
  with open(pkl_path, "rb") as f:
21
  bundle = pickle.load(f)
 
22
  architecture = bundle["architecture"]
23
  num_classes = bundle["num_classes"]
24
  class_names = bundle["class_names"]
25
  state_dict = bundle["state_dict"]
26
- device = "cpu"
 
27
  model = timm.create_model(architecture, pretrained=False, num_classes=num_classes)
28
  model.load_state_dict(state_dict)
29
  model.eval().to(device)
30
- TFM = T.Compose([T.Resize(224), T.CenterCrop(224), T.ToTensor(), T.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])])
31
 
32
- # ----------------------
33
- # Load CPU-only Autogluon predictor
34
- # ----------------------
35
- REPO_ID_AG = "kaitongg/my-autogluon-model"
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  download_dir = "downloaded_predictor"
 
37
  if os.path.exists(download_dir):
38
  shutil.rmtree(download_dir)
 
39
  os.makedirs(download_dir, exist_ok=True)
40
- downloaded_path = snapshot_download(repo_id=REPO_ID_AG, repo_type="model", local_dir=download_dir, local_dir_use_symlinks=False)
 
 
 
 
 
 
 
 
41
  predictor_path = os.path.join(downloaded_path, "autogluon_predictor")
42
  loaded_predictor_from_hub = TabularPredictor.load(predictor_path)
43
 
44
- # ----------------------
45
- # Load sentence transformer
46
- # ----------------------
47
  embedding_model = sentence_transformers.SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
48
 
49
- # ----------------------
50
- # Set up Gemini API client
51
- # ----------------------
 
 
 
 
 
52
  GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
53
- gemini_client = OpenAI(api_key=GEMINI_API_KEY)
54
 
55
- # ----------------------
56
- # LLM attitude mapping
57
- # ----------------------
 
 
 
 
 
 
 
 
58
  llm_attitude_mapping = {
59
  "brainstorm": "creative and encouraging",
60
  "design_iteration": "constructive and detailed, focusing on improvements",
@@ -63,91 +97,294 @@ llm_attitude_mapping = {
63
  "random": "neutral and informative, perhaps suggesting a relevant stage",
64
  }
65
 
66
- # ----------------------
67
- # Functions: Text & Image classification, Prompt generation, LLM
68
- # ----------------------
69
- def perform_text_classification_and_format(text: str):
70
- if not text:
71
- return "No text provided", {}, "0"
72
- embeddings = embedding_model.encode([text], convert_to_numpy=True)
73
- df_emb = pd.DataFrame(embeddings, columns=[f"e{i}" for i in range(embeddings.shape[1])])
74
- proba_df = loaded_predictor_from_hub.predict_proba(df_emb)
75
- predicted_label = str(loaded_predictor_from_hub.predict(df_emb).iloc[0])
76
- high_concept = "Yes" if predicted_label == "1" else "No"
77
- confidence = float(proba_df.iloc[0]["1"] if predicted_label=="1" else proba_df.iloc[0]["0"])
78
- formatted_text = f"High Concept: {high_concept} (Confidence: {confidence:.2f})"
79
- proba_dict = {"High Concept": float(proba_df.iloc[0]["1"]), "No High Concept": float(proba_df.iloc[0]["0"])}
80
- return formatted_text, proba_dict, predicted_label
81
-
82
-
83
- def perform_classification_and_format(image: Image.Image, text: str):
84
- # Image classification
85
- if image is not None:
86
- img_tensor = TFM(image).unsqueeze(0).to(device)
87
- with torch.no_grad():
88
- img_out = model(img_tensor)
89
- img_probs = torch.softmax(img_out, dim=1)[0]
90
- img_pred_idx = torch.argmax(img_probs).item()
91
- design_stage = class_names[img_pred_idx]
92
- img_results = {class_names[i]: float(img_probs[i]) for i in range(len(class_names))}
93
- else:
94
- design_stage = "unknown"
95
- img_results = {"error": "No image provided"}
96
-
97
- # Text classification
98
- txt_fmt, txt_probs, predicted_label = perform_text_classification_and_format(text)
99
- return img_results, txt_probs, txt_fmt
100
-
101
-
102
- def generate_prompt_only(img_results, txt_probs, predicted_label, text):
103
- design_stage = max(img_results, key=img_results.get) if img_results and 'error' not in img_results else "unknown"
104
- has_high_concept = "Yes" if predicted_label=="1" else "No"
105
- confidence = txt_probs.get("High Concept",0.0) if predicted_label=="1" else txt_probs.get("No High Concept",0.0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  llm_attitude = llm_attitude_mapping.get(design_stage, llm_attitude_mapping["random"])
 
107
  prompt = f"""You are an abstract architecture critique interpreter.
108
  Your audience is a low-level architecture student.
109
- The user is at the {design_stage} design stage, so your attitude should be {llm_attitude}.
110
- User input contains high concept: {has_high_concept}.
111
- Write 250-350 words in English with clear examples and actionable advice, ending with a complete sentence.
112
- {text}"""
113
- return prompt
 
 
 
 
 
114
 
 
 
 
 
 
115
 
116
- def generate_feedback_from_prompt(prompt_input: str):
117
- response = gemini_client.chat.completions.create(model="gemini-1.5", messages=[{"role": "user", "content": prompt_input}], max_output_tokens=350, temperature=0.7)
118
- return response.choices[0].message.content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
 
120
- # ----------------------
121
- # Gradio UI
122
- # ----------------------
123
  examples = [
124
- ["https://balancedarchitecture.com/wp-content/uploads/2021/11/EXISTING-FIRST-FLOOR-PRES-scaled-e1635965923983.jpg", "Exploring spatial relationships and material palettes."],
125
- ["https://cdn.prod.website-files.com/5894a32730554b620f7bf36d/5e848c2d622e7abe1ad48504_5e01ce9f0d272014d0353cd1_Things-You-Need-to-Organize-a-3D-Rendering-Architectural-Project-EASY-RENDER.jpeg", "The window size is too small."],
126
- ["https://architectelevator.com/assets/img/bilbao_sketch.png", "The facade expresses the building's relationship with the urban context."],
 
 
 
127
  ]
128
 
129
- with gr.Blocks() as demo:
130
- gr.Markdown("# Architecture Feedback Generator (Step-by-Step)")
131
- gr.Markdown("Upload an architectural image and provide a text description or question to see classification results and the generated prompt. Click 'Generate Feedback' to get the LLM's response.")
 
 
 
 
 
 
 
 
132
 
133
  with gr.Row():
134
- with gr.Column():
135
- image_input = gr.Image(type="pil", label="Upload Image")
136
- text_input = gr.Textbox(label="Enter Text", lines=4)
137
- classify_btn = gr.Button("Classify & Generate Prompt")
 
 
 
 
 
 
 
138
 
139
- with gr.Column():
140
- image_out = gr.Label(num_top_classes=len(class_names), label="Image Classification Results")
141
- text_out = gr.Textbox(label="Text Classification Results", lines=4)
142
- prompt_box = gr.Textbox(label="Generated Prompt (editable)", lines=6, interactive=True)
143
- generate_feedback_btn = gr.Button("Generate Feedback")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
 
145
- with gr.Column():
146
- llm_out = gr.Textbox(label="LLM Feedback", lines=12)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
 
148
- classify_btn.click(fn=perform_classification_and_format, inputs=[image_input, text_input], outputs=[image_out, text_out, text_out])
149
- generate_feedback_btn.click(fn=lambda p: generate_feedback_from_prompt(p), inputs=[prompt_box], outputs=[llm_out])
150
- gr.Examples(examples=examples, inputs=[image_input,text_input], outputs=[image_out,text_out,prompt_box,llm_out], fn=lambda img,txt: (perform_classification_and_format(img,txt)[0], perform_classification_and_format(img,txt)[2], generate_prompt_only(*perform_classification_and_format(img,txt), txt), generate_feedback_from_prompt(generate_prompt_only(*perform_classification_and_format(img,txt), txt))), cache_examples=False)
 
 
 
 
151
 
152
  if __name__ == "__main__":
153
- demo.launch()
 
1
+ import gradio as gr
2
+ import pandas as pd
 
3
  from PIL import Image
4
  import torch
5
  import torchvision.transforms as T
6
+ import os
7
+ import json
 
8
  import sentence_transformers
9
+ from huggingface_hub import hf_hub_download
10
+ import pickle
11
+ import timm
12
+ import google.generativeai as genai
13
+
14
# ============================================
# 1. LOAD IMAGE CLASSIFICATION MODEL
# ============================================
print("Loading image classification model...")
REPO_ID = "keerthikoganti/architecture-design-stages-compact-cnn"

# The repo ships a single pickle bundle holding the architecture name,
# class list and trained weights.
# NOTE(review): pickle.load on downloaded content can execute arbitrary code
# if the hub repo is compromised — acceptable only because this repo is trusted.
pkl_path = hf_hub_download(repo_id=REPO_ID, filename="model_bundle.pkl")
with open(pkl_path, "rb") as f:
    bundle = pickle.load(f)

architecture = bundle["architecture"]
num_classes = bundle["num_classes"]
class_names = bundle["class_names"]
state_dict = bundle["state_dict"]

# Rebuild the timm model skeleton, then restore the trained weights.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = timm.create_model(architecture, pretrained=False, num_classes=num_classes)
model.load_state_dict(state_dict)
model.eval().to(device)

# Standard ImageNet preprocessing (resize + center crop + normalize).
TFM = T.Compose([
    T.Resize(224),
    T.CenterCrop(224),
    T.ToTensor(),
    T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

print("βœ“ Image classification model loaded successfully!")

# ============================================
# 2. LOAD TEXT CLASSIFICATION MODEL
# ============================================
print("Loading text classification model...")
from autogluon.tabular import TabularPredictor
import shutil

text_repo_id = "kaitongg/my-autogluon-model"
download_dir = "downloaded_predictor"

# Start from a clean directory so stale predictor files never mix with a
# freshly downloaded snapshot.
if os.path.exists(download_dir):
    shutil.rmtree(download_dir)

os.makedirs(download_dir, exist_ok=True)

from huggingface_hub import snapshot_download
downloaded_path = snapshot_download(
    repo_id=text_repo_id,
    repo_type="model",
    local_dir=download_dir,
    local_dir_use_symlinks=False,
)

predictor_path = os.path.join(downloaded_path, "autogluon_predictor")
loaded_predictor_from_hub = TabularPredictor.load(predictor_path)

embedding_model = sentence_transformers.SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

print("βœ“ Text classification model loaded successfully!")

# ============================================
# 3. INITIALIZE GEMINI API
# ============================================
print("Initializing Gemini API...")

# Get API key from environment variable (set in Hugging Face Spaces secrets)
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")

if GEMINI_API_KEY:
    genai.configure(api_key=GEMINI_API_KEY)
    gemini_model = genai.GenerativeModel('gemini-1.5-flash')
    print("βœ“ Gemini API initialized successfully!")
else:
    # Degrade gracefully: the UI still builds, and feedback generation will
    # explain the missing key instead of the app crashing at import time.
    gemini_model = None
    print("⚠️ Warning: GEMINI_API_KEY not found in environment variables")
88
+
89
+ # ============================================
90
+ # 4. LLM ATTITUDE MAPPING
91
+ # ============================================
92
  llm_attitude_mapping = {
93
  "brainstorm": "creative and encouraging",
94
  "design_iteration": "constructive and detailed, focusing on improvements",
 
97
  "random": "neutral and informative, perhaps suggesting a relevant stage",
98
  }
99
 
100
# ============================================
# 5. TEXT CLASSIFICATION FUNCTION
# ============================================
def perform_text_classification_and_format(text: str) -> tuple:
    """Classify *text* as containing a "high concept" or not.

    Embeds the text with the sentence-transformer model, scores it with the
    AutoGluon predictor, and formats the outcome for display.

    Args:
        text: the user's free-form description (may be empty/None).

    Returns:
        (formatted_summary, probabilities, predicted_label) where
        ``probabilities`` maps "High Concept"/"No High Concept" to floats and
        ``predicted_label`` is "1" (high concept) or "0". On empty input or
        failure the defaults below are returned and the error is surfaced in
        the summary string rather than raised.
    """
    text_classification_formatted = "No text provided"
    text_classification_probabilities = {"No High Concept": 0.0, "High Concept": 0.0}
    predicted_text_label = "0"

    if text and loaded_predictor_from_hub is not None and embedding_model is not None:
        try:
            embeddings = embedding_model.encode(
                [text],
                batch_size=1,
                show_progress_bar=False,
                convert_to_numpy=True,
                normalize_embeddings=False,
            )

            # Column names must match the features the predictor was trained on.
            dim = embeddings.shape[1]  # fix: previously unpacked an unused row count
            text_df_processed = pd.DataFrame(embeddings, columns=[f"e{i}" for i in range(dim)])

            text_proba_df = loaded_predictor_from_hub.predict_proba(text_df_processed)

            # Fix: AutoGluon exposes class labels exactly as trained (str or
            # int); the old lookup only tried "0"/"1" and silently fell back
            # to 0.0 for int-labelled predictors.
            row = text_proba_df.iloc[0]
            text_classification_probabilities = {
                "No High Concept": float(row.get("0", row.get(0, 0.0))),
                "High Concept": float(row.get("1", row.get(1, 0.0))),
            }

            predicted_text_label = str(loaded_predictor_from_hub.predict(text_df_processed).iloc[0])

            if predicted_text_label == "1":
                has_high_concept = "Yes"
                confidence = text_classification_probabilities["High Concept"]
            else:
                has_high_concept = "No"
                confidence = text_classification_probabilities["No High Concept"]

            text_classification_formatted = f"High Concept: {has_high_concept} (Confidence: {confidence:.2f})"

        except Exception as e:
            # Surface the failure in the UI instead of crashing the app.
            print(f"Error processing text: {e}")
            text_classification_formatted = f"Text classification failed: {e}"

    return text_classification_formatted, text_classification_probabilities, predicted_text_label
144
+
145
# ============================================
# 6. COMBINED CLASSIFICATION FUNCTION
# ============================================
def perform_classification_and_format(image: Image.Image, text: str) -> tuple:
    """Run the image and text classifiers and package results for the UI.

    Returns a 4-tuple:
        (image_scores, text_probabilities, text_summary, text_label)
    where ``image_scores`` maps each design-stage name to its softmax score,
    or carries an "error" key when no image was given or classification failed.
    """
    img_scores = {"error": "No image provided"}
    stage = "unknown"

    if image is not None and model is not None:
        try:
            # Preprocess and run a single forward pass on the configured device.
            batch = TFM(image).unsqueeze(0).to(device)
            with torch.no_grad():
                logits = model(batch)
            probs = torch.softmax(logits, dim=1)[0]

            stage = class_names[torch.argmax(probs).item()]
            img_scores = {name: float(probs[i]) for i, name in enumerate(class_names)}

            print(f"βœ“ Image classified as: {stage}")

        except Exception as exc:
            print(f"❌ Error processing image: {exc}")
            img_scores = {"error": f"Image classification failed: {exc}"}

    # Text classification runs regardless of whether an image was supplied.
    txt_summary, txt_probs, txt_label = perform_text_classification_and_format(text)

    return img_scores, txt_probs, txt_summary, txt_label
177
+
178
# ============================================
# 7. PROMPT GENERATION FUNCTION
# ============================================
def generate_prompt_only(image_classification_results: dict,
                         text_classification_probabilities: dict,
                         predicted_text_label: str,
                         text: str) -> str:
    """Build the Gemini prompt from both classifiers' outputs.

    Args:
        image_classification_results: stage-name -> score dict (or an "error" dict).
        text_classification_probabilities: "High Concept"/"No High Concept" -> float.
        predicted_text_label: "1" if the text classifier predicted high concept.
        text: the user's original input, embedded verbatim in the prompt.

    Returns:
        The full prompt string; never raises.
    """
    design_stage = "unknown"
    if image_classification_results and "error" not in image_classification_results:
        try:
            # Highest-scoring stage wins; guarded because a malformed dict
            # (e.g. non-comparable values) must not break prompt generation.
            design_stage = max(image_classification_results, key=image_classification_results.get)
        except Exception:
            design_stage = "unknown"

    has_high_concept = "Unable to determine"
    confidence = 0.0
    if text_classification_probabilities and "error" not in text_classification_probabilities:
        if predicted_text_label == "1":
            has_high_concept = "Yes"
            confidence = text_classification_probabilities.get("High Concept", 0.0)
        else:
            has_high_concept = "No"
            confidence = text_classification_probabilities.get("No High Concept", 0.0)

    llm_attitude = llm_attitude_mapping.get(design_stage, llm_attitude_mapping["random"])

    # Fix: the old wording treated any non-"Yes" outcome — including an
    # undetermined classification — as "does not contain ... (confidence: 0.00)",
    # which misinforms the LLM. State the undetermined case explicitly.
    if has_high_concept == "Yes":
        concept_sentence = f"The user's input contains abstract architectural concepts (confidence: {confidence:.2f})."
    elif has_high_concept == "No":
        concept_sentence = f"The user's input does not contain abstract architectural concepts (confidence: {confidence:.2f})."
    else:
        concept_sentence = "It is unclear whether the user's input contains abstract architectural concepts."

    prompt = f"""You are an abstract architecture critique interpreter.
Your audience is a low-level architecture student.
The user is in the {design_stage} design stage, so your attitude should be {llm_attitude}.
{concept_sentence}

RULES:
- Write in English, strictly 250-350 words.
- MUST end with a complete sentence with proper punctuation.
- Never repeat any viewpoint or sentence.
- No slogans, catchphrases, or parallel sentence structures.
- No meta-commentary like "Output complete", "End of response", etc.
- Stop immediately after the final sentence ends.

User input: {text}

Explain abstract concepts using simple, everyday examples that a child could understand, and provide actionable suggestions.
"""
    return prompt
226
 
227
# ============================================
# 8. GEMINI FEEDBACK GENERATION
# ============================================
def generate_feedback_from_prompt(prompt_input: str) -> str:
    """Send *prompt_input* to Gemini and return the cleaned response text.

    Never raises: when the API is not configured or the call fails, a
    human-readable error string is returned so the Gradio UI always has
    something to display.
    """
    if gemini_model is None:
        return "⚠️ Gemini API not configured. Please set GEMINI_API_KEY in Hugging Face Spaces secrets."

    try:
        print("Generating feedback with Gemini...")

        generation_config = genai.types.GenerationConfig(
            temperature=0.7,
            max_output_tokens=500,
            top_p=0.9,
        )

        response = gemini_model.generate_content(
            prompt_input,
            generation_config=generation_config
        )

        llm_response_text = response.text.strip()

        # Post-processing: Remove meta-commentary the model sometimes appends.
        meta_phrases = [
            "Output complete", "End of output", "No more text",
            "Final output", "Response complete", "βœ…"
        ]
        # Fix: strip repeatedly until no phrase remains. The old single pass
        # left stacked suffixes (e.g. "... Final output βœ…") partially intact.
        changed = True
        while changed:
            changed = False
            for phrase in meta_phrases:
                if llm_response_text.endswith(phrase):
                    llm_response_text = llm_response_text[:-len(phrase)].strip()
                    changed = True

        print("βœ“ Feedback generated successfully")
        return llm_response_text

    except Exception as e:
        # NOTE(review): response.text raises when Gemini returns no candidates
        # (e.g. a safety block); that path lands here and is shown to the user.
        print(f"❌ Error during Gemini interaction: {e}")
        return f"Error generating feedback: {str(e)}"
265
 
266
# ============================================
# 9. GRADIO INTERFACE
# ============================================
examples = [
    ["https://balancedarchitecture.com/wp-content/uploads/2021/11/EXISTING-FIRST-FLOOR-PRES-scaled-e1635965923983.jpg",
     "Exploring spatial relationships and material palettes."],
    ["https://cdn.prod.website-files.com/5894a32730554b620f7bf36d/5e848c2d622e7abe1ad48504_5e01ce9f0d272014d0353cd1_Things-You-Need-to-Organize-a-3D-Rendering-Architectural-Project-EASY-RENDER.jpeg",
     "The window size is too small."],
    ["https://architectelevator.com/assets/img/bilbao_sketch.png",
     "The facade expresses the building's relationship with the urban context."],
]

with gr.Blocks(css="""
.left-column, .middle-column, .right-column {min-width: 300px !important;}
.textbox-container textarea {min-height: 150px !important;}
""") as demo:

    gr.Markdown("# πŸ›οΈ Architecture Feedback Generator (Powered by Gemini)")
    gr.Markdown("""
    Upload an architectural image and provide a text description or question.
    The system will classify the design stage, analyze the text for high-level concepts,
    generate a customized prompt, and provide AI-powered feedback using Google's Gemini.
    """)

    with gr.Row():
        # LEFT COLUMN - Input Section
        with gr.Column(scale=1, elem_classes="left-column"):
            gr.Markdown("### πŸ“₯ Input")
            img_in = gr.Image(type="pil", label="Upload Architectural Image", height=300)
            txt_in = gr.Textbox(
                label="Enter Text Description or Question",
                placeholder="Describe your architectural design, ask questions, or provide context...",
                lines=6,
                elem_classes="textbox-container"
            )
            btn_classify = gr.Button("πŸ” Classify & Generate Prompt", variant="primary", size="lg")

        # MIDDLE COLUMN - Classification & Prompt Section
        with gr.Column(scale=1, elem_classes="middle-column"):
            gr.Markdown("### πŸ“Š Classification Results & Prompt")
            lbl_image = gr.Label(
                num_top_classes=len(class_names),
                label="Image Classification (Design Stage)"
            )
            tb_text_result = gr.Textbox(
                label="Text Classification (High Concept Detection)",
                lines=2,
                elem_classes="textbox-container"
            )
            tb_prompt = gr.Textbox(
                label="Generated Prompt (Editable)",
                lines=10,
                interactive=True,
                elem_classes="textbox-container"
            )
            btn_feedback = gr.Button("✨ Generate AI Feedback", variant="primary", size="lg")

        # RIGHT COLUMN - Gemini Output Section
        with gr.Column(scale=1, elem_classes="right-column"):
            gr.Markdown("### πŸ€– AI-Generated Feedback")
            tb_llm = gr.Textbox(
                label="Gemini Response",
                lines=20,
                elem_classes="textbox-container",
                show_copy_button=True
            )

    # Hidden state carrying the text-classifier outputs between steps.
    state_txt_probs = gr.State()
    state_txt_label = gr.State()

    # Step 1: classification; Step 2: prompt generation chained off it.
    # NOTE(review): the prompt step reads the gr.Label component back as an
    # input and assumes Gradio returns the same score dict the classifier
    # produced — confirm against the installed Gradio version.
    classify_event = btn_classify.click(
        fn=perform_classification_and_format,
        inputs=[img_in, txt_in],
        outputs=[lbl_image, state_txt_probs, tb_text_result, state_txt_label]
    )
    classify_event.then(
        fn=generate_prompt_only,
        inputs=[lbl_image, state_txt_probs, state_txt_label, txt_in],
        outputs=tb_prompt
    )

    # Step 3: send the (possibly user-edited) prompt to Gemini.
    btn_feedback.click(
        fn=generate_feedback_from_prompt,
        inputs=[tb_prompt],
        outputs=tb_llm
    )

    # Examples Section
    gr.Markdown("---")
    gr.Markdown("### πŸ’‘ Example Inputs")

    def _run_full_pipeline(img, txt):
        # Runs classify -> prompt -> Gemini in one pass for the example rows.
        img_res, txt_prob, txt_fmt, predicted_label = perform_classification_and_format(img, txt)
        prompt = generate_prompt_only(img_res, txt_prob, predicted_label, txt)
        return img_res, txt_fmt, prompt, generate_feedback_from_prompt(prompt)

    gr.Examples(
        examples=examples,
        inputs=[img_in, txt_in],
        outputs=[lbl_image, tb_text_result, tb_prompt, tb_llm],
        fn=_run_full_pipeline,
        cache_examples=False
    )

if __name__ == "__main__":
    demo.launch()