kaitongg committed
Commit bd28c5b · verified · 1 Parent(s): 203f314

Update app.py

Files changed (1): app.py +79 -174
app.py CHANGED
--- app.py (before)
@@ -1,78 +1,60 @@
  import os
  import shutil
  import json
- import zipfile
  import torch
  import timm
- import pickle
- import gradio as gr
  import pandas as pd
  import sentence_transformers
- import torchvision.transforms as T
- from PIL import Image
  from autogluon.tabular import TabularPredictor
  from huggingface_hub import hf_hub_download, snapshot_download
- from llama_cpp import Llama

  # ----------------------
- # Load Image Classification Model
  # ----------------------
- REPO_ID = "keerthikoganti/architecture-design-stages-compact-cnn"
- pkl_path = hf_hub_download(repo_id=REPO_ID, filename="model_bundle.pkl")
  with open(pkl_path, "rb") as f:
      bundle = pickle.load(f)
-
  architecture = bundle["architecture"]
  num_classes = bundle["num_classes"]
  class_names = bundle["class_names"]
  state_dict = bundle["state_dict"]
-
  device = "cpu"
  model = timm.create_model(architecture, pretrained=False, num_classes=num_classes)
  model.load_state_dict(state_dict)
  model.eval().to(device)
-
- TFM = T.Compose([
-     T.Resize(224),
-     T.CenterCrop(224),
-     T.ToTensor(),
-     T.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
- ])

  # ----------------------
- # Load Text Classification Model
  # ----------------------
- repo_id = "kaitongg/my-autogluon-model"
  download_dir = "downloaded_predictor"
  if os.path.exists(download_dir):
      shutil.rmtree(download_dir)
  os.makedirs(download_dir, exist_ok=True)
-
- snapshot_download(
-     repo_id=repo_id,
-     repo_type="model",
-     local_dir=download_dir,
-     local_dir_use_symlinks=False,
- )
-
- predictor_path = os.path.join(download_dir, "autogluon_predictor")
  loaded_predictor_from_hub = TabularPredictor.load(predictor_path)

  # ----------------------
- # Load LLM
  # ----------------------
- llm_model_id = "bartowski/Qwen_Qwen3-4B-Instruct-2507-GGUF"
- llm_filename = "Qwen_Qwen3-4B-Instruct-2507-Q4_K_M.gguf"
-
- llm = Llama.from_pretrained(
-     repo_id=llm_model_id,
-     filename=llm_filename,
-     n_ctx=4096,
-     n_threads=None,
-     logits_all=False,
-     verbose=False,
- )

  llm_attitude_mapping = {
      "brainstorm": "creative and encouraging",
      "design_iteration": "constructive and detailed, focusing on improvements",
@@ -82,105 +64,61 @@ llm_attitude_mapping = {
  }

  # ----------------------
- # Load Embedding Model
- # ----------------------
- try:
-     embedding_model = sentence_transformers.SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
- except Exception:
-     embedding_model = None
-
- # ----------------------
- # Functions
- # ----------------------
  def perform_text_classification_and_format(text: str):
-     text_classification_formatted = "No text provided"
-     text_classification_probabilities = {}
-     predicted_text_label = "0"
-
-     if text and loaded_predictor_from_hub is not None and embedding_model is not None:
-         embeddings = embedding_model.encode([text], convert_to_numpy=True)
-         n, d = embeddings.shape
-         text_df_processed = pd.DataFrame(embeddings, columns=[f"e{i}" for i in range(d)])
-
-         text_proba_df = loaded_predictor_from_hub.predict_proba(text_df_processed)
-         text_classification_probabilities = {
-             "No High Concept": float(text_proba_df.iloc[0].get("0", 0.0)),
-             "High Concept": float(text_proba_df.iloc[0].get("1", 0.0)),
-         }
-
-         predicted_text_label = str(loaded_predictor_from_hub.predict(text_df_processed).iloc[0])
-         if predicted_text_label == "1":
-             has_high_concept = "Yes"
-             confidence = text_classification_probabilities["High Concept"]
-         else:
-             has_high_concept = "No"
-             confidence = text_classification_probabilities["No High Concept"]
-
-         text_classification_formatted = f"High Concept: {has_high_concept} (Confidence: {confidence:.2f})"

-     return text_classification_formatted, text_classification_probabilities, predicted_text_label

  def perform_classification_and_format(image: Image.Image, text: str):
-     image_classification_results = {"error": "No image provided"}
-     design_stage = "unknown"
-
      if image is not None:
          img_tensor = TFM(image).unsqueeze(0).to(device)
          with torch.no_grad():
-             img_output = model(img_tensor)
-         img_probabilities = torch.softmax(img_output, dim=1)[0]
-         predicted_class_index = torch.argmax(img_probabilities).item()
-         design_stage = class_names[predicted_class_index]
-         image_classification_results = {class_names[i]: float(img_probabilities[i]) for i in range(len(class_names))}
-
-     text_classification_formatted, text_classification_probabilities, predicted_text_label = perform_text_classification_and_format(text)
-     return image_classification_results, text_classification_probabilities, text_classification_formatted
-
- def generate_prompt_only(image_classification_results, text_classification_probabilities, predicted_text_label, text: str):
-     design_stage = "unknown"
-     if image_classification_results and "error" not in image_classification_results:
-         design_stage = max(image_classification_results, key=image_classification_results.get)
-
-     has_high_concept = "No"
-     confidence = text_classification_probabilities.get("No High Concept", 0.0)
-     if predicted_text_label == "1":
-         has_high_concept = "Yes"
-         confidence = text_classification_probabilities.get("High Concept", 0.0)
-
      llm_attitude = llm_attitude_mapping.get(design_stage, llm_attitude_mapping["random"])
-
      prompt = f"""You are an abstract architecture critique interpreter.
  Your audience is a low-level architecture student.
- The user is currently in the {design_stage} design stage, so your attitude should be {llm_attitude}.
- The user's input contains abstract architectural concepts (Yes/No): {has_high_concept}.
- Rules:
- - Write a paragraph in English, strictly between 250-350 words.
- - End with a complete sentence.
- - Do not repeat any ideas or sentences.
- - Do not use slogans, mottos, or parallel structures.
- - Do not include phrases like 'final output', 'end of output', or meta-comments.
- - Do not add self-reflection or systematic remarks.
- - Stop immediately after the last sentence of the paragraph.
- Here is the user input text: {text}
- You must use simple language that a child could understand, provide everyday life examples to explain abstract concepts, and give actionable suggestions.
- """
      return prompt

  def generate_feedback_from_prompt(prompt_input: str):
-     llm_response_text = "Error generating feedback from LLM."
-     if llm is not None:
-         output = llm.create_completion(
-             prompt=prompt_input,
-             max_tokens=350,
-             stop=["\n\n","<|im_end|>","Final", "Output", "No more"],
-             temperature=0.7,
-         )
-         if output and 'choices' in output and len(output['choices']) > 0 and 'text' in output['choices'][0]:
-             llm_response_text = output['choices'][0]['text'].strip()
-     return llm_response_text
 
  # ----------------------
- # Gradio Interface
  # ----------------------
  examples = [
      ["https://balancedarchitecture.com/wp-content/uploads/2021/11/EXISTING-FIRST-FLOOR-PRES-scaled-e1635965923983.jpg", "Exploring spatial relationships and material palettes."],
@@ -188,61 +126,28 @@ examples = [
      ["https://architectelevator.com/assets/img/bilbao_sketch.png", "The facade expresses the building's relationship with the urban context."],
  ]
 
- with gr.Blocks() as demo_step_by_step:
      gr.Markdown("# Architecture Feedback Generator (Step-by-Step)")

      with gr.Row():
          with gr.Column():
-             image_input = gr.Image(type="pil", label="Upload Architectural Image")
-             text_input = gr.Textbox(label="Enter Text Description or Question", lines=4)
-             classify_button = gr.Button("Perform Classification & Generate Prompt")
-
          with gr.Column():
-             image_output_label = gr.Label(num_top_classes=len(class_names), label="Image Classification Results")
-             text_output_textbox = gr.Textbox(label="Text Classification Results", lines=2)
-             text_classification_probabilities_state = gr.State()
-             prompt_output_textbox = gr.Textbox(label="Generated Prompt for LLM", interactive=True, lines=8)
-             generate_feedback_button = gr.Button("Generate Feedback from Prompt")
-
          with gr.Column():
-             llm_output_text = gr.Textbox(label="Generated Feedback", lines=12)
-
-     def dynamic_generate_prompt(img_res, txt_prob, txt):
-         predicted_label = "1" if txt_prob.get("High Concept",0) > txt_prob.get("No High Concept",0) else "0"
-         return generate_prompt_only(img_res, txt_prob, predicted_label, txt)
-
-     classification_outputs = classify_button.click(
-         fn=perform_classification_and_format,
-         inputs=[image_input, text_input],
-         outputs=[image_output_label, text_classification_probabilities_state, text_output_textbox]
-     )
-
-     classification_outputs.then(
-         fn=dynamic_generate_prompt,
-         inputs=[image_output_label, text_classification_probabilities_state, text_input],
-         outputs=prompt_output_textbox
-     )
-
-     generate_feedback_button.click(
-         fn=generate_feedback_from_prompt,
-         inputs=[prompt_output_textbox],
-         outputs=llm_output_text
-     )
-
-     def generate_full_chain_output_step_by_step(img, txt):
-         img_res, txt_prob, txt_fmt = perform_classification_and_format(img, txt)
-         predicted_label = "1" if txt_prob.get("High Concept",0) > txt_prob.get("No High Concept",0) else "0"
-         prompt = generate_prompt_only(img_res, txt_prob, predicted_label, txt)
-         llm_res = generate_feedback_from_prompt(prompt)
-         return img_res, txt_fmt, prompt, llm_res
-
-     gr.Examples(
-         examples=examples,
-         inputs=[image_input, text_input],
-         outputs=[image_output_label, text_output_textbox, prompt_output_textbox, llm_output_text],
-         fn=generate_full_chain_output_step_by_step,
-         cache_examples=False
-     )

  if __name__ == "__main__":
-     demo_step_by_step.launch()
 
+++ app.py (after)

  import os
  import shutil
  import json
+ import pickle  # needed for pickle.load below
+ from PIL import Image
  import torch
+ import torchvision.transforms as T
  import timm
  import pandas as pd
+ import gradio as gr
  import sentence_transformers
  from autogluon.tabular import TabularPredictor
  from huggingface_hub import hf_hub_download, snapshot_download
+ from openai import OpenAI

  # ----------------------
+ # Load CPU-only image model
  # ----------------------
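+ # model_bundle.pkl packs the timm architecture name, class list, and trained
+ # weights into a single pickle; its fields are unpacked one by one below.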
+ REPO_ID_IMAGE = "keerthikoganti/architecture-design-stages-compact-cnn"
+ pkl_path = hf_hub_download(repo_id=REPO_ID_IMAGE, filename="model_bundle.pkl")
  with open(pkl_path, "rb") as f:
      bundle = pickle.load(f)
  architecture = bundle["architecture"]
  num_classes = bundle["num_classes"]
  class_names = bundle["class_names"]
  state_dict = bundle["state_dict"]
  device = "cpu"
  model = timm.create_model(architecture, pretrained=False, num_classes=num_classes)
  model.load_state_dict(state_dict)
  model.eval().to(device)
+ TFM = T.Compose([T.Resize(224), T.CenterCrop(224), T.ToTensor(), T.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])])
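+ # These are the standard ImageNet evaluation transforms (224px center crop plus
+ # mean/std normalization); the assumption is that the bundle's CNN was trained with
+ # the same preprocessing, since the bundle ships no transform of its own.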

  # ----------------------
+ # Load CPU-only AutoGluon predictor
  # ----------------------
+ REPO_ID_AG = "kaitongg/my-autogluon-model"
  download_dir = "downloaded_predictor"
  if os.path.exists(download_dir):
      shutil.rmtree(download_dir)
  os.makedirs(download_dir, exist_ok=True)
+ downloaded_path = snapshot_download(repo_id=REPO_ID_AG, repo_type="model", local_dir=download_dir, local_dir_use_symlinks=False)
+ predictor_path = os.path.join(downloaded_path, "autogluon_predictor")
  loaded_predictor_from_hub = TabularPredictor.load(predictor_path)
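+ # The predictor expects the same embedding feature columns it was trained on
+ # (e0..e383 for all-MiniLM-L6-v2's 384-dimensional vectors); the DataFrame
+ # construction in perform_text_classification_and_format below rebuilds them.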

  # ----------------------
+ # Load sentence transformer
  # ----------------------
+ embedding_model = sentence_transformers.SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

+ # ----------------------
+ # Set up Gemini API client
+ # ----------------------
+ GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
+ gemini_client = OpenAI(
+     api_key=GEMINI_API_KEY,
+     base_url="https://generativelanguage.googleapis.com/v1beta/openai/",  # Gemini's OpenAI-compatible endpoint; without it the client calls api.openai.com
+ )
+
+ # ----------------------
+ # LLM attitude mapping
+ # ----------------------
  llm_attitude_mapping = {
      "brainstorm": "creative and encouraging",
      "design_iteration": "constructive and detailed, focusing on improvements",
@@ -82,105 +64,61 @@ llm_attitude_mapping = {
  }

  # ----------------------
+ # Functions: Text & Image classification, Prompt generation, LLM
  # ----------------------
  def perform_text_classification_and_format(text: str):
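+     """Embed the text with all-MiniLM-L6-v2, then classify with the AutoGluon predictor.
+
+     Returns (formatted summary, {label: probability}, predicted label as "0" or "1").
+     """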
+     if not text:
+         return "No text provided", {}, "0"
+     embeddings = embedding_model.encode([text], convert_to_numpy=True)
+     df_emb = pd.DataFrame(embeddings, columns=[f"e{i}" for i in range(embeddings.shape[1])])
+     proba_df = loaded_predictor_from_hub.predict_proba(df_emb)
+     predicted_label = str(loaded_predictor_from_hub.predict(df_emb).iloc[0])
+     high_concept = "Yes" if predicted_label == "1" else "No"
+     confidence = float(proba_df.iloc[0]["1"] if predicted_label=="1" else proba_df.iloc[0]["0"])
+     formatted_text = f"High Concept: {high_concept} (Confidence: {confidence:.2f})"
+     proba_dict = {"High Concept": float(proba_df.iloc[0]["1"]), "No High Concept": float(proba_df.iloc[0]["0"])}
+     return formatted_text, proba_dict, predicted_label


  def perform_classification_and_format(image: Image.Image, text: str):
+     # Image classification
      if image is not None:
          img_tensor = TFM(image).unsqueeze(0).to(device)
          with torch.no_grad():
+             img_out = model(img_tensor)
+         img_probs = torch.softmax(img_out, dim=1)[0]
+         img_pred_idx = torch.argmax(img_probs).item()
+         design_stage = class_names[img_pred_idx]
+         img_results = {class_names[i]: float(img_probs[i]) for i in range(len(class_names))}
+     else:
+         design_stage = "unknown"
+         img_results = {"error": "No image provided"}
+
+     # Text classification
+     txt_fmt, txt_probs, predicted_label = perform_text_classification_and_format(text)
+     return img_results, txt_probs, txt_fmt
+
+
+ def generate_prompt_only(img_results, txt_probs, predicted_label, text):
+     design_stage = max(img_results, key=img_results.get) if img_results and 'error' not in img_results else "unknown"
+     has_high_concept = "Yes" if predicted_label=="1" else "No"
+     confidence = txt_probs.get("High Concept",0.0) if predicted_label=="1" else txt_probs.get("No High Concept",0.0)
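+     # Note: confidence is computed here but is not interpolated into the prompt below.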
      llm_attitude = llm_attitude_mapping.get(design_stage, llm_attitude_mapping["random"])
      prompt = f"""You are an abstract architecture critique interpreter.
  Your audience is a low-level architecture student.
+ The user is at the {design_stage} design stage, so your attitude should be {llm_attitude}.
+ User input contains high concept: {has_high_concept}.
+ Write 250-350 words in English with clear examples and actionable advice, ending with a complete sentence.
+ {text}"""
      return prompt

+
  def generate_feedback_from_prompt(prompt_input: str):
+     response = gemini_client.chat.completions.create(
+         model="gemini-1.5-flash",  # assumed target; "gemini-1.5" alone is not a valid Gemini model id
+         messages=[{"role": "user", "content": prompt_input}],
+         max_tokens=350,  # the OpenAI chat.completions API takes max_tokens, not max_output_tokens
+         temperature=0.7,
+     )
+     return response.choices[0].message.content
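+ # Unlike the old llama.cpp path, there is no fallback or try/except here; a missing
+ # or invalid GEMINI_API_KEY surfaces as a runtime error in the UI.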
 
  # ----------------------
+ # Gradio UI
  # ----------------------
  examples = [
      ["https://balancedarchitecture.com/wp-content/uploads/2021/11/EXISTING-FIRST-FLOOR-PRES-scaled-e1635965923983.jpg", "Exploring spatial relationships and material palettes."],
@@ -188,61 +126,28 @@ examples = [
      ["https://architectelevator.com/assets/img/bilbao_sketch.png", "The facade expresses the building's relationship with the urban context."],
  ]
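+ # The example images are fetched from their external URLs when an example is selected.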

+ with gr.Blocks() as demo:
      gr.Markdown("# Architecture Feedback Generator (Step-by-Step)")
+     gr.Markdown("Upload an architectural image and provide a text description or question to see classification results and the generated prompt. Click 'Generate Feedback' to get the LLM's response.")

      with gr.Row():
          with gr.Column():
+             image_input = gr.Image(type="pil", label="Upload Image")
+             text_input = gr.Textbox(label="Enter Text", lines=4)
+             classify_btn = gr.Button("Classify & Generate Prompt")
+
          with gr.Column():
+             image_out = gr.Label(num_top_classes=len(class_names), label="Image Classification Results")
+             text_out = gr.Textbox(label="Text Classification Results", lines=4)
+             prompt_box = gr.Textbox(label="Generated Prompt (editable)", lines=6, interactive=True)
+             generate_feedback_btn = gr.Button("Generate Feedback")
+
          with gr.Column():
+             llm_out = gr.Textbox(label="LLM Feedback", lines=12)
+
+     def classify_and_prompt(img, txt):
+         img_res, txt_probs, txt_fmt = perform_classification_and_format(img, txt)
+         # Recover the predicted label from the probabilities, as the old step-by-step flow did
+         predicted_label = "1" if txt_probs.get("High Concept",0) > txt_probs.get("No High Concept",0) else "0"
+         prompt = generate_prompt_only(img_res, txt_probs, predicted_label, txt)
+         return img_res, txt_fmt, prompt
+
+     def full_chain(img, txt):
+         img_res, txt_fmt, prompt = classify_and_prompt(img, txt)
+         return img_res, txt_fmt, prompt, generate_feedback_from_prompt(prompt)
+
+     classify_btn.click(fn=classify_and_prompt, inputs=[image_input, text_input], outputs=[image_out, text_out, prompt_box])
+     generate_feedback_btn.click(fn=generate_feedback_from_prompt, inputs=[prompt_box], outputs=[llm_out])
+     gr.Examples(examples=examples, inputs=[image_input, text_input], outputs=[image_out, text_out, prompt_box, llm_out], fn=full_chain, cache_examples=False)
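+     # cache_examples=False re-runs the full chain on every example click,
+     # including the Gemini API call.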

  if __name__ == "__main__":
+     demo.launch()