Update app.py
app.py CHANGED

Previous version (removed lines marked "-"):

@@ -1,78 +1,60 @@
  import os
  import shutil
  import json
- import …
  import torch
  import timm
- import pickle
- import gradio as gr
  import pandas as pd
  import sentence_transformers
- import torchvision.transforms as T
- from PIL import Image
  from autogluon.tabular import TabularPredictor
  from huggingface_hub import hf_hub_download, snapshot_download
- from …

  # ----------------------
- # Load …
  # ----------------------
-
- pkl_path = hf_hub_download(repo_id=…
  with open(pkl_path, "rb") as f:
      bundle = pickle.load(f)
-
  architecture = bundle["architecture"]
  num_classes = bundle["num_classes"]
  class_names = bundle["class_names"]
  state_dict = bundle["state_dict"]
-
  device = "cpu"
  model = timm.create_model(architecture, pretrained=False, num_classes=num_classes)
  model.load_state_dict(state_dict)
  model.eval().to(device)
-
- TFM = T.Compose([
-     T.Resize(224),
-     T.CenterCrop(224),
-     T.ToTensor(),
-     T.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
- ])

  # ----------------------
- # Load …
  # ----------------------
-
  download_dir = "downloaded_predictor"
  if os.path.exists(download_dir):
      shutil.rmtree(download_dir)
  os.makedirs(download_dir, exist_ok=True)
-
- …
-     repo_id=repo_id,
-     repo_type="model",
-     local_dir=download_dir,
-     local_dir_use_symlinks=False,
- )
-
- predictor_path = os.path.join(download_dir, "autogluon_predictor")
  loaded_predictor_from_hub = TabularPredictor.load(predictor_path)

  # ----------------------
- # Load …
  # ----------------------
-
- llm_filename = "Qwen_Qwen3-4B-Instruct-2507-Q4_K_M.gguf"
-
- llm = Llama.from_pretrained(
-     repo_id=llm_model_id,
-     filename=llm_filename,
-     n_ctx=4096,
-     n_threads=None,
-     logits_all=False,
-     verbose=False,
- )

  llm_attitude_mapping = {
      "brainstorm": "creative and encouraging",
      "design_iteration": "constructive and detailed, focusing on improvements",
@@ -82,105 +64,61 @@ llm_attitude_mapping = {
  }

  # ----------------------
- # …
- # ----------------------
- try:
-     embedding_model = sentence_transformers.SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
- except Exception:
-     embedding_model = None
-
- # ----------------------
- # Functions
  # ----------------------
  def perform_text_classification_and_format(text: str):
-     …
-         "No High Concept": float(text_proba_df.iloc[0].get("0", 0.0)),
-         "High Concept": float(text_proba_df.iloc[0].get("1", 0.0)),
-     }
-
-     predicted_text_label = str(loaded_predictor_from_hub.predict(text_df_processed).iloc[0])
-     if predicted_text_label == "1":
-         has_high_concept = "Yes"
-         confidence = text_classification_probabilities["High Concept"]
-     else:
-         has_high_concept = "No"
-         confidence = text_classification_probabilities["No High Concept"]
-
-     text_classification_formatted = f"High Concept: {has_high_concept} (Confidence: {confidence:.2f})"

-     return text_classification_formatted, text_classification_probabilities, predicted_text_label

  def perform_classification_and_format(image: Image.Image, text: str):
-
-     design_stage = "unknown"
-
      if image is not None:
          img_tensor = TFM(image).unsqueeze(0).to(device)
          with torch.no_grad():
-             …
-         design_stage = class_names[…
-     …
-     if …
-         confidence = text_classification_probabilities.get("High Concept", 0.0)
-
      llm_attitude = llm_attitude_mapping.get(design_stage, llm_attitude_mapping["random"])
-
      prompt = f"""You are an abstract architecture critique interpreter.
  Your audience is a low-level architecture student.
- The user is …
- …
- - End with a complete sentence.
- - Do not repeat any ideas or sentences.
- - Do not use slogans, mottos, or parallel structures.
- - Do not include phrases like 'final output', 'end of output', or meta-comments.
- - Do not add self-reflection or systematic remarks.
- - Stop immediately after the last sentence of the paragraph.
- Here is the user input text: {text}
- You must use simple language that a child could understand, provide everyday life examples to explain abstract concepts, and give actionable suggestions.
- """
      return prompt

  def generate_feedback_from_prompt(prompt_input: str):
-
-     output = llm.create_completion(
-         prompt=prompt_input,
-         max_tokens=350,
-         stop=["\n\n","<|im_end|>","Final", "Output", "No more"],
-         temperature=0.7,
-     )
-     if output and 'choices' in output and len(output['choices']) > 0 and 'text' in output['choices'][0]:
-         llm_response_text = output['choices'][0]['text'].strip()
-         return llm_response_text

  # ----------------------
- # Gradio
  # ----------------------
  examples = [
      ["https://balancedarchitecture.com/wp-content/uploads/2021/11/EXISTING-FIRST-FLOOR-PRES-scaled-e1635965923983.jpg", "Exploring spatial relationships and material palettes."],
@@ -188,61 +126,28 @@ examples = [
      ["https://architectelevator.com/assets/img/bilbao_sketch.png", "The facade expresses the building's relationship with the urban context."],
  ]

- with gr.Blocks() as …
      gr.Markdown("# Architecture Feedback Generator (Step-by-Step)")

      with gr.Row():
          with gr.Column():
-             image_input = gr.Image(type="pil", label="Upload …
-             text_input = gr.Textbox(label="Enter Text …
-             …
          with gr.Column():
-             …
          with gr.Column():
-             …
-     classification_outputs = classify_button.click(
-         fn=perform_classification_and_format,
-         inputs=[image_input, text_input],
-         outputs=[image_output_label, text_classification_probabilities_state, text_output_textbox]
-     )
-
-     classification_outputs.then(
-         fn=dynamic_generate_prompt,
-         inputs=[image_output_label, text_classification_probabilities_state, text_input],
-         outputs=prompt_output_textbox
-     )
-
-     generate_feedback_button.click(
-         fn=generate_feedback_from_prompt,
-         inputs=[prompt_output_textbox],
-         outputs=llm_output_text
-     )
-
-     def generate_full_chain_output_step_by_step(img, txt):
-         img_res, txt_prob, txt_fmt = perform_classification_and_format(img, txt)
-         predicted_label = "1" if txt_prob.get("High Concept",0) > txt_prob.get("No High Concept",0) else "0"
-         prompt = generate_prompt_only(img_res, txt_prob, predicted_label, txt)
-         llm_res = generate_feedback_from_prompt(prompt)
-         return img_res, txt_fmt, prompt, llm_res
-
-     gr.Examples(
-         examples=examples,
-         inputs=[image_input, text_input],
-         outputs=[image_output_label, text_output_textbox, prompt_output_textbox, llm_output_text],
-         fn=generate_full_chain_output_step_by_step,
-         cache_examples=False
-     )

  if __name__ == "__main__":
-     …

Updated version (added lines marked "+"):

  import os
  import shutil
  import json
+ import pickle  # still used by pickle.load below
+ from PIL import Image
  import torch
+ import torchvision.transforms as T
  import timm
  import pandas as pd
+ import gradio as gr
  import sentence_transformers
  from autogluon.tabular import TabularPredictor
  from huggingface_hub import hf_hub_download, snapshot_download
+ from openai import OpenAI

  # ----------------------
+ # Load CPU-only image model
  # ----------------------
+ REPO_ID_IMAGE = "keerthikoganti/architecture-design-stages-compact-cnn"
+ pkl_path = hf_hub_download(repo_id=REPO_ID_IMAGE, filename="model_bundle.pkl")
  with open(pkl_path, "rb") as f:
      bundle = pickle.load(f)
  architecture = bundle["architecture"]
  num_classes = bundle["num_classes"]
  class_names = bundle["class_names"]
  state_dict = bundle["state_dict"]
  device = "cpu"
  model = timm.create_model(architecture, pretrained=False, num_classes=num_classes)
  model.load_state_dict(state_dict)
  model.eval().to(device)
+ TFM = T.Compose([T.Resize(224), T.CenterCrop(224), T.ToTensor(), T.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])])
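+ # TFM applies the standard ImageNet preprocessing that timm backbones expect:
+ # resize, center-crop to 224x224, tensor conversion, and mean/std normalization.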

  # ----------------------
+ # Load CPU-only AutoGluon predictor
  # ----------------------
+ REPO_ID_AG = "kaitongg/my-autogluon-model"
  download_dir = "downloaded_predictor"
  if os.path.exists(download_dir):
      shutil.rmtree(download_dir)
  os.makedirs(download_dir, exist_ok=True)
+ downloaded_path = snapshot_download(repo_id=REPO_ID_AG, repo_type="model", local_dir=download_dir, local_dir_use_symlinks=False)
+ predictor_path = os.path.join(downloaded_path, "autogluon_predictor")
  loaded_predictor_from_hub = TabularPredictor.load(predictor_path)
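+ # snapshot_download returns the local directory it populated; the predictor is
+ # assumed to live in its "autogluon_predictor" subfolder, matching the layout
+ # of the uploaded repo.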

  # ----------------------
+ # Load sentence transformer
  # ----------------------
+ embedding_model = sentence_transformers.SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

+ # ----------------------
+ # Set up Gemini API client
+ # ----------------------
+ GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
+ # Gemini is reached through its OpenAI-compatible endpoint, so the OpenAI
+ # client needs Google's base_url in addition to the API key.
+ gemini_client = OpenAI(api_key=GEMINI_API_KEY, base_url="https://generativelanguage.googleapis.com/v1beta/openai/")

+ # ----------------------
+ # LLM attitude mapping
+ # ----------------------
  llm_attitude_mapping = {
      "brainstorm": "creative and encouraging",
      "design_iteration": "constructive and detailed, focusing on improvements",
      …
  }

  # ----------------------
+ # Functions: text & image classification, prompt generation, LLM call
  # ----------------------
  def perform_text_classification_and_format(text: str):
+     if not text:
+         return "No text provided", {}, "0"
+     embeddings = embedding_model.encode([text], convert_to_numpy=True)
+     df_emb = pd.DataFrame(embeddings, columns=[f"e{i}" for i in range(embeddings.shape[1])])
+     proba_df = loaded_predictor_from_hub.predict_proba(df_emb)
+     predicted_label = str(loaded_predictor_from_hub.predict(df_emb).iloc[0])
+     high_concept = "Yes" if predicted_label == "1" else "No"
+     confidence = float(proba_df.iloc[0]["1"] if predicted_label == "1" else proba_df.iloc[0]["0"])
+     formatted_text = f"High Concept: {high_concept} (Confidence: {confidence:.2f})"
+     proba_dict = {"High Concept": float(proba_df.iloc[0]["1"]), "No High Concept": float(proba_df.iloc[0]["0"])}
+     return formatted_text, proba_dict, predicted_label
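+ # Note: the text classifier assumes the AutoGluon predictor was trained on raw
+ # MiniLM embeddings (384 columns named e0..e383) with string class labels
+ # "0"/"1", so predict_proba exposes one probability column per label.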

  def perform_classification_and_format(image: Image.Image, text: str):
+     # Image classification
      if image is not None:
          img_tensor = TFM(image).unsqueeze(0).to(device)
          with torch.no_grad():
+             img_out = model(img_tensor)
+         img_probs = torch.softmax(img_out, dim=1)[0]
+         img_pred_idx = torch.argmax(img_probs).item()
+         design_stage = class_names[img_pred_idx]
+         img_results = {class_names[i]: float(img_probs[i]) for i in range(len(class_names))}
+     else:
+         design_stage = "unknown"
+         img_results = {"error": "No image provided"}
+
+     # Text classification
+     txt_fmt, txt_probs, predicted_label = perform_text_classification_and_format(text)
+     return img_results, txt_probs, txt_fmt
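+ # The three return values (image label dict, text probability dict, formatted
+ # text result) are unpacked by the Gradio helpers below in that order.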

+ def generate_prompt_only(img_results, txt_probs, predicted_label, text):
+     design_stage = max(img_results, key=img_results.get) if img_results and 'error' not in img_results else "unknown"
+     has_high_concept = "Yes" if predicted_label == "1" else "No"
+     confidence = txt_probs.get("High Concept", 0.0) if predicted_label == "1" else txt_probs.get("No High Concept", 0.0)
      llm_attitude = llm_attitude_mapping.get(design_stage, llm_attitude_mapping["random"])
      prompt = f"""You are an abstract architecture critique interpreter.
  Your audience is a low-level architecture student.
+ The user is at the {design_stage} design stage, so your attitude should be {llm_attitude}.
+ User input contains high concept: {has_high_concept}.
+ Write 250-350 words in English with clear examples and actionable advice, ending with a complete sentence.
+ {text}"""
      return prompt

  def generate_feedback_from_prompt(prompt_input: str):
+     # The OpenAI-compatible chat endpoint takes max_tokens (not
+     # max_output_tokens) and needs a concrete Gemini model id;
+     # "gemini-1.5-flash" is assumed here.
+     response = gemini_client.chat.completions.create(model="gemini-1.5-flash", messages=[{"role": "user", "content": prompt_input}], max_tokens=350, temperature=0.7)
+     return response.choices[0].message.content

  # ----------------------
+ # Gradio UI
  # ----------------------
  examples = [
      ["https://balancedarchitecture.com/wp-content/uploads/2021/11/EXISTING-FIRST-FLOOR-PRES-scaled-e1635965923983.jpg", "Exploring spatial relationships and material palettes."],
      …
      ["https://architectelevator.com/assets/img/bilbao_sketch.png", "The facade expresses the building's relationship with the urban context."],
  ]

+ with gr.Blocks() as demo:
      gr.Markdown("# Architecture Feedback Generator (Step-by-Step)")
+     gr.Markdown("Upload an architectural image and provide a text description or question to see classification results and the generated prompt. Click 'Generate Feedback' to get the LLM's response.")

      with gr.Row():
          with gr.Column():
+             image_input = gr.Image(type="pil", label="Upload Image")
+             text_input = gr.Textbox(label="Enter Text", lines=4)
+             classify_btn = gr.Button("Classify & Generate Prompt")
+
          with gr.Column():
+             image_out = gr.Label(num_top_classes=len(class_names), label="Image Classification Results")
+             text_out = gr.Textbox(label="Text Classification Results", lines=4)
+             prompt_box = gr.Textbox(label="Generated Prompt (editable)", lines=6, interactive=True)
+             generate_feedback_btn = gr.Button("Generate Feedback")
+
          with gr.Column():
+             llm_out = gr.Textbox(label="LLM Feedback", lines=12)
+
+     # Classify image and text, then build the editable prompt in one pass, so
+     # each of the three return values gets its own output component.
+     def classify_and_prompt(img, txt):
+         img_results, txt_probs, txt_fmt = perform_classification_and_format(img, txt)
+         predicted_label = "1" if txt_probs.get("High Concept", 0) > txt_probs.get("No High Concept", 0) else "0"
+         prompt = generate_prompt_only(img_results, txt_probs, predicted_label, txt)
+         return img_results, txt_fmt, prompt
+
+     classify_btn.click(fn=classify_and_prompt, inputs=[image_input, text_input], outputs=[image_out, text_out, prompt_box])
+     generate_feedback_btn.click(fn=generate_feedback_from_prompt, inputs=[prompt_box], outputs=[llm_out])
+
+     # For examples, run the whole chain once: classify, prompt, LLM feedback.
+     def run_full_chain(img, txt):
+         img_results, txt_fmt, prompt = classify_and_prompt(img, txt)
+         return img_results, txt_fmt, prompt, generate_feedback_from_prompt(prompt)
+
+     gr.Examples(examples=examples, inputs=[image_input, text_input], outputs=[image_out, text_out, prompt_box, llm_out], fn=run_full_chain, cache_examples=False)
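+     # cache_examples=False keeps example runs live: clicking an example
+     # triggers a real classification pass and a real Gemini call each time.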

  if __name__ == "__main__":
+     demo.launch()
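
For reference, the import list above implies roughly the following Space dependencies. This is a sketch inferred from the code, not the repo's actual requirements.txt; llama-cpp-python is no longer needed after this change, and version pins are left to the deployer.

    # requirements.txt (sketch)
    torch
    torchvision
    timm
    pandas
    gradio
    sentence-transformers
    autogluon.tabular
    huggingface_hub
    openai
    Pillow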