kaitongg commited on
Commit
3377508
·
verified ·
1 Parent(s): ec726a6

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +243 -0
app.py ADDED
@@ -0,0 +1,243 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ import json
4
+ import zipfile
5
+ import torch
6
+ import timm
7
+ import pickle
8
+ import gradio as gr
9
+ import pandas as pd
10
+ import sentence_transformers
11
+ import torchvision.transforms as T
12
+ from PIL import Image
13
+ from autogluon.tabular import TabularPredictor
14
+ from huggingface_hub import hf_hub_download, snapshot_download
15
+ from llama_cpp import Llama
16
+
# ----------------------
# Load Image Classification Model
# ----------------------
REPO_ID = "keerthikoganti/architecture-design-stages-compact-cnn"
pkl_path = hf_hub_download(repo_id=REPO_ID, filename="model_bundle.pkl")

# SECURITY NOTE(review): pickle.load executes arbitrary code from the
# downloaded artifact — acceptable only because the repo is trusted.
with open(pkl_path, "rb") as f:
    bundle = pickle.load(f)

# The bundle carries everything needed to rebuild the classifier.
architecture = bundle["architecture"]
num_classes = bundle["num_classes"]
class_names = bundle["class_names"]
state_dict = bundle["state_dict"]

device = "cuda" if torch.cuda.is_available() else "cpu"
model = timm.create_model(architecture, pretrained=False, num_classes=num_classes)
model.load_state_dict(state_dict)
model.eval().to(device)

# Standard ImageNet preprocessing: 224x224 centre crop + mean/std normalisation.
TFM = T.Compose([
    T.Resize(224),
    T.CenterCrop(224),
    T.ToTensor(),
    T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
# ----------------------
# Load Text Classification Model
# ----------------------
repo_id = "kaitongg/my-autogluon-model"
download_dir = "downloaded_predictor"

# Start from a clean directory so stale files never mix with the fresh snapshot.
if os.path.exists(download_dir):
    shutil.rmtree(download_dir)
os.makedirs(download_dir, exist_ok=True)

# NOTE: local_dir_use_symlinks was dropped — the parameter is deprecated and
# ignored by recent huggingface_hub versions; local_dir always gets real files.
snapshot_download(
    repo_id=repo_id,
    repo_type="model",
    local_dir=download_dir,
)

predictor_path = os.path.join(download_dir, "autogluon_predictor")
loaded_predictor_from_hub = TabularPredictor.load(predictor_path)
# ----------------------
# Load LLM
# ----------------------
llm_model_id = "bartowski/Qwen_Qwen3-4B-Instruct-2507-GGUF"
llm_filename = "Qwen_Qwen3-4B-Instruct-2507-Q4_K_M.gguf"

# Quantised Qwen3-4B served through llama.cpp.
llm = Llama.from_pretrained(
    repo_id=llm_model_id,
    filename=llm_filename,
    n_ctx=4096,        # prompt + completion token window
    n_threads=None,    # let llama.cpp pick a thread count
    logits_all=False,  # logits only for sampled tokens
    verbose=False,
)
# Tone the LLM should adopt for each predicted design stage; "random" doubles
# as the fallback entry for unrecognised stages.
llm_attitude_mapping = dict(
    brainstorm="creative and encouraging",
    design_iteration="constructive and detailed, focusing on improvements",
    design_optimization="critical and focused on efficiency and refinement",
    final_review="thorough and critical, evaluating completeness and adherence to requirements",
    random="neutral and informative, perhaps suggesting a relevant stage",
)
# ----------------------
# Load Embedding Model
# ----------------------
# Best effort: when the sentence-transformer cannot be loaded (offline, etc.)
# embedding_model stays None, which disables text classification downstream.
try:
    embedding_model = sentence_transformers.SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
except Exception:
    embedding_model = None
# ----------------------
# Functions
# ----------------------
def perform_text_classification_and_format(text: str):
    """Classify *text* as containing a "high concept" or not.

    Embeds the text with the sentence-transformer, feeds the embedding to the
    AutoGluon tabular predictor, and formats the result for display.

    Returns a 3-tuple:
        (formatted summary string,
         {"No High Concept": p0, "High Concept": p1} probability dict,
         predicted label as a string, "0" or "1").
    Falls back to ("No text provided", {}, "0") when *text* is empty or when
    either model failed to load.
    """
    text_classification_formatted = "No text provided"
    text_classification_probabilities = {}
    predicted_text_label = "0"

    if text and loaded_predictor_from_hub is not None and embedding_model is not None:
        embeddings = embedding_model.encode([text], convert_to_numpy=True)
        # The predictor was trained on columns e0..e{dim-1}; rebuild that frame.
        dim = embeddings.shape[1]
        text_df_processed = pd.DataFrame(embeddings, columns=[f"e{i}" for i in range(dim)])

        text_proba_df = loaded_predictor_from_hub.predict_proba(text_df_processed)
        text_classification_probabilities = {
            "No High Concept": float(text_proba_df.iloc[0].get("0", 0.0)),
            "High Concept": float(text_proba_df.iloc[0].get("1", 0.0)),
        }

        predicted_text_label = str(loaded_predictor_from_hub.predict(text_df_processed).iloc[0])
        # 是/否 ("yes"/"no") feed the Chinese portion of the LLM prompt.
        if predicted_text_label == "1":
            has_high_concept = "是"
            confidence = text_classification_probabilities["High Concept"]
        else:
            has_high_concept = "否"
            confidence = text_classification_probabilities["No High Concept"]

        text_classification_formatted = f"High Concept: {has_high_concept} (Confidence: {confidence:.2f})"

    return text_classification_formatted, text_classification_probabilities, predicted_text_label
def perform_classification_and_format(image: Image.Image, text: str):
    """Run the image and text classifiers for the Gradio UI.

    Returns a 3-tuple:
        (image class→probability dict — or {"error": ...} when no image,
         text label→probability dict,
         formatted text-classification summary string).
    """
    image_classification_results = {"error": "No image provided"}

    if image is not None:
        img_tensor = TFM(image).unsqueeze(0).to(device)
        with torch.no_grad():
            img_output = model(img_tensor)
        img_probabilities = torch.softmax(img_output, dim=1)[0]
        # NOTE: the previous version also derived design_stage here, but it was
        # never used or returned (the prompt step recomputes it), so it is gone.
        image_classification_results = {class_names[i]: float(img_probabilities[i]) for i in range(len(class_names))}

    # Predicted label is recomputed downstream from the probabilities, so it
    # is deliberately discarded here.
    text_classification_formatted, text_classification_probabilities, _ = perform_text_classification_and_format(text)
    return image_classification_results, text_classification_probabilities, text_classification_formatted
def generate_prompt_only(image_classification_results, text_classification_probabilities, predicted_text_label, text: str):
    """Assemble the LLM critique prompt from both classifiers' outputs.

    The design stage is the highest-probability image class (default
    "unknown"); the stage selects the tone via llm_attitude_mapping.
    Returns the prompt string to feed to the LLM.
    """
    design_stage = "unknown"
    if image_classification_results and "error" not in image_classification_results:
        design_stage = max(image_classification_results, key=image_classification_results.get)

    # Label "1" means the text contains a high concept (是 = yes, 否 = no).
    # NOTE: the previous version also pulled a confidence value out of
    # text_classification_probabilities here, but it never appeared in the
    # prompt, so that dead computation was removed.
    has_high_concept = "是" if predicted_text_label == "1" else "否"

    llm_attitude = llm_attitude_mapping.get(design_stage, llm_attitude_mapping["random"])

    prompt = f"""You are an abstract architecture critique interpreter.
Your audience is a low-level architecture student.
已知用户处于{design_stage}设计阶段,所以你的态度应该要{llm_attitude}。
已知用户输入的结果(是/否)含有抽象建筑学概念:{has_high_concept}。
牢记规则:
- 撰写一段英文,严格控制在250-350字。
- 文末必须以完整句子收尾。
- 不得重复任何观点或句子。
- 禁止使用警句、口号或平行句式。
- 不得出现“最终输出”、‘输出结束’、“无后续文本”等元注释。
- 禁止添加自我反思或系统性备注。
- 段落末句结束后立即终止输出。
以下是用户输入的文本内容:{text}你需要用儿童都懂的语言,举生活中的例子给用户解释抽象概念,并且给出可操作的建议。
"""
    return prompt
def generate_feedback_from_prompt(prompt_input: str):
    """Send *prompt_input* to the local llama.cpp model and return its text.

    Returns a fallback error string when the LLM is unavailable or the
    completion payload does not contain any generated text.
    """
    fallback = "Error generating feedback from LLM."
    if llm is None:
        return fallback

    output = llm.create_completion(
        prompt=prompt_input,
        max_tokens=350,
        # Stop sequences curb the model's habit of appending meta commentary.
        stop=["\n\n", "<|im_end|>", "Final", "Output", "No more"],
        temperature=0.7,
    )
    if output and 'choices' in output and len(output['choices']) > 0 and 'text' in output['choices'][0]:
        return output['choices'][0]['text'].strip()
    return fallback
# ----------------------
# Gradio Interface
# ----------------------
# (image URL, accompanying text) pairs rendered as clickable example rows.
examples = [
    [
        "https://balancedarchitecture.com/wp-content/uploads/2021/11/EXISTING-FIRST-FLOOR-PRES-scaled-e1635965923983.jpg",
        "Exploring spatial relationships and material palettes.",
    ],
    [
        "https://cdn.prod.website-files.com/5894a32730554b620f7bf36d/5e848c2d622e7abe1ad48504_5e01ce9f0d272014d0353cd1_Things-You-Need-to-Organize-a-3D-Rendering-Architectural-Project-EASY-RENDER.jpeg",
        "The window size is too small.",
    ],
    [
        "https://architectelevator.com/assets/img/bilbao_sketch.png",
        "The facade expresses the building's relationship with the urban context.",
    ],
]
with gr.Blocks() as demo_step_by_step:
    gr.Markdown("# Architecture Feedback Generator (Step-by-Step)")

    with gr.Row():
        image_input = gr.Image(type="pil", label="Upload Architectural Image")
        text_input = gr.Textbox(label="Enter Text Description or Question")

    classify_button = gr.Button("Perform Classification & Generate Prompt")
    image_output_label = gr.Label(num_top_classes=len(class_names), label="Image Classification Results")
    text_output_textbox = gr.Textbox(label="Text Classification Results")
    # Raw probability dict is stashed in state so the prompt step can reuse it.
    text_classification_probabilities_state = gr.State()
    prompt_output_textbox = gr.Textbox(label="Generated Prompt for LLM", interactive=True)
    generate_feedback_button = gr.Button("Generate Feedback from Prompt")
    llm_output_text = gr.Textbox(label="Generated Feedback")

    def dynamic_generate_prompt(img_res, txt_prob, txt):
        # Recover the predicted label by comparing the two class probabilities.
        if txt_prob.get("High Concept", 0) > txt_prob.get("No High Concept", 0):
            predicted_label = "1"
        else:
            predicted_label = "0"
        return generate_prompt_only(img_res, txt_prob, predicted_label, txt)

    # Classify first, then derive the prompt from the classification outputs.
    classify_button.click(
        fn=perform_classification_and_format,
        inputs=[image_input, text_input],
        outputs=[image_output_label, text_classification_probabilities_state, text_output_textbox],
    ).then(
        fn=dynamic_generate_prompt,
        inputs=[image_output_label, text_classification_probabilities_state, text_input],
        outputs=prompt_output_textbox,
    )

    generate_feedback_button.click(
        fn=generate_feedback_from_prompt,
        inputs=[prompt_output_textbox],
        outputs=llm_output_text,
    )

    def generate_full_chain_output_step_by_step(img, txt):
        # Single-shot pipeline used when an example row is clicked.
        img_res, txt_prob, txt_fmt = perform_classification_and_format(img, txt)
        if txt_prob.get("High Concept", 0) > txt_prob.get("No High Concept", 0):
            predicted_label = "1"
        else:
            predicted_label = "0"
        prompt = generate_prompt_only(img_res, txt_prob, predicted_label, txt)
        llm_res = generate_feedback_from_prompt(prompt)
        return img_res, txt_fmt, prompt, llm_res

    gr.Examples(
        examples=examples,
        inputs=[image_input, text_input],
        outputs=[image_output_label, text_output_textbox, prompt_output_textbox, llm_output_text],
        fn=generate_full_chain_output_step_by_step,
        cache_examples=False,
    )

if __name__ == "__main__":
    demo_step_by_step.launch()