TeeA committed on
Commit
eca560c
·
1 Parent(s): 182cad3

integrate ai agents

Browse files
Files changed (2) hide show
  1. app.py +252 -20
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import os
 
2
  import platform
3
  import re
4
  import subprocess # used to connect to FreeCAD via terminal sub process
@@ -12,6 +13,8 @@ import numpy as np
12
  import torch
13
  import torchvision.transforms.functional as TF
14
  import trimesh
 
 
15
  from llama_index.embeddings.clip import ClipEmbedding
16
  from llama_index.embeddings.openai import OpenAIEmbedding, OpenAIEmbeddingMode
17
  from loguru import logger
@@ -207,39 +210,260 @@ def search_3D_similarity(filepath: str, embedding_dict: dict, top_k: int = 4):
207
  ####################################################################################################################
208
  # Text-based Query
209
  ####################################################################################################################
210
-
211
-
212
- def query_3D_object(query: str, embedding_dict: dict, top_k: int = 4):
 
 
 
 
 
 
 
 
 
213
  if query == "":
214
  raise gr.Error("Query cannot be empty!")
215
  if len(embedding_dict) < 4:
216
  raise gr.Error("Require at least 4 3D files to query by features")
 
 
 
 
 
 
 
 
 
 
217
 
 
 
 
 
218
  features1 = np.array(text_embedding_model.get_text_embedding(text=query)).reshape(
219
  1, -1
220
  )
221
 
222
- # List to store (path, similarity)
223
  valid_items = [
224
  (fp, data["text_embedding"])
225
  for fp, data in embedding_dict.items()
226
  if "text_embedding" in data
227
  ]
228
  filepaths = [fp for fp, _ in valid_items]
229
- feature_matrix = np.array([feat for _, feat in valid_items]) # shape: (N, D)
230
- similarities = cosine_similarity(features1, feature_matrix)[0] # shape: (N,)
231
  scores = list(zip(filepaths, similarities))
232
-
233
- # Sort by similarity in descending order
234
  scores.sort(key=lambda x: x[1], reverse=True)
235
 
236
- if len(scores) < 4:
237
- scores.append(("", 0.0))
238
 
239
- # Return top_k results
240
- return [x[0] for x in scores[:top_k]] + [
241
- os.path.basename(x[0]) for x in scores[:top_k]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
242
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
243
 
244
 
245
  ####################################################################################################################
@@ -489,10 +713,14 @@ async def embedding_3d_object(obj_path: str) -> Dict[str, Any]:
489
 
490
  BASE_SAMPLE_DIR = "/Users/tridoan/Spartan/Datum/service-ai/poc/3D/gradio_cache/"
491
  sample_files = [
492
- # BASE_SAMPLE_DIR + "C5 Knuckle Object.obj",
493
- # BASE_SAMPLE_DIR + "NEMA 17 Stepper Motor 23mm-NEMA 17 Stepper Motor 23mm.obj",
494
- # BASE_SAMPLE_DIR + "TS6-THT_H-5.0.obj",
495
- # BASE_SAMPLE_DIR + "TS6-THT_H-11.0.obj"
 
 
 
 
496
  ]
497
 
498
 
@@ -546,10 +774,13 @@ async def accumulate_and_embedding(input_files, file_list, embedding_dict):
546
  + f".\n {'n' * 20}\nMetadata: "
547
  + metadata
548
  )
 
 
 
549
  # store embeddings and metadata
550
  embedding_dict[obj_path]["metadata"] = metadata
551
  embedding_dict[obj_path]["metadata_dictionary"] = normalize_metadata(
552
- metadata_aggregation.update(metadata_extraction) # type: ignore
553
  )
554
  embedding_dict[obj_path]["description"] = embeddings["description"]
555
  embedding_dict[obj_path]["image_embedding"] = embeddings["image_embedding"]
@@ -658,7 +889,7 @@ with gr.Blocks() as demo:
658
  # query button
659
  query_button.click(
660
  query_3D_object,
661
- [query_input, embedding_store],
662
  [
663
  model_q_1,
664
  model_q_2,
@@ -723,4 +954,5 @@ with gr.Blocks() as demo:
723
  )
724
 
725
  if __name__ == "__main__":
726
- demo.launch(share=True, debug=True)
 
 
1
  import os
2
+ from enum import Enum
3
  import platform
4
  import re
5
  import subprocess # used to connect to FreeCAD via terminal sub process
 
13
  import torch
14
  import torchvision.transforms.functional as TF
15
  import trimesh
16
+ import ast
17
+ from agents import Agent, Runner, function_tool
18
  from llama_index.embeddings.clip import ClipEmbedding
19
  from llama_index.embeddings.openai import OpenAIEmbedding, OpenAIEmbeddingMode
20
  from loguru import logger
 
210
  ####################################################################################################################
211
  # Text-based Query
212
  ####################################################################################################################
213
class Query3DObjectMethod(Enum):
    """Search strategies available to `query_3D_object`.

    HYBRID_SEARCH routes the query through the multi-agent pipeline;
    SEMANTIC_SEARCH ranks by plain text-embedding similarity.
    """

    HYBRID_SEARCH = "hybrid_search"
    SEMANTIC_SEARCH = "semantic_search"
216
+
217
+
218
async def query_3D_object(
    query: str,
    current_obj_path: str,
    embedding_dict: dict,
    top_k: int = 4,
    method: Query3DObjectMethod = Query3DObjectMethod.SEMANTIC_SEARCH,
) -> List:
    """Dispatch a natural-language query for 3D objects to the chosen strategy.

    Args:
        query: The user's natural-language query text.
        current_obj_path: Path of the 3D object currently shown in the UI.
        embedding_dict: Mapping of object path -> stored embeddings/metadata.
        top_k: Number of results to return.
        method: Which search strategy to run (hybrid agents or semantic).

    Returns:
        A list of ``top_k`` file paths followed by their ``top_k`` basenames.

    Raises:
        gr.Error: On an empty query, fewer than 4 indexed files, or an
            unsupported ``method`` value.
    """
    if query == "":
        raise gr.Error("Query cannot be empty!")
    # The UI renders 4 result slots, hence the minimum of 4 indexed files.
    if len(embedding_dict) < 4:
        raise gr.Error("Require at least 4 3D files to query by features")
    if method == Query3DObjectMethod.HYBRID_SEARCH:
        logger.info("Running query_3D_object_by_hybrid_search_method")
        return await query_3D_object_by_hybrid_search_method(
            query, current_obj_path, embedding_dict, top_k
        )
    if method == Query3DObjectMethod.SEMANTIC_SEARCH:
        logger.info("Running query_3D_object_by_semantic_search_method")
        return query_3D_object_by_semantic_search_method(
            query, current_obj_path, embedding_dict, top_k
        )
    # Previously an unrecognized method fell through and returned None
    # implicitly, which the Gradio outputs cannot unpack. Fail loudly instead.
    raise gr.Error(f"Unsupported query method: {method}")
239
 
240
+
241
def query_3D_object_by_semantic_search_method(
    query: str, current_obj_path: str, embedding_dict: dict, top_k: int = 4
) -> List:
    """Rank stored 3D objects by cosine similarity to the query text embedding.

    ``current_obj_path`` is unused here; it is kept for signature parity with
    the hybrid search method so both can be dispatched interchangeably.

    Args:
        query: Natural-language query text to embed.
        embedding_dict: Mapping of object path -> stored embeddings/metadata;
            only entries with a "text_embedding" key participate.
        top_k: Number of results to return.

    Returns:
        ``top_k`` file paths (best first, padded with "" when there are fewer
        candidates) followed by their ``top_k`` basenames.
    """
    # Only objects that already have a text embedding can be ranked.
    valid_items = [
        (fp, data["text_embedding"])
        for fp, data in embedding_dict.items()
        if "text_embedding" in data
    ]
    # Nothing to rank: return blank padding immediately. (Previously this
    # case crashed inside cosine_similarity on an empty feature matrix.)
    if not valid_items:
        return [""] * (2 * top_k)

    query_features = np.array(
        text_embedding_model.get_text_embedding(text=query)
    ).reshape(1, -1)

    filepaths = [fp for fp, _ in valid_items]
    feature_matrix = np.array([feat for _, feat in valid_items])  # shape: (N, D)
    similarities = cosine_similarity(query_features, feature_matrix)[0]  # (N,)
    scores = sorted(zip(filepaths, similarities), key=lambda x: x[1], reverse=True)

    # Pad with blanks so the UI always receives exactly top_k entries.
    if len(scores) < top_k:
        scores.extend([("", 0.0)] * (top_k - len(scores)))

    top_files = [fp for fp, _ in scores[:top_k]]
    return top_files + [os.path.basename(fp) for fp in top_files]
264
+
265
+
266
async def query_3D_object_by_hybrid_search_method(
    query: str, current_obj_path: str, embedding_dict: dict, top_k: int = 4
) -> List:
    """Answer a query via an agent that picks between semantic, similarity,
    and metadata-keyword search.

    A "Datum Agent" is given three tools (semantic search, visual similarity
    on the currently-displayed object, and a hand-off to a keyword-search
    agent that generates and executes a metadata ``match`` function) and its
    final tool output is parsed back into a flat result list.

    Args:
        query: Natural-language query text.
        current_obj_path: Path of the object currently shown in the UI.
        embedding_dict: Mapping of object path -> stored embeddings/metadata.
        top_k: Number of results to return.

    Returns:
        ``top_k`` file paths followed by their ``top_k`` basenames.

    Raises:
        gr.Error: When the agent produces no usable output or the generated
            match function is invalid.
    """

    # Keyword Search Agent tool: metadata filtering via generated code.
    @function_tool
    def query_3D_object_by_keyword_search(query: str, match_code: str, top_k: int = 4):
        logger.info("Datum Agent is running query_3D_object_by_keyword_search")
        logger.info(f"The 'match' function is:\n```{match_code}```")

        # Validate the shape of the generated code BEFORE executing it
        # (an `assert` was used here before, which is stripped under -O).
        if "def match(metadata: dict) -> bool:" not in match_code:
            raise gr.Error("Your query did not generate a valid match function.")

        # SECURITY: `match_code` is LLM-generated Python executed verbatim —
        # this trusts the model's output; consider sandboxing/AST-whitelisting.
        # exec() into an isolated namespace so the generated `match` is the one
        # actually called (the previous version exec'd into globals(), where it
        # was shadowed by a local stub that always returned True, so keyword
        # filtering never filtered anything) and so it cannot clobber module
        # state.
        namespace: dict = {}
        try:
            exec(match_code, namespace)
            match = namespace["match"]
        except Exception:
            raise gr.Error("Your query did not generate a valid match function.")

        matched_obj_paths = [
            obj_path
            for obj_path in embedding_dict
            if match(embedding_dict[obj_path]["metadata_dictionary"])
        ]

        top_files = matched_obj_paths[:top_k]
        # Pad so downstream always receives top_k paths + top_k basenames,
        # mirroring the semantic search method's padding behaviour.
        if len(top_files) < top_k:
            top_files += [""] * (top_k - len(top_files))
        return top_files + [os.path.basename(x) for x in top_files]

    METADATA_SCHEMA = """Schema of metadata_dictionary:
    - Volume: float
    - Surface_Area: float
    - Width: float
    - Height: float
    - Depth: float
    - Description: str
    - Description_Level: int
    - FileName: str
    - Created: str
    - Authors: str
    - Organizations: str
    - Preprocessor: str
    - OriginatingSystem: str
    - Authorization: str
    - Schema: str
    """

    QUERY_EXAMPLES = """Examples of natural language queries and their intended matching logic:

    ### Example 1: "width greater than 7"
    ```python
    def match(metadata: dict) -> bool:
        try:
            return float(metadata.get("Width", 0)) > 7
        except:
            return False
    ````

    ### Example 2: "description contains STEP"

    ```python
    def match(metadata: dict) -> bool:
        return "step" in str(metadata.get("Description", "")).lower()
    ```

    ### Example 3: "originating system is ASCON Math Kernel"

    ```python
    def match(metadata: dict) -> bool:
        return str(metadata.get("OriginatingSystem", "")).lower() == "ascon math kernel"
    ```

    ### Example 4: "volume < 200 and surface area > 300"

    ```python
    def match(metadata: dict) -> bool:
        try:
            return float(metadata.get("Volume", 0)) < 200 and float(metadata.get("Surface_Area", 0)) > 300
        except:
            return False
    ```

    ### Example 5: "schema contains 214"

    ```python
    def match(metadata: dict) -> bool:
        return "214" in str(metadata.get("Schema", ""))
    ```
    """

    MATCH_GEN_INSTRUCTION = """You are a Python code generator. Your job is to translate a natural language query into a function named `match(metadata: dict) -> bool`.

    Requirements:
    - Only use keys present in the schema.
    - Match strings case-insensitively.
    - For numerical comparisons, cast to float.
    - Combine conditions using logical `and`, `or` as inferred from natural language.
    - Handle missing keys by returning False.
    Return only the function code, nothing else.
    """

    @function_tool
    def get_prompt_to_generate_match_code(query: str) -> str:
        """
        Generate a prompt to create a match function based on the user's query.
        """
        return (
            METADATA_SCHEMA
            + QUERY_EXAMPLES
            + MATCH_GEN_INSTRUCTION
            + f"\nQuery: {query}\n"
        )

    KEYWORD_SEARCH_AGENT_INSTRUCTIONS = """You are a Keyword Search Agent specialized in metadata-based filtering.
    Given a natural language query from the user, you will automatically generate an executable `match` function based on the prompt provided by `get_prompt_to_generate_match_code`.
    Combine the `match` function with `query_3D_object_by_keyword_search` to filter the top-K matching 3D object paths."""

    keyword_search_agent = Agent(
        name="Keyword Search Agent",
        instructions=KEYWORD_SEARCH_AGENT_INSTRUCTIONS,
        tools=[get_prompt_to_generate_match_code, query_3D_object_by_keyword_search],
    )

    # Semantic search tool: closes over embedding_dict / current_obj_path.
    @function_tool
    def query_3D_object_by_semantic_search(query: str, top_k: int = 4):
        logger.info("Datum Agent is running query_3D_object_by_semantic_search")
        return query_3D_object_by_semantic_search_method(
            query, current_obj_path, embedding_dict, top_k
        )

    # Visual-similarity tool for "objects like the one on screen" queries.
    @function_tool
    def search_3D_similarity_factory(selected_filepath: str, top_k: int = 4):
        logger.info("Datum Agent is running search_3D_similarity_factory")
        return search_3D_similarity(selected_filepath, embedding_dict, top_k)

    DATUM_AGENT_INSTRUCTIONS = """You are the Datum Agent: you retrieve the top-K most relevant 3D objects using three strategies.
    * Use `query_3D_object_by_semantic_search` for abstract or descriptive queries.
    * Use `search_3D_similarity_factory` when the query mentions the object currently displayed on the screen and aims to find similar objects.
    * Use **Keyword Search Agent** for precise metadata constraints or comparative/filtering information in the query.
    Return only the final tuple of file paths and display names.
    """

    HANDOFF_DESCRIPTION = """Handing off to Datum Agent: you can perform semantic search, keyword-based filtering, or visual similarity search.
    If metadata filtering is required, delegate to the **Keyword Search Agent** by calling `get_prompt_to_generate_match_code`.
    """

    datum_agent = Agent(
        name="Datum Agent",
        handoff_description=HANDOFF_DESCRIPTION,
        instructions=DATUM_AGENT_INSTRUCTIONS,
        tools=[
            query_3D_object_by_semantic_search,
            search_3D_similarity_factory,
        ],
        handoffs=[keyword_search_agent],
    )

    # Prepare the prompt for the Datum Agent
    prompt_input = f"""An user is watching a 3D object and wants to query it.
    The query is: `{query}`.
    The current 3D object is `{current_obj_path}`.
    You need to find the most relevant 3D objects based on the query and return the top-k results.
    """

    ######################################################################
    # Run the agent to get the results
    ######################################################################
    response = await Runner.run(datum_agent, prompt_input)  # agent's final output

    # Filter the latest output with `function_call_output` type: the last tool
    # output is taken as the final file list.
    function_call_output_list = [
        item
        for item in response.to_input_list()
        if item.get("type") == "function_call_output"
    ]
    # Previously an empty list crashed with IndexError on [-1].
    if not function_call_output_list:
        raise gr.Error("Datum Agent did not return a valid list of file paths.")
    files_result = function_call_output_list[-1]
    logger.info(f"Datum Agent raw response: {files_result}")
    try:
        # literal_eval (not eval): only plain Python literals are accepted.
        result = ast.literal_eval(files_result.get("output", "[]"))  # type:ignore
    except Exception as e:
        logger.error(
            f"Datum Agent did not return a valid list of file paths due to {e}"
        )
        raise gr.Error("Datum Agent did not return a valid list of file paths.")
    if not isinstance(result, list):
        raise gr.Error("Datum Agent did not return a valid list of file paths.")
    # top_k paths + top_k basenames are expected (was a hard-coded 8 before).
    if len(result) < 2 * top_k:
        raise gr.Error("Datum Agent did not return enough results. Please try again.")
    return result
467
 
468
 
469
  ####################################################################################################################
 
713
 
714
  BASE_SAMPLE_DIR = "/Users/tridoan/Spartan/Datum/service-ai/poc/3D/gradio_cache/"
715
  sample_files = [
716
+ # BASE_SAMPLE_DIR + "C5 Knuckle Object.STEP",
717
+ # BASE_SAMPLE_DIR + "NEMA 17 Stepper Motor 23mm-NEMA 17 Stepper Motor 23mm.obj",
718
+ # BASE_SAMPLE_DIR + "TS6-THT_H-4.3.STEP",
719
+ # BASE_SAMPLE_DIR + "TS6-THT_H-5.0.STEP",
720
+ # BASE_SAMPLE_DIR + "TS6-THT_H-7.0.STEP",
721
+ # BASE_SAMPLE_DIR + "TS6-THT_H-7.3.STEP",
722
+ # BASE_SAMPLE_DIR + "TS6-THT_H-7.5.STEP",
723
+ # BASE_SAMPLE_DIR + "TS6-THT_H-11.0.STEP",
724
  ]
725
 
726
 
 
774
  + f".\n {'n' * 20}\nMetadata: "
775
  + metadata
776
  )
777
+ metadata_aggregation.update(
778
+ metadata_extraction
779
+ ) # !!! in-place function, return None
780
  # store embeddings and metadata
781
  embedding_dict[obj_path]["metadata"] = metadata
782
  embedding_dict[obj_path]["metadata_dictionary"] = normalize_metadata(
783
+ metadata_aggregation
784
  )
785
  embedding_dict[obj_path]["description"] = embeddings["description"]
786
  embedding_dict[obj_path]["image_embedding"] = embeddings["image_embedding"]
 
889
  # query button
890
  query_button.click(
891
  query_3D_object,
892
+ [query_input, model_render, embedding_store],
893
  [
894
  model_q_1,
895
  model_q_2,
 
954
  )
955
 
956
if __name__ == "__main__":
    # Decide whether to expose a public Gradio share link based on ENVIRONMENT.
    _env = os.environ.get("ENVIRONMENT", "dev")
    # NOTE(review): both "dev" and "prod" enable sharing, so the flag is only
    # off for unrecognized ENVIRONMENT values — confirm that is intended.
    # (`True if cond else False` simplified to the boolean expression itself.)
    demo.launch(share=_env in ("dev", "prod"))
requirements.txt CHANGED
@@ -16,4 +16,5 @@ numpy>=1.26.4,<2.0.0
16
  openai
17
  python-dotenv
18
  opencv-python
19
- Pillow
 
 
16
  openai
17
  python-dotenv
18
  opencv-python
19
+ Pillow
20
+ openai-agents