tugaa
/

testAgentForHFSpace

Model card Files Files and versions

xet

Community

tugaa committed on May 20, 2025

Commit

21f9bfa

verified ·

1 Parent(s): 4a13218

Create modules/task_decomposer.py

Browse files

Files changed (1) hide show

modules/task_decomposer.py +47 -0

modules/task_decomposer.py ADDED Viewed

	@@ -0,0 +1,47 @@

+# modules/task_decomposer.py
+import os
+from openai import AsyncOpenAI
+client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
class TaskDecomposer:
    """Turn a multimodal request into a list of web-searchable subtasks.

    The actual planning is delegated to an OpenAI chat model through the
    module-level ``client``; this class only builds the prompt and parses
    the model's reply.
    """

    # Context keys that are rendered into the prompt, paired with the label
    # each one appears under. The order here is the order used in the prompt.
    _CONTEXT_FIELDS = (
        ("text", "Text"),
        ("image_summary", "Image summary"),
        ("video_summary", "Video summary"),
    )

    # Instruction sent ahead of the rendered context fields.
    _INSTRUCTIONS = (
        "Given the following multimodal input, generate a list of clear, "
        "web-searchable subtasks needed to achieve the user's goal. "
        "Output the list in JSON array format, with each item as an object "
        "containing 'query', 'language', and 'modality'."
    )

    def __init__(self):
        """Create a decomposer; there is no per-instance configuration."""
        pass

    async def decompose(self, context) -> list:
        """Ask the chat model to split ``context`` into subtasks.

        Args:
            context: Mapping that may contain the keys ``"text"``,
                ``"image_summary"`` and ``"video_summary"``; any other keys
                are ignored when the prompt is built.

        Returns:
            The JSON array parsed from the model's reply, or ``[]`` when the
            reply cannot be parsed as a JSON array.
        """
        base_prompt = self._build_prompt(context)
        response = await client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": "You are a task planner that breaks down multimodal user goals into executable subtasks."},
                {"role": "user", "content": base_prompt},
            ],
        )
        content = response.choices[0].message.content
        return self._parse_subtasks(content)

    def _build_prompt(self, context) -> str:
        """Render the user prompt from the recognised keys of ``context``.

        Only keys that are present in ``context`` contribute a line; a
        context with none of them yields the instructions followed by an
        empty description.
        """
        description = [
            f"{label}: {context[key]}"
            for key, label in self._CONTEXT_FIELDS
            if key in context
        ]
        combined = "\n".join(description)
        return f"\n{self._INSTRUCTIONS}\n{combined}\n"

    def _parse_subtasks(self, llm_output) -> list:
        """Parse the model's reply into a list of subtasks.

        The reply is first parsed as-is. If that fails and the reply is a
        string containing a Markdown code fence, the content of the first
        fenced block is tried instead, because chat models frequently wrap
        JSON in fenced code blocks.

        Args:
            llm_output: Raw reply text; ``None`` and other unparsable values
                are tolerated.

        Returns:
            The decoded JSON array, or ``[]`` if nothing could be decoded or
            the decoded value is not a list.
        """
        import json

        candidates = [llm_output]
        if isinstance(llm_output, str):
            fenced = self._extract_fenced_block(llm_output)
            if fenced is not None:
                candidates.append(fenced)

        for candidate in candidates:
            try:
                parsed = json.loads(candidate)
            except (TypeError, ValueError, RecursionError):
                # TypeError: non-text input such as None.
                # ValueError: malformed JSON (JSONDecodeError) or bad bytes.
                # RecursionError: pathologically nested input.
                # Anything else (e.g. task cancellation, KeyboardInterrupt)
                # is deliberately allowed to propagate.
                continue
            # The prompt asks for an array; any other JSON value is treated
            # like an unparsable reply so callers always receive a list.
            return parsed if isinstance(parsed, list) else []
        return []

    @staticmethod
    def _extract_fenced_block(text):
        """Return the body of the first ``` fenced block in ``text``.

        An optional language tag on the opening fence line (for example
        ``json``) is dropped. Returns ``None`` when ``text`` has no complete
        fenced block.
        """
        start = text.find("```")
        if start == -1:
            return None
        end = text.find("```", start + 3)
        if end == -1:
            return None

        inner = text[start + 3:end]
        first_line, newline, rest = inner.partition("\n")
        tag = first_line.strip()
        # Treat the first line as a language tag only if it looks like one;
        # otherwise it is already part of the payload (e.g. "```[1, 2]```").
        if newline and (not tag or tag.replace("-", "").replace("_", "").isalnum()):
            return rest
        return inner