tugaa
/

testAgentForHFSpace

Model card Files Files and versions

xet

Community

tugaa committed on May 20, 2025

Commit

4a13218

verified ·

1 Parent(s): b308b2a

Create modules/input_processor.py

Browse files

Files changed (1) hide show

modules/input_processor.py +59 -0

modules/input_processor.py ADDED Viewed

	@@ -0,0 +1,59 @@

+# modules/input_processor.py
+import os
+import asyncio
+import mimetypes
+import langdetect
+from PIL import Image
+from moviepy.editor import VideoFileClip
+from openai import AsyncOpenAI
+client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))  # module-level async OpenAI client used by InputProcessor.describe_image; API key is read from the OPENAI_API_KEY environment variable
class InputProcessor:
    """Collect text, image and video inputs into a single context dict.

    Image and video content is summarised by sending a still image to the
    module-level OpenAI ``client``; videos are represented by one keyframe.
    """

    # Timestamp (seconds) of the frame used to represent a video.
    _KEYFRAME_TIME = 1
    # Only the first few seconds of a video are considered.
    _MAX_CLIP_SECONDS = 5
    # MIME type assumed when the file extension does not identify an image.
    _DEFAULT_IMAGE_MIME = "image/jpeg"

    def __init__(self):
        pass

    async def process(self, text, image_path, video_path):
        """Build the context dict for whichever inputs are provided.

        Args:
            text: User text, or a falsy value when there is none.
            image_path: Path to an image file, or a falsy value.
            video_path: Path to a video file, or a falsy value.

        Returns:
            A dict that always contains ``"modality"`` (list of the input
            kinds that were present, in text/image/video order) and
            ``"text"`` (the ``text`` argument unchanged). Depending on the
            inputs it also contains ``"language"``, ``"image_preview"`` /
            ``"image_summary"`` and ``"video_preview"`` / ``"video_summary"``.
        """
        context = {"modality": [], "text": text}

        if text:
            context["language"] = self._detect_language(text)
            context["modality"].append("text")

        if image_path:
            preview = Image.open(image_path)
            context["modality"].append("image")
            context["image_preview"] = preview
            context["image_summary"] = await self.describe_image(image_path)

        if video_path:
            preview = self._load_keyframe(video_path)
            context["modality"].append("video")
            context["video_preview"] = preview
            # Kept as a call to the public method so a richer video
            # description can be introduced there without touching process().
            context["video_summary"] = await self.describe_video(video_path)

        return context

    async def describe_image(self, image_path):
        """Return the model's concise text description of an image file."""
        # Read and encode before awaiting so no file handle stays open
        # across the network round-trip.
        data_url = self._image_data_url(image_path)
        response = await client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": "You are an assistant who explains image contents in concise text."},
                {"role": "user", "content": [{"type": "image_url", "image_url": {"url": data_url}}]},
            ],
        )
        return response.choices[0].message.content

    async def describe_video(self, video_path):
        """Return a text description of a video.

        Simplified version: a single keyframe is extracted and described by
        the LLM via :meth:`describe_image`.
        """
        import tempfile

        # A private temp directory per call keeps concurrent requests from
        # overwriting each other's keyframe.
        with tempfile.TemporaryDirectory() as workdir:
            keyframe_path = os.path.join(workdir, "keyframe.jpg")
            self._save_keyframe(video_path, keyframe_path)
            return await self.describe_image(keyframe_path)

    @staticmethod
    def _detect_language(text):
        """Best-effort language code for ``text``; ``"unknown"`` on failure."""
        try:
            return langdetect.detect(text)
        except Exception:
            # Detection is best-effort (e.g. text with no usable features);
            # unlike a bare ``except`` this lets KeyboardInterrupt/SystemExit
            # propagate.
            return "unknown"

    @classmethod
    def _image_data_url(cls, image_path):
        """Encode an image file as a ``data:<mime>;base64,<payload>`` URL."""
        import base64

        mime_type, _ = mimetypes.guess_type(image_path)
        if not mime_type or not mime_type.startswith("image/"):
            mime_type = cls._DEFAULT_IMAGE_MIME
        with open(image_path, "rb") as f:
            payload = base64.b64encode(f.read()).decode("ascii")
        return f"data:{mime_type};base64,{payload}"

    @classmethod
    def _save_keyframe(cls, video_path, keyframe_path):
        """Write one representative frame of the video to ``keyframe_path``."""
        source = VideoFileClip(video_path)
        try:
            window = source.subclip(0, min(cls._MAX_CLIP_SECONDS, source.duration))
            # Fall back to the first frame when the clip is too short to have
            # a frame at the preferred timestamp.
            t = cls._KEYFRAME_TIME if source.duration > cls._KEYFRAME_TIME else 0
            window.save_frame(keyframe_path, t=t)
        finally:
            # Releases the underlying reader; the subclip shares it.
            source.close()

    @classmethod
    def _load_keyframe(cls, video_path):
        """Return the video's keyframe as a fully loaded PIL image."""
        import tempfile

        with tempfile.TemporaryDirectory() as workdir:
            keyframe_path = os.path.join(workdir, "keyframe.jpg")
            cls._save_keyframe(video_path, keyframe_path)
            preview = Image.open(keyframe_path)
            # Image.open is lazy; force the pixel data into memory before the
            # temporary file is removed.
            preview.load()
        return preview