// com.sky.ondeviceagent/Runtime/AgentCore/Tools/Builtin/CountVisibleObjectsTool.cs
// Initial commit 2e7837a (Sky-Kim), 3.68 kB
using System;
using System.Collections.Generic;
using System.Text;
using System.Threading.Tasks;
using Microsoft.Extensions.AI;
using OnDeviceAgent.Inference;
namespace OnDeviceAgent.AgentCore
{
/// <summary>
/// Agent tool that runs the YOLO detector on the latest camera frame and answers with a single
/// English sentence listing how many objects of each detected class are visible.
/// </summary>
public sealed class CountVisibleObjectsTool : AgentToolBase
{
    // Label used for detections whose class name is null or empty, so they are still counted
    // instead of making the dictionary lookup throw.
    const string k_UnlabeledName = "object";

    // Plurals that the suffix rules in Pluralize would get wrong: irregular forms plus labels that
    // are already plural or read the same in singular and plural. Several of these are standard
    // COCO class names ("knife", "sheep", "scissors", "skis", "broccoli").
    static readonly Dictionary<string, string> s_IrregularPlurals =
        new Dictionary<string, string>(StringComparer.Ordinal)
        {
            { "person", "people" },
            { "man", "men" },
            { "woman", "women" },
            { "child", "children" },
            { "knife", "knives" },
            { "sheep", "sheep" },
            { "scissors", "scissors" },
            { "skis", "skis" },
            { "broccoli", "broccoli" },
        };

    readonly CameraInputService m_Camera;
    readonly YoloDetector m_Yolo;

    /// <summary>
    /// Creates the tool. Either dependency may be null; in that case <see cref="ToAIFunction"/>
    /// hands a null body to RunGuarded together with the "Vision is not configured." message.
    /// </summary>
    /// <param name="camera">Source of the live camera frame.</param>
    /// <param name="yolo">Object detector run on that frame.</param>
    public CountVisibleObjectsTool(CameraInputService camera, YoloDetector yolo)
    {
        m_Camera = camera;
        m_Yolo = yolo;
    }

    /// <summary>Name under which the tool is registered with the model.</summary>
    public override string Name => "CountVisibleObjects";

    /// <summary>Natural-language description of the tool passed to the model.</summary>
    public override string Description =>
        "Count how many of something are in the live camera view right now (e.g. 'how many people do you see', '사람 몇 명 보여').";

    /// <summary>
    /// Builds the parameterless AI function for this tool.
    /// </summary>
    /// <param name="context">Agent context; its dispatcher is used to hop to the main thread.</param>
    /// <returns>An <see cref="AITool"/> that yields the count summary as a string.</returns>
    public override AITool ToAIFunction(AgentToolContext context)
    {
        // Left null when vision is unavailable; RunGuarded receives the fallback message for that
        // case (presumably it returns it when the body is null — confirm in AgentToolBase).
        Func<Task<string>> body = null;
        if (m_Camera != null && m_Yolo != null)
        {
            body = async () =>
            {
                // WebCamTexture + Worker.Schedule are main-thread only.
                var detections = await context.Dispatcher.RunOnMainAsync<List<YoloDetector.Detection>>(async () =>
                {
                    if (!m_Camera.HasFrame)
                    {
                        return null;
                    }

                    // Surface the same frame in the result window (fires FrameJpegCaptured), matching the
                    // vision tool — so the user sees what was counted, not just the number.
                    m_Camera.GetLatestFrameJpeg();

                    // NOTE(review): a detector that is not ready yields an empty list, which is
                    // reported as "no recognizable objects" — confirm that wording is intended.
                    if (!m_Yolo.IsReady)
                    {
                        return new List<YoloDetector.Detection>();
                    }

                    return await m_Yolo.DetectAsync(m_Camera.GetLatestFrameTexture()).ConfigureAwait(false);
                }).ConfigureAwait(false);

                // A null list is the "no frame" signal from the main-thread callback above.
                return detections == null ? "The camera has no frame yet." : Summarize(detections);
            };
        }

        // The tool takes no arguments; "{}" is presumably the empty argument payload that
        // RunGuarded records or logs — confirm against AgentToolBase.
        return AIFunctionFactory.Create(
            new Func<Task<string>>(() => RunGuarded(context, "{}", body, "Vision is not configured.")),
            name: Name,
            description: Description);
    }

    /// <summary>
    /// Turns a detection list into a sentence such as "Currently visible: 2 people, 3 chairs.".
    /// People are listed first, then classes by descending count; ties are ordered by class name
    /// so the same detections always produce the same sentence.
    /// </summary>
    /// <param name="detections">Detections to count; null or empty yields the "nothing visible" message.</param>
    /// <returns>The human-readable summary.</returns>
    static string Summarize(List<YoloDetector.Detection> detections)
    {
        if (detections == null || detections.Count == 0)
        {
            return "No recognizable objects are visible in the current camera view.";
        }

        var counts = new Dictionary<string, int>(StringComparer.Ordinal);
        foreach (var d in detections)
        {
            // A null key would make the dictionary throw; bucket unlabeled detections instead.
            var label = string.IsNullOrEmpty(d.ClassName) ? k_UnlabeledName : d.ClassName;
            counts.TryGetValue(label, out var n);
            counts[label] = n + 1;
        }

        var ordered = new List<KeyValuePair<string, int>>(counts);
        ordered.Sort((a, b) =>
        {
            var aIsPerson = a.Key == "person";
            var bIsPerson = b.Key == "person";
            if (aIsPerson != bIsPerson)
            {
                return aIsPerson ? -1 : 1;
            }

            var byCount = b.Value.CompareTo(a.Value);

            // List<T>.Sort is not stable, so break count ties explicitly.
            return byCount != 0 ? byCount : string.CompareOrdinal(a.Key, b.Key);
        });

        var sb = new StringBuilder("Currently visible: ");
        for (var i = 0; i < ordered.Count; i++)
        {
            if (i > 0)
            {
                sb.Append(", ");
            }

            sb.Append(ordered[i].Value).Append(' ').Append(Pluralize(ordered[i].Key, ordered[i].Value));
        }

        sb.Append('.');
        return sb.ToString();
    }

    /// <summary>
    /// Returns the English form of <paramref name="noun"/> that matches <paramref name="count"/>.
    /// Only the last word of a multi-word label is inflected ("wine glass" becomes "wine glasses").
    /// </summary>
    /// <param name="noun">Singular, lower-case class label.</param>
    /// <param name="count">Number of objects; exactly 1 keeps the singular.</param>
    /// <returns>The singular or plural label; null or empty input is returned unchanged.</returns>
    static string Pluralize(string noun, int count)
    {
        if (count == 1 || string.IsNullOrEmpty(noun))
        {
            return noun;
        }

        // Split "wine glass" into prefix "wine " and head word "glass".
        var headStart = noun.LastIndexOf(' ') + 1;
        var prefix = noun.Substring(0, headStart);
        var word = noun.Substring(headStart);
        if (word.Length == 0)
        {
            return noun;
        }

        if (s_IrregularPlurals.TryGetValue(word, out var irregular))
        {
            return prefix + irregular;
        }

        // Ordinal comparisons: labels are identifiers, so device culture must not affect the result.
        if (word.EndsWith("s", StringComparison.Ordinal)
            || word.EndsWith("x", StringComparison.Ordinal)
            || word.EndsWith("ch", StringComparison.Ordinal)
            || word.EndsWith("sh", StringComparison.Ordinal))
        {
            return prefix + word + "es";
        }

        // Consonant + "y" becomes "ies" ("berry" -> "berries"); vowel + "y" just takes "s" ("key" -> "keys").
        if (word.Length >= 2
            && word[word.Length - 1] == 'y'
            && "aeiou".IndexOf(word[word.Length - 2]) < 0)
        {
            return prefix + word.Substring(0, word.Length - 1) + "ies";
        }

        return prefix + word + "s";
    }
}
}