using System;
using System.Collections.Generic;
using System.Text;
using System.Threading.Tasks;
using Microsoft.Extensions.AI;
using OnDeviceAgent.Inference;

namespace OnDeviceAgent.AgentCore
{
    /// <summary>
    /// Agent tool that runs the YOLO detector on the latest camera frame and answers with a
    /// one-sentence tally of the detected object classes (for example "Currently visible: 2 people, 1 dog.").
    /// </summary>
    public sealed class CountVisibleObjectsTool : AgentToolBase
    {
        // Class label that is always listed first in the summary.
        const string k_PersonClass = "person";

        // Label used when a detection carries no class name, so it is still counted instead of
        // making the dictionary lookup throw on a null key.
        const string k_UnknownClass = "object";

        // Labels whose plural is not formed by appending "s"/"es" (or that are already plural).
        static readonly Dictionary<string, string> s_IrregularPlurals =
            new Dictionary<string, string>(StringComparer.Ordinal)
            {
                { "person", "people" },
                { "sheep", "sheep" },
                { "skis", "skis" },
                { "scissors", "scissors" },
                { "knife", "knives" },
                { "mouse", "mice" },
            };

        readonly CameraInputService m_Camera;
        readonly YoloDetector m_Yolo;

        /// <summary>Creates the tool.</summary>
        /// <param name="camera">Live camera source; may be null, in which case the tool reports that vision is not configured.</param>
        /// <param name="yolo">Object detector; may be null, in which case the tool reports that vision is not configured.</param>
        public CountVisibleObjectsTool(CameraInputService camera, YoloDetector yolo)
        {
            m_Camera = camera;
            m_Yolo = yolo;
        }

        public override string Name => "CountVisibleObjects";

        public override string Description =>
            "Count how many of something are in the live camera view right now (e.g. 'how many people do you see', '사람 몇 명 보여').";

        /// <summary>
        /// Wraps the counting logic in a parameterless AI function registered under <see cref="Name"/>.
        /// </summary>
        /// <param name="context">Per-call agent context; its dispatcher is used to hop to the main thread.</param>
        /// <returns>The AI function the agent can invoke.</returns>
        public override AITool ToAIFunction(AgentToolContext context)
        {
            // Stays null when camera or detector is missing; "Vision is not configured." is handed
            // to RunGuarded alongside it (RunGuarded is defined outside this file — presumably it
            // returns that message for a null body; confirm there).
            Func<Task<string>> body = null;
            if (m_Camera != null && m_Yolo != null)
            {
                body = () => CountAsync(context);
            }

            return AIFunctionFactory.Create(
                new Func<Task<string>>(() => RunGuarded(context, "{}", body, "Vision is not configured.")),
                name: Name,
                description: Description);
        }

        /// <summary>
        /// Detects objects in the latest camera frame and returns the human-readable tally.
        /// </summary>
        async Task<string> CountAsync(AgentToolContext context)
        {
            // WebCamTexture + Worker.Schedule are main-thread only.
            var classNames = await context.Dispatcher.RunOnMainAsync<List<string>>(async () =>
            {
                // null (as opposed to an empty list) signals "no frame available yet".
                if (!m_Camera.HasFrame)
                {
                    return null;
                }

                // Surface the same frame in the result window (fires FrameJpegCaptured), matching the
                // vision tool — so the user sees what was counted, not just the number.
                m_Camera.GetLatestFrameJpeg();

                var names = new List<string>();
                if (!m_Yolo.IsReady)
                {
                    return names;
                }

                var detections = await m_Yolo.DetectAsync(m_Camera.GetLatestFrameTexture()).ConfigureAwait(false);
                if (detections != null)
                {
                    // Only the class label is needed for the summary, so hand back plain strings.
                    foreach (var detection in detections)
                    {
                        names.Add(detection.ClassName);
                    }
                }

                return names;
            }).ConfigureAwait(false);

            return classNames == null
                ? "The camera has no frame yet."
                : Summarize(classNames);
        }

        /// <summary>
        /// Builds the summary sentence from one class label per detection.
        /// "person" is listed first, then classes by descending count; equal counts are ordered by
        /// label (ordinal) so the sentence is stable from call to call.
        /// </summary>
        /// <param name="classNames">One entry per detection; null or empty yields the "nothing visible" message.</param>
        static string Summarize(List<string> classNames)
        {
            if (classNames == null || classNames.Count == 0)
            {
                return "No recognizable objects are visible in the current camera view.";
            }

            var counts = new Dictionary<string, int>(StringComparer.Ordinal);
            foreach (var rawName in classNames)
            {
                var label = string.IsNullOrEmpty(rawName) ? k_UnknownClass : rawName;
                counts.TryGetValue(label, out var seen);
                counts[label] = seen + 1;
            }

            var ordered = new List<KeyValuePair<string, int>>(counts);
            ordered.Sort(CompareForDisplay);

            var sb = new StringBuilder("Currently visible: ");
            for (var i = 0; i < ordered.Count; i++)
            {
                if (i > 0)
                {
                    sb.Append(", ");
                }

                sb.Append(ordered[i].Value).Append(' ').Append(Pluralize(ordered[i].Key, ordered[i].Value));
            }

            sb.Append('.');
            return sb.ToString();
        }

        /// <summary>Ordering used by <see cref="Summarize"/>: person first, then count descending, then label.</summary>
        static int CompareForDisplay(KeyValuePair<string, int> a, KeyValuePair<string, int> b)
        {
            var aIsPerson = a.Key == k_PersonClass;
            var bIsPerson = b.Key == k_PersonClass;
            if (aIsPerson != bIsPerson)
            {
                return aIsPerson ? -1 : 1;
            }

            var byCount = b.Value.CompareTo(a.Value);
            if (byCount != 0)
            {
                return byCount;
            }

            // List<T>.Sort is unstable; without a tie-break, equal counts come out in arbitrary order.
            return string.CompareOrdinal(a.Key, b.Key);
        }

        /// <summary>
        /// Returns the English plural of <paramref name="noun"/> when <paramref name="count"/> is greater than one,
        /// otherwise the noun unchanged.
        /// </summary>
        static string Pluralize(string noun, int count)
        {
            if (count <= 1 || string.IsNullOrEmpty(noun))
            {
                return noun;
            }

            if (s_IrregularPlurals.TryGetValue(noun, out var irregular))
            {
                return irregular;
            }

            if (noun.EndsWith("s", StringComparison.Ordinal)
                || noun.EndsWith("x", StringComparison.Ordinal)
                || noun.EndsWith("ch", StringComparison.Ordinal)
                || noun.EndsWith("sh", StringComparison.Ordinal))
            {
                return noun + "es";
            }

            // consonant + "y" -> "ies" (e.g. "butterfly" -> "butterflies"); vowel + "y" just takes "s".
            if (noun.Length >= 2 && noun[noun.Length - 1] == 'y')
            {
                var beforeY = noun[noun.Length - 2];
                if (char.IsLetter(beforeY) && "aeiou".IndexOf(beforeY) < 0)
                {
                    return noun.Substring(0, noun.Length - 1) + "ies";
                }
            }

            return noun + "s";
        }
    }
}