Buckets:

rtrm's picture
download
raw
25.4 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Analytics and Metrics&quot;,&quot;local&quot;:&quot;analytics-and-metrics&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Understanding the graphs&quot;,&quot;local&quot;:&quot;understanding-the-graphs&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Number of (HTTP) Requests&quot;,&quot;local&quot;:&quot;number-of-http-requests&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Pending Requests&quot;,&quot;local&quot;:&quot;pending-requests&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Latency Distribution&quot;,&quot;local&quot;:&quot;latency-distribution&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Running Replicas&quot;,&quot;local&quot;:&quot;running-replicas&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Compute&quot;,&quot;local&quot;:&quot;compute&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Create an integration with the Inference Endpoints Metrics API&quot;,&quot;local&quot;:&quot;create-an-integration-with-the-inference-endpoints-metrics-api&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Connect with your internal tool&quot;,&quot;local&quot;:&quot;connect-with-your-internal-tool&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Subscribe to Enterprise&quot;,&quot;local&quot;:&quot;subscribe-to-enterprise&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/inference-endpoints/pr_136/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_136/en/_app/immutable/entry/start.fb9ab4d6.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_136/en/_app/immutable/chunks/scheduler.f6b352c8.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_136/en/_app/immutable/chunks/singletons.ceca4163.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_136/en/_app/immutable/chunks/index.26cf6c5a.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_136/en/_app/immutable/chunks/paths.142cd5df.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_136/en/_app/immutable/entry/app.6247727a.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_136/en/_app/immutable/chunks/index.b90df637.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_136/en/_app/immutable/nodes/0.2fcde12d.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_136/en/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_136/en/_app/immutable/nodes/11.a0af82ee.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_136/en/_app/immutable/chunks/getInferenceSnippets.1e3ae0bf.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Analytics and Metrics&quot;,&quot;local&quot;:&quot;analytics-and-metrics&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Understanding the graphs&quot;,&quot;local&quot;:&quot;understanding-the-graphs&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Number of (HTTP) Requests&quot;,&quot;local&quot;:&quot;number-of-http-requests&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Pending Requests&quot;,&quot;local&quot;:&quot;pending-requests&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Latency Distribution&quot;,&quot;local&quot;:&quot;latency-distribution&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Running Replicas&quot;,&quot;local&quot;:&quot;running-replicas&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Compute&quot;,&quot;local&quot;:&quot;compute&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Create an integration with the Inference Endpoints Metrics API&quot;,&quot;local&quot;:&quot;create-an-integration-with-the-inference-endpoints-metrics-api&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Connect with your internal tool&quot;,&quot;local&quot;:&quot;connect-with-your-internal-tool&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Subscribe to Enterprise&quot;,&quot;local&quot;:&quot;subscribe-to-enterprise&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="analytics-and-metrics" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 
with-hover:group-hover:opacity-100 with-hover:right-full" href="#analytics-and-metrics"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Analytics and Metrics</span></h1> <p data-svelte-h="svelte-1qwdd7g">The Analytics page is like the control center for your deployed models. It tells you in real-time what’s going on, how many users are
calling your models, about hardware usage, latencies, and much more. In this documentation we’ll dive into what each metric means and
how to analyze the graphs.</p> <p data-svelte-h="svelte-1lzx4cg"><img src="https://raw.githubusercontent.com/huggingface/hf-endpoints-documentation/main/assets/analytics/1-intro.png" alt="intro"></p> <p data-svelte-h="svelte-2q1i31">In the top bar, you can configure the high-level view:</p> <ul data-svelte-h="svelte-1tmrvm6"><li>Which replica to view metrics from: either an individual replica or all.</li> <li>Whether to view metrics related to requests, hardware, or the timeline of replicas.</li> <li>Which time frame to inspect the metrics over. This setting affects all graphs on the page. You can choose between any of the existing settings from the dropdown, or click-and-drag over any graph for a custom timeframe. You can also enable/disable
auto refresh or view the metrics per replica or all.</li></ul> <p data-svelte-h="svelte-s7g9t9"><img src="https://raw.githubusercontent.com/huggingface/hf-endpoints-documentation/main/assets/analytics/2-config.png" alt="config"></p> <h2 class="relative group"><a id="understanding-the-graphs" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#understanding-the-graphs"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Understanding the graphs</span></h2> <h3 class="relative group"><a id="number-of-http-requests" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#number-of-http-requests"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 
0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Number of (HTTP) Requests</span></h3> <p data-svelte-h="svelte-15jqzd1">The first graph at the top left shows you how many requests your Inference Endpoint has received. By default they are grouped by HTTP response
classes, but by switching the toggle you can view them by individual status. As a reminder the HTTP response classes are:</p> <ul data-svelte-h="svelte-18dtmml"><li><strong>Informational responses (100-199)</strong>: The server has received your request and is working on it. For example, <code>102 Processing</code> means the server is still handling your request.</li> <li><strong>Successful responses (200-299)</strong>: Your request was received and completed successfully. For example, <code>200 OK</code> means everything worked as expected.</li> <li><strong>Redirection messages (300-399)</strong>: The server is telling your client to look somewhere else for the information or to take another action. For example, <code>301 Moved Permanently</code> means the resource has a new address.</li> <li><strong>Client error responses (400-499)</strong>: There was a problem with the request sent by your client (like a typo in the URL or missing data). For example, <code>404 Not Found</code> means the server couldn’t find what you asked for.</li> <li><strong>Server error responses (500-599)</strong>: The server ran into an issue while trying to process your request. For example, <code>502 Bad Gateway</code> means the server got an invalid response from another server it tried to contact.</li></ul> <p data-svelte-h="svelte-1kwgrsz">We recommend checking the <a href="https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Status" rel="nofollow">MDN web docs</a> for more information on individual
status codes.</p> <p data-svelte-h="svelte-1ibhate">The boxes above the graph also show the % of requests in the respective response class.</p> <p data-svelte-h="svelte-1xw4r5k"><img src="https://raw.githubusercontent.com/huggingface/hf-endpoints-documentation/main/assets/analytics/3-http-reqs.png" alt="http"></p> <h3 class="relative group"><a id="pending-requests" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pending-requests"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Pending Requests</span></h3> <p data-svelte-h="svelte-4jupc9">Pending requests are requests that have not yet received an HTTP status, meaning they include in-flight requests and requests currently
being processed. If this metric increases too much, it means that your requests are queuing up, and your users have to wait for requests
to finish. In this case you should consider increasing your number of replicas or, alternatively, using autoscaling; you can read more about
it in the <a href="./autoscaling#scaling-based-on-pending-requests-beta-feature">autoscaling guide</a>.</p> <p data-svelte-h="svelte-eb33tn"><img src="https://raw.githubusercontent.com/huggingface/hf-endpoints-documentation/main/assets/analytics/4-pending-reqs.png" alt="pending"></p> <h3 class="relative group"><a id="latency-distribution" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#latency-distribution"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Latency Distribution</span></h3> <p data-svelte-h="svelte-20agt5">From this graph you’ll be able to see how long it takes for your Inference Endpoint to generate a response. 
Latency is reported as:</p> <ul data-svelte-h="svelte-1ypi2lx"><li><strong>p99</strong>: meaning that 99% of all requests were faster than this value</li> <li><strong>p95</strong>: meaning that 95% of all requests were faster than this value</li> <li><strong>p90</strong>: meaning that 90% of all requests were faster than this value</li> <li><strong>median</strong>: meaning that 50% of all requests were faster than this value</li></ul> <p data-svelte-h="svelte-19ch391">Usually a good metric is also to look at how big the difference is between the median and p99. The closer the values are to each other, the more
uniform the latency is, whereas if the difference is large, it means that the users of your Inference Endpoint have in general a fast response but
the worst case latencies can be long.</p> <p data-svelte-h="svelte-1eclpic"><img src="https://raw.githubusercontent.com/huggingface/hf-endpoints-documentation/main/assets/analytics/5-latency.png" alt="latency"></p> <h3 class="relative group"><a id="running-replicas" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#running-replicas"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Running Replicas</span></h3> <p data-svelte-h="svelte-kcly0l">In the running replica graph, you’ll see how many running replicas you have during a point in time. The red line shows
your current maximum replicas setting.</p> <p data-svelte-h="svelte-4i74o4"><img src="https://raw.githubusercontent.com/huggingface/hf-endpoints-documentation/main/assets/analytics/6-running.png" alt="status"></p> <p data-svelte-h="svelte-f72rot">For a more advanced view of different statuses for individual replicas, going from <em>pending</em> all the way
to <em>running</em>, you can toggle to the Timeline section. This is very useful to get a sense of how long it takes an Endpoint to become ready to serve requests.</p> <p data-svelte-h="svelte-bb5nbl"><img src="https://raw.githubusercontent.com/huggingface/hf-endpoints-documentation/main/assets/analytics/7-timeline.png" alt="advanced"></p> <h3 class="relative group"><a id="compute" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#compute"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Compute</span></h3> <p data-svelte-h="svelte-2qmv22">These four graphs are dedicated to hardware usage. 
You’ll find:</p> <ul data-svelte-h="svelte-c4522"><li>CPU usage: How much processing power is being used.</li> <li>Memory usage: How much RAM is being used.</li> <li>GPU usage: How much of the GPU’s processing power is being used.</li> <li>GPU Memory (VRAM) usage: How much GPU memory is being used.</li></ul> <p data-svelte-h="svelte-g3bjeh"><img src="https://raw.githubusercontent.com/huggingface/hf-endpoints-documentation/main/assets/analytics/8-usage.png" alt="usage"></p> <p data-svelte-h="svelte-1lyund8">By toggling “details” you can either view the average or per replica value for the metric in question.</p> <p data-svelte-h="svelte-1l2kowr">If you have autoscaling based on hardware utilization enabled, these are the metrics that determine your autoscaling behaviour. You can
read more about autoscaling in the <a href="./autoscaling#scaling-based-on-hardware-utilization">autoscaling guide</a>.</p> <h2 class="relative group"><a id="create-an-integration-with-the-inference-endpoints-metrics-api" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#create-an-integration-with-the-inference-endpoints-metrics-api"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Create an integration with the Inference Endpoints Metrics API</span></h2> <p data-svelte-h="svelte-147g635"><strong>This feature is currently in Beta. You will need to be subscribed to Enterprise to take advantage of this feature.</strong></p> <p data-svelte-h="svelte-j0p83d">You have the ability to integrate the metrics of your Inference Endpoint(s) into your internal tool.</p> <p data-svelte-h="svelte-1ydj90u">Utilizing OpenMetrics, you can create an integration to allow for a more granular view of your Endpoint’s metrics in almost-real-time,
showing for example:</p> <ul data-svelte-h="svelte-14hfqsy"><li>requests grouped by replica</li> <li>latency distribution of requests</li> <li>hardware metrics for all accelerator types</li></ul> <p data-svelte-h="svelte-10une11">OpenMetrics is a standardized format for representing and transmitting time series data, making it easier for systems to consume and
process metrics, ensuring that the data is structured optimally for storage and transport.</p> <p data-svelte-h="svelte-n96fxh">Further configurations and notifications can be set up for your Endpoints based on these metrics in your internal tool.</p> <h3 class="relative group"><a id="connect-with-your-internal-tool" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#connect-with-your-internal-tool"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Connect with your internal tool</span></h3> <p data-svelte-h="svelte-1cy1im7">There are a variety of tools that work with OpenMetrics. You’ll need to set up an agent. 
Here are some example docs to help you get started:</p> <ul data-svelte-h="svelte-1l539gy"><li><a href="https://docs.datadoghq.com/integrations/openmetrics/" rel="nofollow">Datadog</a></li> <li><a href="https://tinyurl.com/e4fypk5m" rel="nofollow">Grafana</a></li></ul> <h3 class="relative group"><a id="subscribe-to-enterprise" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#subscribe-to-enterprise"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Subscribe to Enterprise</span></h3> <p data-svelte-h="svelte-1vhnhgq">You can sign up for an Enterprise plan starting at $20/user/mo at any time at <a href="https://huggingface.co/enterprise?subscribe=true" rel="nofollow">https://huggingface.co/enterprise?subscribe=true</a>.
For any questions or feature requests, please email us at <a href="mailto:api-enterprise@huggingface.co">api-enterprise@huggingface.co</a>.</p> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/hf-endpoints-documentation/blob/main/docs/source/guides/analytics.mdx" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>
<script>
{
  // Settings object assigned WITHOUT a declaration, so in this classic
  // (non-module) script it becomes a property of the global object.
  // NOTE(review): presumably read by the imported entry modules - confirm.
  __sveltekit_1q0n26o = {
    assets: "/docs/inference-endpoints/pr_136/en",
    base: "/docs/inference-endpoints/pr_136/en",
    env: {}
  };

  // document.currentScript is only available while this script executes
  // synchronously, so the parent element is captured before any async work.
  const mountTarget = document.currentScript.parentElement;

  // Options handed to kit.start(); one `data` slot per entry in node_ids.
  const startOptions = {
    node_ids: [0, 11],
    data: [null, null],
    form: null,
    error: null
  };

  // Load both entry modules in parallel, then start the app on the captured element.
  const entryModules = [
    import("/docs/inference-endpoints/pr_136/en/_app/immutable/entry/start.fb9ab4d6.js"),
    import("/docs/inference-endpoints/pr_136/en/_app/immutable/entry/app.6247727a.js")
  ];

  Promise.all(entryModules).then((loaded) => {
    const kit = loaded[0];
    const app = loaded[1];
    kit.start(app, mountTarget, startOptions);
  });
}
</script>

Xet Storage Details

Size:
25.4 kB
·
Xet hash:
fad77cf9fc62266cb1b806338e18cde8af656d110c945deebe5257250338cef8

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.