| | from queue import Queue |
| | from threading import Lock, Thread |
| | from typing import Dict, Optional, Union |
| | from urllib.parse import quote |
| |
|
| | from .. import constants, logging |
| | from . import build_hf_headers, get_session, hf_raise_for_status |
| |
|
| |
|
logger = logging.get_logger(__name__)


# Telemetry is sent from a background thread so that callers of `send_telemetry` are not slowed down.
# - `_TELEMETRY_THREAD` is the current worker thread; it is created (or re-created if it is no longer alive)
#   by `_start_telemetry_thread`.
# - `_TELEMETRY_THREAD_LOCK` is held while checking/creating the worker thread.
# - `_TELEMETRY_QUEUE` receives one dict of keyword arguments per `send_telemetry` call; `_telemetry_worker`
#   consumes them one at a time.
_TELEMETRY_THREAD: Optional[Thread] = None
_TELEMETRY_THREAD_LOCK = Lock()
_TELEMETRY_QUEUE: Queue = Queue()
| |
|
| |
|
def send_telemetry(
    topic: str,
    *,
    library_name: Optional[str] = None,
    library_version: Optional[str] = None,
    user_agent: Union[Dict, str, None] = None,
) -> None:
    """
    Queue a telemetry event used to track usage of the different HF libraries.

    The collected usage data helps us debug issues and prioritize new features. We understand that not everyone
    wants to share additional information, and we respect your privacy: set `HF_HUB_DISABLE_TELEMETRY=1` as
    environment variable to turn telemetry off. Telemetry is also turned off in offline mode (i.e. when
    `HF_HUB_OFFLINE=1` is set). In both cases this function returns immediately without doing anything.

    The event is not sent by the caller: it is put on a queue consumed by a separate thread, to minimize the
    impact for the user.

    Args:
        topic (`str`):
            Name of the monitored topic. It is used directly to build the URL. Use "/" as separator to monitor
            subtopics. Examples: "gradio", "transformers/examples",...
        library_name (`str`, *optional*):
            Name of the library making the HTTP request. Will be added to the user-agent header.
        library_version (`str`, *optional*):
            Version of the library making the HTTP request. Will be added to the user-agent header.
        user_agent (`str`, `dict`, *optional*):
            User agent info, either as a dictionary or as a single string. It will be completed with information
            about the installed packages.

    Example:
    ```py
    >>> from huggingface_hub.utils import send_telemetry

    # Topic only, no library information
    >>> send_telemetry("ping")

    # Subtopic, with library information
    >>> send_telemetry("gradio/local_link", library_name="gradio", library_version="3.22.1")

    # With additional user-agent data
    >>> send_telemetry(
    ...     topic="examples",
    ...     library_name="transformers",
    ...     library_version="4.26.0",
    ...     user_agent={"pipeline": "text_classification", "framework": "flax"},
    ... )
    ```
    """
    telemetry_allowed = not (constants.HF_HUB_OFFLINE or constants.HF_HUB_DISABLE_TELEMETRY)
    if telemetry_allowed:
        # Make sure a worker thread is running before handing it the task.
        _start_telemetry_thread()
        task = {
            "topic": topic,
            "library_name": library_name,
            "library_version": library_version,
            "user_agent": user_agent,
        }
        _TELEMETRY_QUEUE.put(task)
| |
|
| |
|
def _start_telemetry_thread():
    """Ensure a daemon thread is consuming the telemetry queue.

    Nothing happens when the current worker thread is alive. When no worker exists yet, or the previous one
    has stopped, a new daemon thread running `_telemetry_worker` is created and started. The check and the
    creation both happen while holding `_TELEMETRY_THREAD_LOCK`.
    """
    global _TELEMETRY_THREAD
    with _TELEMETRY_THREAD_LOCK:
        current = _TELEMETRY_THREAD
        if current is not None and current.is_alive():
            return
        _TELEMETRY_THREAD = Thread(target=_telemetry_worker, daemon=True)
        _TELEMETRY_THREAD.start()
| |
|
| |
|
def _telemetry_worker():
    """Wait for telemetry tasks and consume them one at a time.

    Each task is the dict of keyword arguments enqueued by `send_telemetry`; it is forwarded unchanged to
    `_send_telemetry_in_thread`. The loop blocks on the queue while it is empty and only ends if an
    exception escapes from the send call.
    """
    while True:
        kwargs = _TELEMETRY_QUEUE.get()
        try:
            _send_telemetry_in_thread(**kwargs)
        finally:
            # Always acknowledge the task, even when sending raised: otherwise the queue's count of
            # unfinished tasks never drops back to zero and `_TELEMETRY_QUEUE.join()` would block forever.
            _TELEMETRY_QUEUE.task_done()
| |
|
| |
|
def _send_telemetry_in_thread(
    topic: str,
    *,
    library_name: Optional[str] = None,
    library_version: Optional[str] = None,
    user_agent: Union[Dict, str, None] = None,
) -> None:
    """Perform the actual telemetry request to the Hub.

    A HEAD request is sent to `{constants.ENDPOINT}/api/telemetry/{path}`, where `path` is built from `topic`.
    The library and user-agent information is passed to `build_hf_headers`, with `token=False`.

    Gradio's analytics call this function directly, because it is not possible to send telemetry from a
    daemon thread.

    See here: https://github.com/gradio-app/gradio/pull/8180

    Please do not rename or remove this function.
    """
    # Percent-encode every non-empty segment of the topic; empty segments (leading, trailing or repeated "/")
    # are dropped.
    segments = [quote(segment) for segment in topic.split("/") if segment]
    path = "/".join(segments)
    try:
        session = get_session()
        url = f"{constants.ENDPOINT}/api/telemetry/{path}"
        headers = build_hf_headers(
            token=False,
            library_name=library_name,
            library_version=library_version,
            user_agent=user_agent,
        )
        response = session.head(url, headers=headers)
        hf_raise_for_status(response)
    except Exception as e:
        # Telemetry is best-effort: any failure while sending is only logged at debug level, never raised.
        logger.debug(f"Error while sending telemetry: {e}")
| |
|