19arjun89 commited on
Commit
a326604
·
verified ·
1 Parent(s): a5c204b

Create geo_logging.py

Browse files
Files changed (1) hide show
  1. geo_logging.py +160 -0
geo_logging.py ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ geo_logging.py
3
+ ----------------
4
+ Purpose:
5
+ This module implements privacy-preserving telemetry for the
6
+ AI Recruiting Agent Hugging Face Space.
7
+
8
+ Its sole purpose is to measure anonymous usage and adoption
9
+ metrics in order to:
10
+ - Understand how the tool is being used
11
+ - Improve reliability and performance
12
+ - Gauge real-world adoption
13
+ - Support research and evaluation of responsible AI practices
14
+
15
+ Privacy Principles:
16
+ This module is explicitly designed to minimize data collection
17
+ and avoid storing any personally identifiable information (PII).
18
+
19
+ It DOES NOT collect or store:
20
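+ - Raw IP addresses (the IP is sent to the ipapi.co geolocation service for the lookup and is never written to the log)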
21
+ - User names or Hugging Face account IDs
22
+ - Resume contents or job descriptions
23
+ - Emails, phone numbers, or file names
24
+ - Full user-agent strings or device fingerprints
25
+ - Any demographic attributes about users
26
+
27
+ It ONLY records:
28
+ - Approximate country and city (derived from the IP address; the IP itself is not stored)
29
+ - UTC timestamp of the event
30
+ - Space URL
31
+ - High-level event type (e.g., "visit")
32
+ - Non-identifying, aggregate metadata (e.g., counts, booleans, latencies)
33
+
34
+ All usage logs are:
35
+ - Anonymized
36
+ - Append-only
37
+ - Persisted in a public Hugging Face Dataset repository (https://huggingface.co/datasets/19arjun89/ai_recruiting_agent_usage)
38
+ - Versioned via immutable commit history for auditability
39
+
40
+ Ethical Safeguards:
41
+ - Logging failures never break application functionality
42
+ - No raw identifiers are persisted at any time
43
+ - All telemetry is optional and best-effort
44
+ - The system is intended for transparency and improvement,
45
+ not for surveillance or profiling
46
+
47
+ Transparency:
48
+ A public-facing usage reporting Space will be provided to allow
49
+ independent verification of aggregate adoption metrics.
50
+
51
+ Author:
52
+ Arjun Singh
53
+
54
+ Last Updated:
55
+ 2026-01-22
56
+ """
57
+
58
+
59
import ipaddress
import json
import logging
import os
from datetime import datetime, timezone
from typing import Optional

import gradio as gr
import requests
from huggingface_hub import HfApi, hf_hub_url
65
+
66
+ SPACE_URL = "https://huggingface.co/spaces/19arjun89/AI_Recruiting_Agent"
67
+ USAGE_DATASET_REPO = "19arjun89/ai_recruiting_agent_usage"
68
+ USAGE_JSONL_PATH = "usage/visits.jsonl"
69
+
70
+
71
def _hf_api():
    """Build a Hugging Face Hub client if an access token is configured.

    The token is read from the ``HF_TOKEN`` environment variable.

    Returns:
        An ``HfApi`` instance created with that token, or ``None`` when the
        variable is unset, empty, or contains only whitespace.
    """
    # Secrets pasted into a settings form often carry a trailing newline or
    # stray spaces; strip them so they are not handed to HfApi verbatim and a
    # whitespace-only value counts as "no token".
    token = os.environ.get("HF_TOKEN", "").strip()
    if not token:
        return None
    return HfApi(token=token)
76
+
77
+
78
def _download_text_if_exists(repo_id: str, path_in_repo: str) -> str:
    """Fetch a text file from a Hugging Face dataset repo, best-effort.

    The file URL is built with ``hf_hub_url`` and fetched with a plain
    ``requests.get`` (no credentials are attached) using a 5 second timeout.

    Args:
        repo_id: Dataset repository identifier, e.g. ``"user/dataset"``.
        path_in_repo: Path of the file inside that repository.

    Returns:
        The file content decoded as UTF-8 when the server answers 200,
        otherwise ``""``. An empty string therefore means "missing OR
        unreadable"; a caller that rewrites the file must not take it as
        proof that there is no prior content.
    """
    log = logging.getLogger(__name__)
    try:
        url = hf_hub_url(
            repo_id=repo_id,
            filename=path_in_repo,
            repo_type="dataset",
        )
        response = requests.get(url, timeout=5)
        if response.status_code == 200:
            # Decode explicitly instead of using ``response.text``, which
            # falls back to a guessed/default charset when the server does
            # not declare one.
            return response.content.decode("utf-8")
        # A non-200 answer (e.g. 404 before the first upload) is expected
        # often enough that it is only worth a debug line.
        log.debug(
            "download of %s from %s returned HTTP %s",
            path_in_repo,
            repo_id,
            response.status_code,
        )
    except Exception:
        # This is a telemetry boundary: nothing raised here may reach the
        # application, so the catch stays broad, but the failure is logged
        # rather than silently discarded.
        log.warning(
            "download of %s from %s failed",
            path_in_repo,
            repo_id,
            exc_info=True,
        )
    return ""
91
+
92
+
93
def _get_client_ip(request: "gr.Request") -> str:
    """Return the caller's IP address, or ``""`` when it cannot be determined.

    The first entry of the ``X-Forwarded-For`` header is preferred. That
    header is supplied by the client, and the value returned here is later
    interpolated into a geolocation URL, so the entry is accepted only when
    it parses as an IPv4 or IPv6 address. Otherwise the socket peer address
    (``request.client.host``) is used.

    Args:
        request: The incoming request; may be ``None``.

    Returns:
        The address as a string, or ``""`` if no usable address is present.
    """
    if not request:
        return ""

    forwarded = request.headers.get("x-forwarded-for")
    if forwarded:
        candidate = forwarded.split(",")[0].strip()
        try:
            ipaddress.ip_address(candidate)
        except ValueError:
            # Malformed or spoofed header value: ignore it and fall back to
            # the peer address below.
            pass
        else:
            return candidate

    if request.client and request.client.host:
        return request.client.host
    return ""
101
+
102
+
103
def _geo_lookup(ip: str) -> dict:
    """Resolve an IP address to an approximate country and city.

    The address is sent to the ipapi.co JSON endpoint with a 2 second
    timeout. No request is made when ``ip`` is not a valid IP address or is
    not globally routable (private, loopback, link-local, ...).

    Args:
        ip: IPv4 or IPv6 address in text form.

    Returns:
        A dict with the keys ``"country"`` and ``"city"``. Either value is
        ``"Unknown"`` when it is missing from the response, and both are
        ``"Unknown"`` when no lookup was made or the lookup failed.
    """
    unknown = {"country": "Unknown", "city": "Unknown"}

    # Validate before building the URL: the value can originate from a
    # client-controlled header and must not be able to alter the request path.
    try:
        address = ipaddress.ip_address(ip)
    except ValueError:
        return unknown

    # Non-routable addresses have no meaningful location; skip the round-trip.
    if not address.is_global:
        return unknown

    try:
        response = requests.get(f"https://ipapi.co/{address}/json/", timeout=2)
        if response.status_code == 200:
            data = response.json()
            return {
                "country": data.get("country_name") or "Unknown",
                "city": data.get("city") or "Unknown",
            }
    except Exception as exc:
        # Best-effort boundary, so the catch stays broad. Only the exception
        # type is logged: the exception text may include the request URL,
        # which contains the visitor's IP address.
        logging.getLogger(__name__).warning(
            "geo lookup failed (%s)", type(exc).__name__
        )
    return unknown
115
+
116
+
117
def _read_usage_log() -> Optional[str]:
    """Fetch the current usage log, telling "absent" apart from "unreadable".

    Returns:
        The log text when the server answers 200, ``""`` when it answers 404
        (nothing has been written yet), or ``None`` when the current content
        could not be determined (request error, timeout, undecodable body,
        or any other status code).
    """
    log = logging.getLogger(__name__)
    try:
        url = hf_hub_url(
            repo_id=USAGE_DATASET_REPO,
            filename=USAGE_JSONL_PATH,
            repo_type="dataset",
        )
        response = requests.get(url, timeout=5)
        if response.status_code == 200:
            return response.content.decode("utf-8")
        if response.status_code == 404:
            return ""
        log.warning("usage log download returned HTTP %s", response.status_code)
    except Exception:
        # Telemetry boundary: never let a download problem reach the app.
        log.warning("usage log download failed", exc_info=True)
    return None


def append_visit_to_dataset(country: str, city: str):
    """Append one "visit" event to the usage log in the dataset repository.

    The existing JSONL file is downloaded, one JSON line is appended, and the
    whole file is uploaded again. Nothing is written when no ``HF_TOKEN`` is
    configured or when the existing log cannot be read. Uploading in the
    latter case would replace the full history with a single event.

    Args:
        country: Country name to record.
        city: City name to record.

    Returns:
        None. Failures are logged and never raised to the caller.
    """
    api = _hf_api()
    if not api:
        return

    existing = _read_usage_log()
    if existing is None:
        # Unknown current state: dropping one event is better than
        # overwriting every previously recorded event.
        return

    event = {
        # Timezone-aware replacement for the deprecated datetime.utcnow();
        # the serialized form (ISO 8601 with a trailing "Z") is unchanged.
        "ts_utc": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
        "space_url": SPACE_URL,
        "country": country,
        "city": city,
        "event": "visit",
    }

    # Keep exactly one newline between the old content and the new record.
    prefix = existing.rstrip("\n") + "\n" if existing.strip() else ""
    new_content = prefix + json.dumps(event) + "\n"

    # NOTE(review): download-then-upload is not atomic; two overlapping
    # visits can each upload a file that lacks the other's event.
    try:
        api.upload_file(
            repo_id=USAGE_DATASET_REPO,
            repo_type="dataset",
            path_in_repo=USAGE_JSONL_PATH,
            path_or_fileobj=new_content.encode("utf-8"),
            commit_message="append visit log",
        )
    except Exception:
        # Best-effort: an upload failure must not break the application.
        logging.getLogger(__name__).warning(
            "usage log upload failed", exc_info=True
        )
151
+
152
+
153
def record_visit(request: gr.Request):
    """Record one anonymous "visit" event for the incoming request.

    The client IP is resolved to an approximate country and city, and only
    those two values are passed on to the usage log; the IP itself is not
    handed to ``append_visit_to_dataset``.

    Args:
        request: The incoming request.

    Returns:
        None. Any failure is logged and swallowed so that telemetry can
        never break the application.
    """
    # NOTE(review): this runs synchronously in the caller; the helpers it
    # uses perform network requests (timeouts of 2 s and 5 s are visible in
    # this file) followed by an upload.
    try:
        ip = _get_client_ip(request)
        geo = _geo_lookup(ip) if ip else {"country": "Unknown", "city": "Unknown"}
        append_visit_to_dataset(
            country=geo["country"],
            city=geo["city"],
        )
    except Exception as exc:
        # Only the exception type is logged so that no request detail (such
        # as the visitor's IP) can end up in the application logs.
        logging.getLogger(__name__).warning(
            "visit telemetry failed (%s)", type(exc).__name__
        )