Spaces:

CyberAl
/

global_dashboard

Sleeping

cyberai-1

Add luc link

529ea75 25 days ago

6.01 kB

	import json
	import os
	import re
	from typing import Dict, List
	from urllib.parse import quote, unquote, urlparse
	from urllib.error import HTTPError
	from urllib.request import Request, urlopen

	from fastapi import FastAPI, HTTPException, Query
	from fastapi.responses import HTMLResponse, Response

	# FastAPI application instance; the route handlers below register on it.
	app = FastAPI()

	# Hugging Face "tree" URLs (spaces and datasets) scanned when the
	# HF_GROUP_LINKS environment variable is unset or blank
	# (see configured_group_links).
	DEFAULT_GROUP_LINKS = [
	"https://huggingface.co/spaces/CyberAl/Traffic-Tracker/tree/main/data",
	"https://huggingface.co/spaces/niangmariame513/traffic-monitor/tree/main/data",
	"https://huggingface.co/spaces/danielle2035/TRAFFIC_ROAD_APP/tree/main/data",
	"https://huggingface.co/spaces/Rafiatou/trafficvision-group10/tree/main/data",
	"https://huggingface.co/spaces/Binta26/computer_vision/tree/main/data",
	"https://huggingface.co/datasets/ccspoet/ProjectCV/tree/main/Data",
	"https://huggingface.co/spaces/AhmedSouley01/traffic-monitoring/tree/main/data",
	"https://huggingface.co/datasets/conde621gmail/dataset/tree/main",
	"https://huggingface.co/datasets/ioget/aims-traffic-cv-data/tree/main",
	"https://huggingface.co/datasets/luclintos/traffic_object_detection/tree/main",
	]

	# Filename suffixes that discover_source classifies as "video"; they are
	# matched against the lower-cased file path, so they must stay lower-case.
	VIDEO_EXTENSIONS = (".mp4", ".webm", ".mov", ".avi", ".mkv", ".m4v")


	def configured_group_links() -> List[str]:
	    """Return the list of Hugging Face tree URLs the dashboard should scan.

	    The HF_GROUP_LINKS environment variable is consulted first. It may hold
	    either a JSON array of strings (detected by a leading "[") or a plain
	    list separated by commas and/or newlines. Blank entries are dropped and
	    surrounding whitespace is stripped. When the variable is unset or blank
	    the built-in DEFAULT_GROUP_LINKS are used.

	    Returns:
	        A new list on every call, so callers may mutate the result without
	        affecting the module-level defaults.

	    Raises:
	        HTTPException: status 500 when the value starts with "[" but is not
	            valid JSON.
	    """
	    raw = os.getenv("HF_GROUP_LINKS", "").strip()
	    if not raw:
	        # Hand out a copy: every other branch returns a fresh list, and
	        # returning the shared constant would let a caller corrupt it.
	        return list(DEFAULT_GROUP_LINKS)

	    if raw.startswith("["):
	        try:
	            links = json.loads(raw)
	        except json.JSONDecodeError as exc:
	            raise HTTPException(
	                status_code=500,
	                detail=f"Invalid HF_GROUP_LINKS JSON: {exc}",
	            ) from exc
	        return [str(link).strip() for link in links if str(link).strip()]

	    return [link.strip() for link in re.split(r"[\n,]+", raw) if link.strip()]


	def parse_hf_tree_url(url: str) -> Dict[str, str]:
	    """Split a Hugging Face tree URL into its repository components.

	    The expected shape is
	    ``https://huggingface.co/<spaces|datasets>/<owner>/<repo>/tree/<revision>[/<folder>...]``.
	    Path segments are percent-decoded before being returned.

	    Returns:
	        A dict with the keys ``repo_kind``, ``owner``, ``repo``,
	        ``revision``, ``folder_path`` (empty string for the repo root) and
	        ``repo_id`` (``"<owner>/<repo>"``).

	    Raises:
	        HTTPException: status 400 when the host is not huggingface.co, the
	            path is not a tree URL, or the repo kind is neither ``spaces``
	            nor ``datasets``.
	    """
	    location = urlparse(url.strip())
	    if location.netloc != "huggingface.co":
	        raise HTTPException(status_code=400, detail="Only huggingface.co links are supported.")

	    raw_segments = location.path.strip("/").split("/")
	    segments = [unquote(segment) for segment in raw_segments if segment]
	    looks_like_tree = len(segments) >= 5 and segments[3] == "tree"
	    if not looks_like_tree:
	        raise HTTPException(
	            status_code=400,
	            detail="Expected a Hugging Face tree URL like https://huggingface.co/spaces/user/repo/tree/main/data",
	        )

	    # Layout: kind / owner / repo / "tree" / revision / folder segments...
	    repo_kind, owner, repo, _tree, revision, *folder_segments = segments
	    if repo_kind not in ("spaces", "datasets"):
	        raise HTTPException(status_code=400, detail="Only Hugging Face spaces and datasets are supported.")

	    return {
	        "repo_kind": repo_kind,
	        "owner": owner,
	        "repo": repo,
	        "revision": revision,
	        "folder_path": "/".join(folder_segments),
	        "repo_id": f"{owner}/{repo}",
	    }


	def hf_api_prefix(repo_kind: str) -> str:
	    """Return the path segment used after ``/api/`` for the given repo kind.

	    ``"spaces"`` maps to ``"spaces"``; every other value maps to
	    ``"datasets"``.
	    """
	    if repo_kind == "spaces":
	        return "spaces"
	    return "datasets"


	def hf_raw_prefix(repo_kind: str) -> str:
	    """Return the leading path segment of download URLs for the repo kind.

	    Anything other than ``"spaces"`` is treated as a dataset.
	    """
	    if repo_kind != "spaces":
	        return "datasets"
	    return "spaces"


	def fetch_json(url: str):
	    """Fetch *url* and return its body parsed as JSON.

	    The request identifies itself with the ``TrafficSense-dashboard``
	    User-Agent and uses a 20 second timeout. The body is decoded as UTF-8.
	    Errors raised by ``urlopen`` or by JSON decoding propagate unchanged.
	    """
	    request = Request(url, headers={"User-Agent": "TrafficSense-dashboard"})
	    with urlopen(request, timeout=20) as reply:
	        payload = reply.read()
	    text = payload.decode("utf-8")
	    return json.loads(text)


	def discover_source(tree_url: str) -> Dict:
	    """List the CSV and video files found under a Hugging Face tree URL.

	    The URL is parsed with parse_hf_tree_url, the repository tree API is
	    queried recursively for that folder, and every entry of type "file" is
	    classified by its (case-insensitive) suffix as CSV, video or other.
	    Only CSV and video files are returned.

	    Args:
	        tree_url: A ``https://huggingface.co/<spaces|datasets>/.../tree/...``
	            link.

	    Returns:
	        A dict with ``link`` (the input URL), ``repo``, ``folder``,
	        ``warning`` (``None``, or a message when the folder was not found)
	        and the lists ``csv_files`` / ``video_files``. Each file entry has
	        ``name``, ``path``, ``size``, ``kind`` and a download ``url``.

	    Raises:
	        HTTPException: propagated from parse_hf_tree_url for invalid links.
	        HTTPError: for any upstream HTTP failure other than 404.
	    """
	    info = parse_hf_tree_url(tree_url)

	    # Percent-encode each URL component once, so the API URL and every
	    # download URL are built from the same encoded pieces. (Previously the
	    # download URL embedded the repo id unquoted.)
	    repo_id = quote(info["repo_id"], safe="/")
	    revision = quote(info["revision"], safe="")
	    api_path = quote(info["folder_path"], safe="/")

	    # rstrip("/") drops the trailing slash left behind when the link points
	    # at the repository root (empty folder path).
	    api_url = (
	        f"https://huggingface.co/api/{hf_api_prefix(info['repo_kind'])}/"
	        f"{repo_id}/tree/{revision}/{api_path}"
	    ).rstrip("/") + "?recursive=1"
	    # Loop-invariant prefix of every download URL.
	    resolve_base = (
	        f"https://huggingface.co/{hf_raw_prefix(info['repo_kind'])}/{repo_id}"
	        f"/resolve/{revision}/"
	    )

	    warning = None
	    try:
	        # NOTE(review): the tree API may paginate large listings; only the
	        # first response is read here - confirm against the Hub API docs.
	        items = fetch_json(api_url)
	    except HTTPError as exc:
	        if exc.code != 404:
	            raise
	        # A missing folder is reported as a warning, not as a failure.
	        warning = f"Folder '{info['folder_path']}' was not found."
	        items = []

	    csv_files = []
	    video_files = []
	    for item in items:
	        if item.get("type") != "file":
	            continue
	        file_path = item.get("path", "")
	        lower_path = file_path.lower()
	        if lower_path.endswith(".csv"):
	            kind = "csv"
	        elif lower_path.endswith(VIDEO_EXTENSIONS):
	            kind = "video"
	        else:
	            # Files that are neither CSV nor video are not reported.
	            continue
	        entry = {
	            "name": file_path.split("/")[-1],
	            "path": file_path,
	            "size": item.get("size"),
	            "kind": kind,
	            "url": resolve_base + quote(file_path, safe="/"),
	        }
	        if kind == "csv":
	            csv_files.append(entry)
	        else:
	            video_files.append(entry)

	    return {
	        "link": tree_url,
	        "repo": info["repo_id"],
	        "folder": info["folder_path"],
	        "warning": warning,
	        "csv_files": csv_files,
	        "video_files": video_files,
	    }

	@app.get("/", response_class=HTMLResponse)
	def read_root():
	    """Serve the dashboard page.

	    Returns the text of ``index.html`` (opened by relative path and decoded
	    as UTF-8), which FastAPI sends back as an HTML response.
	    """
	    with open("index.html", encoding="utf-8") as page:
	        html = page.read()
	    return html


	@app.get("/api/group-sources")
	def group_sources():
	    """Discover the files behind every configured group link.

	    Each link from configured_group_links is passed to discover_source.
	    A failure on one link does not abort the others: it is recorded under
	    ``errors`` as ``{"link": ..., "error": ...}`` and scanning continues.

	    Returns:
	        ``{"sources": [...], "errors": [...]}``.
	    """
	    discovered = []
	    failures = []
	    for tree_link in configured_group_links():
	        try:
	            source = discover_source(tree_link)
	        except Exception as err:
	            # Best-effort by design: report the problem and keep going.
	            failures.append({"link": tree_link, "error": str(err)})
	        else:
	            discovered.append(source)
	    return {"sources": discovered, "errors": failures}


	@app.get("/api/hf-source")
	def hf_source(url: str = Query(..., min_length=1)):
	    """Discover the CSV and video files behind a single Hugging Face tree URL.

	    Args:
	        url: The tree link to inspect (required, non-empty query parameter).

	    Returns:
	        The dict produced by discover_source.

	    Raises:
	        HTTPException: status 400 (raised by the URL parser) for unsupported
	            links; status 502 when Hugging Face answers with an HTTP error
	            or cannot be reached, instead of an opaque 500.
	    """
	    try:
	        return discover_source(url)
	    except HTTPError as exc:
	        # HTTPError is an OSError subclass, so it must be handled first.
	        raise HTTPException(
	            status_code=502,
	            detail=f"Hugging Face returned HTTP {exc.code} for this link.",
	        ) from exc
	    except OSError as exc:
	        # Covers URLError, timeouts and other network-level failures.
	        raise HTTPException(
	            status_code=502,
	            detail=f"Could not reach Hugging Face: {exc}",
	        ) from exc


	@app.get("/api/proxy")
	def proxy(url: str = Query(..., min_length=1)):
	    """Relay a Hugging Face ``resolve`` download through this server.

	    The upstream body is streamed to the client in chunks rather than being
	    read into memory first, so large files such as videos do not have to
	    fit in RAM. The upstream Content-Type (default
	    ``application/octet-stream``) and, when present, Content-Length are
	    forwarded.

	    Args:
	        url: An http(s) URL on huggingface.co whose path contains
	            ``/resolve/``.

	    Raises:
	        HTTPException: status 400 for any other URL; the upstream status
	            code when Hugging Face answers with a 4xx/5xx error; status 502
	            when Hugging Face cannot be reached.
	    """
	    # Local import: StreamingResponse is not among the module-level imports.
	    from fastapi.responses import StreamingResponse

	    parsed = urlparse(url)
	    if (
	        parsed.scheme not in ("http", "https")
	        or parsed.netloc != "huggingface.co"
	        or "/resolve/" not in parsed.path
	    ):
	        raise HTTPException(status_code=400, detail="Only Hugging Face resolve URLs can be proxied.")

	    req = Request(url, headers={"User-Agent": "TrafficSense-dashboard"})
	    try:
	        upstream = urlopen(req, timeout=30)
	    except HTTPError as exc:
	        # Pass genuine client/server error codes through; anything else
	        # (e.g. an unresolved redirect) is reported as a bad gateway.
	        status = exc.code if 400 <= exc.code < 600 else 502
	        raise HTTPException(
	            status_code=status,
	            detail=f"Hugging Face returned HTTP {exc.code} for this file.",
	        ) from exc
	    except OSError as exc:
	        # Covers URLError, timeouts and other network-level failures.
	        raise HTTPException(
	            status_code=502,
	            detail=f"Could not reach Hugging Face: {exc}",
	        ) from exc

	    content_type = upstream.headers.get("content-type", "application/octet-stream")
	    extra_headers = {}
	    content_length = upstream.headers.get("content-length")
	    if content_length:
	        extra_headers["Content-Length"] = content_length

	    def stream_chunks(chunk_size: int = 64 * 1024):
	        # Close the upstream connection once the body is exhausted or the
	        # generator is discarded.
	        try:
	            for chunk in iter(lambda: upstream.read(chunk_size), b""):
	                yield chunk
	        finally:
	            upstream.close()

	    return StreamingResponse(stream_chunks(), media_type=content_type, headers=extra_headers)