WitNote / internal /urlutil /urlutil.go
AUXteam's picture
Upload folder using huggingface_hub
6a7089a verified
// Package urlutil provides URL normalization and validation utilities.
package urlutil
import (
"fmt"
"net/url"
"strings"
)
// Normalize adds https:// if no protocol specified. Existing http/https preserved.
func Normalize(rawURL string) string {
if strings.HasPrefix(rawURL, "http://") || strings.HasPrefix(rawURL, "https://") {
return rawURL
}
return "https://" + rawURL
}
// Sanitize normalizes a URL. Bare hostnames get https:// added.
// All explicit schemes are passed through (user knows what they're doing).
func Sanitize(rawURL string) (string, error) {
if rawURL == "" {
return "", fmt.Errorf("empty URL")
}
// If URL has an explicit scheme, pass it through unchanged
if strings.Contains(rawURL, "://") ||
strings.HasPrefix(rawURL, "javascript:") ||
strings.HasPrefix(rawURL, "vbscript:") ||
strings.HasPrefix(rawURL, "about:") ||
strings.HasPrefix(rawURL, "data:") {
return rawURL, nil
}
// Bare hostname — normalize to https://
normalized := Normalize(rawURL)
parsed, err := url.Parse(normalized)
if err != nil {
return "", fmt.Errorf("invalid URL: %w", err)
}
if parsed.Host == "" {
return "", fmt.Errorf("missing host in URL")
}
return parsed.String(), nil
}
// IsValid returns true if URL is safe for navigation.
func IsValid(rawURL string) bool {
_, err := Sanitize(rawURL)
return err == nil
}
// ExtractHost returns the lowercase hostname without port. Empty string on failure.
func ExtractHost(rawURL string) string {
// url.Parse puts bare hostnames into Path when no scheme is present
if !strings.Contains(rawURL, "://") {
rawURL = "https://" + rawURL
}
parsed, err := url.Parse(rawURL)
if err != nil {
return ""
}
host := parsed.Hostname() // strips port
return strings.ToLower(host)
}