// profilesync persists the Chrome user-data profile across Hugging Face Space // restarts/rebuilds by snapshotting it into a Postgres (Neon) bytea blob. // // profilesync restore — pull the latest snapshot and extract it (run at boot) // profilesync backup — tar+gzip the profile and upsert it (run on shutdown / periodically) // // The connection string comes from the DATABASE_URL env var (a Space secret) — it // is never hardcoded. Cache/junk directories are excluded so the blob stays small // enough for Neon's free tier; only login state (cookies, Local Storage, IndexedDB, // Login Data, …) is kept. package main import ( "archive/tar" "bytes" "compress/gzip" "context" "errors" "io" "log" "os" "path/filepath" "strings" "time" "github.com/jackc/pgx/v5" ) const ( profileDir = "/home/chrome/data" // Chrome --user-data-dir baseName = "data" // archive top-level dir ) func main() { log.SetFlags(0) log.SetPrefix("[profilesync] ") if len(os.Args) < 2 { log.Fatal("usage: profilesync [backup|restore]") } mode := os.Args[1] dsn := os.Getenv("DATABASE_URL") if dsn == "" { log.Println("DATABASE_URL not set — skipping") return } ctx, cancel := context.WithTimeout(context.Background(), 90*time.Second) defer cancel() conn, err := pgx.Connect(ctx, dsn) if err != nil { log.Fatalf("connect: %v", err) } defer conn.Close(ctx) if _, err := conn.Exec(ctx, `CREATE TABLE IF NOT EXISTS chrome_profile ( id int PRIMARY KEY DEFAULT 1, updated_at timestamptz DEFAULT now(), data bytea )`); err != nil { log.Fatalf("create table: %v", err) } switch mode { case "backup": if err := backup(ctx, conn); err != nil { log.Fatalf("backup: %v", err) } case "restore": if err := restore(ctx, conn); err != nil { log.Fatalf("restore: %v", err) } default: log.Fatalf("unknown mode %q (want backup|restore)", mode) } } // skipSegment reports whether a path segment is cache/junk we never persist. func skipSegment(name string) bool { if strings.Contains(name, "Cache") || strings.HasPrefix(name, "Singleton") { return true } switch name { case "Crashpad", "component_crx_cache", "optimization_guide_model_store", "GraphiteDawnCache", "BrowserMetrics", "Safe Browsing", "LOCK", "LOG.old": return true } return false } func backup(ctx context.Context, conn *pgx.Conn) error { if _, err := os.Stat(profileDir); err != nil { log.Printf("profile dir %s missing — nothing to back up", profileDir) return nil } var buf bytes.Buffer gz := gzip.NewWriter(&buf) tw := tar.NewWriter(gz) walkErr := filepath.Walk(profileDir, func(path string, info os.FileInfo, err error) error { if err != nil { return nil // skip unreadable entries } rel, err := filepath.Rel(profileDir, path) if err != nil || rel == "." { return nil } for _, seg := range strings.Split(rel, string(os.PathSeparator)) { if skipSegment(seg) { if info.IsDir() { return filepath.SkipDir } return nil } } if !info.IsDir() && !info.Mode().IsRegular() { return nil // skip sockets/symlinks/etc. } hdr, err := tar.FileInfoHeader(info, "") if err != nil { return nil } hdr.Name = filepath.ToSlash(filepath.Join(baseName, rel)) if info.IsDir() { hdr.Name += "/" } if err := tw.WriteHeader(hdr); err != nil { return err } if info.IsDir() { return nil } f, err := os.Open(path) if err != nil { return nil // file vanished or locked — skip it } defer f.Close() io.Copy(tw, f) return nil }) if walkErr != nil { return walkErr } if err := tw.Close(); err != nil { return err } if err := gz.Close(); err != nil { return err } if _, err := conn.Exec(ctx, `INSERT INTO chrome_profile (id, data, updated_at) VALUES (1, $1, now()) ON CONFLICT (id) DO UPDATE SET data = EXCLUDED.data, updated_at = now()`, buf.Bytes()); err != nil { return err } log.Printf("backed up %d KB (compressed) to Postgres", buf.Len()/1024) return nil } func restore(ctx context.Context, conn *pgx.Conn) error { var data []byte err := conn.QueryRow(ctx, `SELECT data FROM chrome_profile WHERE id = 1`).Scan(&data) if errors.Is(err, pgx.ErrNoRows) { log.Println("no snapshot yet — fresh start") return nil } if err != nil { return err } if len(data) == 0 { log.Println("empty snapshot — fresh start") return nil } gz, err := gzip.NewReader(bytes.NewReader(data)) if err != nil { return err } defer gz.Close() tr := tar.NewReader(gz) root := filepath.Dir(profileDir) // /home/chrome ; "data/..." lands at /home/chrome/data/... cleanRoot := filepath.Clean(root) var n int for { hdr, err := tr.Next() if errors.Is(err, io.EOF) { break } if err != nil { return err } target := filepath.Join(root, filepath.Clean(hdr.Name)) if target != cleanRoot && !strings.HasPrefix(target, cleanRoot+string(os.PathSeparator)) { continue // guard against path traversal } switch hdr.Typeflag { case tar.TypeDir: os.MkdirAll(target, 0o755) case tar.TypeReg: os.MkdirAll(filepath.Dir(target), 0o755) f, err := os.OpenFile(target, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, os.FileMode(hdr.Mode)&0o777) if err != nil { return err } if _, err := io.Copy(f, tr); err != nil { f.Close() return err } f.Close() n++ } } log.Printf("restored %d files from Postgres snapshot", n) return nil }