File size: 3,993 Bytes
7cc1131
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
704a297
 
 
 
7cc1131
 
 
704a297
7cc1131
 
 
 
 
 
704a297
 
 
 
 
 
 
 
7cc1131
 
 
 
 
704a297
7cc1131
 
704a297
 
 
 
7cc1131
 
 
 
 
704a297
 
7cc1131
704a297
 
 
 
 
 
 
 
 
7cc1131
 
 
 
 
 
 
 
 
 
 
 
704a297
 
 
 
 
 
 
 
 
7cc1131
704a297
 
7cc1131
704a297
 
 
 
7cc1131
704a297
7cc1131
704a297
 
 
 
7cc1131
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
'use strict';

const fs = require('fs');
const path = require('path');
const { fetchRobust } = require('./fetch-utils');

// Path to the providers data file: `data/providers.json` one directory above
// this script's directory. main() reads it and writes the results back to it.
const PROVIDERS_FILE = path.join(__dirname, '..', 'data', 'providers.json');

/**
 * Checks whether a Hugging Face repository page answers for the given ID by
 * sending a HEAD request to `https://huggingface.co/<hfId>` through
 * `fetchRobust` (called with `retries: 1`).
 *
 * Only a definitive answer marks an ID as invalid. Anything inconclusive —
 * a thrown error that does not mention 404, a 429, or a 5xx response — is
 * reported as valid so that a transient outage never causes the caller to
 * discard a good ID.
 *
 * @param {string} [hfId] - Repository ID as it appears in the URL path.
 *   A falsy value is not checked at all.
 * @returns {Promise<{valid: boolean, status: (number|string)}>} `status` is
 *   the HTTP status code when one was obtained, `'N/A'` when no ID was given,
 *   or `'Error (Assume valid)'` when the request failed inconclusively.
 */
async function checkHfId(hfId) {
  if (!hfId) return { valid: true, status: 'N/A' };

  const url = `https://huggingface.co/${hfId}`;
  try {
    const res = await fetchRobust(url, { method: 'HEAD', retries: 1 });
    const status = res.status;

    if (status === 200 || status === 302) {
      return { valid: true, status };
    }
    // Rate limiting and server-side failures say nothing about whether the
    // repository exists; treat them like the inconclusive errors below.
    if (status === 429 || status >= 500) {
      return { valid: true, status };
    }
    return { valid: false, status };
  } catch (e) {
    // The thrown value is not guaranteed to be an Error with a string
    // `message`; normalise it so this handler can never throw itself.
    const message = e && typeof e.message === 'string' ? e.message : String(e);

    // NOTE(review): relies on the failure text containing "404" — confirm
    // against the error format fetchRobust actually produces.
    if (message.includes('404')) return { valid: false, status: 404 };
    return { valid: true, status: 'Error (Assume valid)' };
  }
}

/**
 * Builds the lookup tables used by main() from the parsed providers data.
 *
 * @param {{providers: Array<{name: string, models: Array<Object>}>}} data
 * @returns {{hfIdToModels: Map<string, string[]>, hfIdMeta: Map<string, {at: string, status: number}>}}
 *   `hfIdToModels` maps each HF ID to the "provider: model" labels using it;
 *   `hfIdMeta` holds, per ID, the most recent stored validation whose
 *   `hf_status` is exactly 200.
 */
function collectHfIds(data) {
  const hfIdToModels = new Map();
  const hfIdMeta = new Map();

  for (const p of data.providers) {
    for (const m of p.models) {
      if (!m.hf_id) continue;

      if (!hfIdToModels.has(m.hf_id)) hfIdToModels.set(m.hf_id, []);
      hfIdToModels.get(m.hf_id).push(`${p.name}: ${m.name}`);

      // Only a stored 200 counts as a cache entry; keep the newest one when
      // several models share the same ID.
      // NOTE(review): a stored 302 never satisfies this condition, so such
      // IDs are re-checked on every run — confirm whether that is intended.
      if (m.hf_validated_at && m.hf_status === 200) {
        const existing = hfIdMeta.get(m.hf_id);
        if (!existing || new Date(m.hf_validated_at) > new Date(existing.at)) {
          hfIdMeta.set(m.hf_id, { at: m.hf_validated_at, status: m.hf_status });
        }
      }
    }
  }

  return { hfIdToModels, hfIdMeta };
}

/**
 * Writes the outcome of a validation run back onto the model records
 * (mutates `data` in place).
 *
 * @param {{providers: Array<{models: Array<Object>}>}} data
 * @param {Map<string, {status: number, at: string}>} validationResults
 *   Confirmed or cached results keyed by HF ID.
 * @param {Set<string>} invalidIds - IDs whose check came back invalid.
 * @returns {{updatedCount: number, removalCount: number}}
 */
function applyValidationResults(data, validationResults, invalidIds) {
  let updatedCount = 0;
  let removalCount = 0;

  for (const p of data.providers) {
    for (const m of p.models) {
      if (!m.hf_id) continue;

      if (invalidIds.has(m.hf_id)) {
        delete m.hf_id;
        delete m.hf_validated_at;
        delete m.hf_status;
        removalCount++;
        continue;
      }

      // IDs without an entry had an inconclusive check: leave whatever
      // metadata the model already carries untouched.
      const res = validationResults.get(m.hf_id);
      if (res) {
        m.hf_validated_at = res.at;
        m.hf_status = res.status;
        updatedCount++;
      }
    }
  }

  return { updatedCount, removalCount };
}

/**
 * Validates every `hf_id` referenced in PROVIDERS_FILE and rewrites the file.
 *
 * Each unique ID is checked once via checkHfId(), with a 50 ms pause between
 * requests. IDs whose stored validation is a 200 less than 30 days old are
 * skipped unless `--force` is on the command line. Models whose ID is
 * reported invalid lose `hf_id`, `hf_validated_at` and `hf_status`; models
 * whose ID was confirmed (or served from cache) get the validation timestamp
 * and status stamped on them.
 *
 * A check that is valid only by assumption (anything other than a 200/302
 * answer) is NOT recorded: stamping it as a fresh 200 would cache a network
 * failure as a successful validation for the next 30 days.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const force = process.argv.includes('--force');
  console.log('Starting Hugging Face Repository Validation...');
  if (force) console.log('  [!] Force mode enabled: checking all IDs regardless of cache.\n');
  else console.log('  [i] Using cache: only checking IDs not validated in the last 30 days.\n');

  const data = JSON.parse(fs.readFileSync(PROVIDERS_FILE, 'utf8'));
  const { hfIdToModels, hfIdMeta } = collectHfIds(data);

  const ids = Array.from(hfIdToModels.keys());
  console.log(`Found ${ids.length} unique HF IDs to validate.\n`);

  const invalidIds = new Set();
  const now = new Date();
  const THIRTY_DAYS_MS = 30 * 24 * 60 * 60 * 1000;

  const validationResults = new Map(); // id -> { status, at }

  for (const [i, id] of ids.entries()) {
    const progress = `[${i + 1}/${ids.length}]`.padEnd(10);

    const cached = hfIdMeta.get(id);
    // An unparseable timestamp yields NaN, which compares false and therefore
    // forces a re-check.
    const isRecent = cached && (now - new Date(cached.at) < THIRTY_DAYS_MS);

    if (isRecent && !force) {
      console.log(`${progress} ≈ CACHED  (${cached.status}) ${id} (last checked ${new Date(cached.at).toLocaleDateString()})`);
      validationResults.set(id, { status: cached.status, at: cached.at });
      continue;
    }

    const check = await checkHfId(id);
    const confirmed = check.valid && (check.status === 200 || check.status === 302);

    if (!check.valid) {
      console.log(`${progress} ✗ INVALID (${check.status}) ${id}`);
      console.log(`          Used by: ${hfIdToModels.get(id).join(', ')}`);
      invalidIds.add(id);
    } else if (confirmed) {
      console.log(`${progress} ✓ VALID   (${check.status}) ${id}`);
      validationResults.set(id, { status: check.status, at: now.toISOString() });
    } else {
      console.log(`${progress} ? UNKNOWN (${check.status}) ${id} (inconclusive; keeping existing metadata)`);
    }

    // Small delay to prevent rate limiting
    await new Promise(r => setTimeout(r, 50));
  }

  console.log('\nUpdating providers.json with validation results...');
  const { updatedCount, removalCount } = applyValidationResults(data, validationResults, invalidIds);

  fs.writeFileSync(PROVIDERS_FILE, JSON.stringify(data, null, 2));
  console.log(`Done. Updated ${updatedCount} models, removed ${removalCount} invalid IDs.`);
}

// Script entry point: run the validation and, if it rejects, report the
// failure on stderr and terminate with exit code 1.
const reportFatalAndExit = (failure) => {
  console.error('\nFatal error during validation:', failure);
  process.exit(1);
};

main().then(undefined, reportFatalAndExit);