Spaces:
Runtime error
Runtime error
// Test URL validation system
//
// NOTE(review): the leading glyphs in the log strings below ('π', 'β', 'π§ͺ', ...)
// look like mis-decoded emoji. They are reproduced unchanged here; confirm the
// file's encoding before relying on them to tell VALID from INVALID at a glance.

// URLs probed when the caller does not supply its own list.
const DEFAULT_TEST_URLS = Object.freeze([
  // Known working URLs
  'https://github.com/microsoft/vscode',
  'https://en.wikipedia.org/wiki/Machine_learning',
  'https://arxiv.org/abs/2001.08361',
  // Known broken/problematic URLs
  'https://vldb.org/vector-db-2024', // The URL you mentioned as broken
  'https://cvpr.org', // The URL you mentioned with issues
  'https://nonexistent-domain-12345.com',
  'https://httpstat.us/404', // Returns 404
  'https://httpstat.us/500', // Returns 500
]);

const DEFAULT_SEARCH_ENDPOINT = 'http://localhost:5000/api/search';
const DEFAULT_URL_TIMEOUT_MS = 5000;
// The search call gets a larger budget than a single probe because one request
// covers the whole server-side search.
const DEFAULT_SEARCH_TIMEOUT_MS = 30000;
const VALIDATOR_USER_AGENT = 'Knowledge-Base-Browser/1.0 (URL Validator)';
// Statuses that mean "HEAD is not supported here" rather than "the page is gone".
const HEAD_UNSUPPORTED_STATUSES = new Set([405, 501]);

/**
 * Probe one URL and return the HTTP status it answers with.
 *
 * A HEAD request is tried first. If the server answers that HEAD itself is not
 * supported (405/501), the probe is repeated with GET so that a reachable page
 * is not reported as broken just because of the request method.
 *
 * @param {string} url - Absolute URL to probe.
 * @param {number} timeoutMs - Per-request timeout in milliseconds.
 * @returns {Promise<number>} The HTTP status code of the final response.
 * @throws Whatever `fetch` rejects with (network failure, timeout, ...).
 */
async function probeUrl(url, timeoutMs) {
  const headers = { 'User-Agent': VALIDATOR_USER_AGENT };
  const headResponse = await fetch(url, {
    method: 'HEAD',
    signal: AbortSignal.timeout(timeoutMs),
    headers,
  });
  if (!HEAD_UNSUPPORTED_STATUSES.has(headResponse.status)) {
    return headResponse.status;
  }

  const getResponse = await fetch(url, {
    method: 'GET',
    signal: AbortSignal.timeout(timeoutMs),
    headers,
  });
  const { status } = getResponse;
  // Only the status matters; cancel the body instead of downloading it.
  await getResponse.body?.cancel();
  return status;
}

/**
 * POST a fixed semantic query to the search endpoint and print the results.
 *
 * Every failure mode (network error, timeout, non-2xx status, malformed body)
 * is logged rather than thrown, so the surrounding test run always completes.
 *
 * @param {string} searchEndpoint - URL of the search API.
 * @param {number} timeoutMs - Timeout for the search request in milliseconds.
 * @returns {Promise<void>}
 */
async function runSearchCheck(searchEndpoint, timeoutMs) {
  try {
    const searchQuery = 'vector embedding generation';
    console.log(`Searching for: "${searchQuery}"`);
    const response = await fetch(searchEndpoint, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      // Without a signal a stalled server would hang the script indefinitely.
      signal: AbortSignal.timeout(timeoutMs),
      body: JSON.stringify({
        query: searchQuery,
        searchType: 'semantic',
        limit: 5,
      }),
    });

    if (!response.ok) {
      console.log(`β Search request failed (HTTP ${response.status})`);
      return;
    }

    const data = await response.json();
    if (!Array.isArray(data?.results)) {
      console.log('β Search test failed:', 'response did not contain a "results" array');
      return;
    }

    console.log(`Found ${data.results.length} validated results:`);
    data.results.forEach((result, index) => {
      console.log(`${index + 1}. ${result.title}`);
      console.log(` URL: ${result.url}`);
      console.log(` Source: ${result.source || result.sourceType}`);
      console.log('');
    });
    console.log('β All returned URLs should now be accessible!');
  } catch (error) {
    console.log('β Search test failed:', error.message);
  }
}

/**
 * Manual smoke test for the URL validation system.
 *
 * First probes each URL individually and logs its status (2xx/3xx counts as
 * valid), then sends one query to the search endpoint and lists what it
 * returns. All output goes to `console.log`; nothing is returned and no
 * failure is propagated to the caller.
 *
 * @param {object} [options]
 * @param {readonly string[]} [options.urls] - URLs to probe individually.
 * @param {string} [options.searchEndpoint] - Search API URL.
 * @param {number} [options.timeoutMs=5000] - Timeout per URL probe, in ms.
 * @param {number} [options.searchTimeoutMs=30000] - Timeout for the search request, in ms.
 * @returns {Promise<void>}
 */
async function testUrlValidation({
  urls = DEFAULT_TEST_URLS,
  searchEndpoint = DEFAULT_SEARCH_ENDPOINT,
  timeoutMs = DEFAULT_URL_TIMEOUT_MS,
  searchTimeoutMs = DEFAULT_SEARCH_TIMEOUT_MS,
} = {}) {
  console.log('π Testing URL Validation System...\n');
  console.log('π§ͺ Testing individual URL validation...\n');

  // Probed one at a time on purpose so each URL's log lines stay grouped.
  for (const url of urls) {
    try {
      console.log(`Testing: ${url}`);
      const status = await probeUrl(url, timeoutMs);
      const isValid = status >= 200 && status < 400;
      console.log(` Status: ${status} - ${isValid ? 'β VALID' : 'β INVALID'}`);
    } catch (error) {
      console.log(` Error: ${error.message} - β INVALID`);
    }
    console.log('');
  }

  console.log('π Testing search with URL validation...\n');
  // Test the search endpoint
  await runSearchCheck(searchEndpoint, searchTimeoutMs);

  console.log('\nπ― URL Validation Test Complete!');
  console.log('π‘ The system now filters out broken/inaccessible websites');
}
// Run the smoke test as soon as the script is loaded. The promise is not left
// floating: an unexpected rejection is reported instead of surfacing as an
// unhandled-rejection warning.
testUrlValidation().catch((error) => {
  console.error('URL validation test crashed:', error);
});