// Spaces:
// Runtime error
// Runtime error
// Test enhanced URL validation specifically for ArXiv and other problematic URLs.

/** URLs probed by default: known-good papers, malformed ArXiv IDs, and dead links. */
const DEFAULT_TEST_URLS = Object.freeze([
  // Valid ArXiv URLs
  'https://arxiv.org/abs/2001.08361', // Real paper
  'https://arxiv.org/abs/1706.03762', // Attention is All You Need
  // Invalid ArXiv URLs (the problematic ones)
  'https://arxiv.org/abs/2024.rag.advances', // Invalid format
  'https://arxiv.org/abs/2024.fake.paper', // Invalid format
  'https://arxiv.org/abs/9999.99999', // Non-existent paper
  // Other problematic URLs
  'https://vldb.org/vector-db-2024', // 404 page
  'https://cvpr.org', // Unreachable
]);

/**
 * Accepted ArXiv identifier shapes. Both allow an optional version suffix
 * (`v1`, `v2`, ...), which ArXiv abstract URLs commonly carry.
 * The month part of new-style IDs is deliberately not range-checked, so an ID
 * such as `9999.99999` still reaches the network probe.
 */
const ARXIV_ID_FORMATS = Object.freeze([
  /^\d{4}\.\d{4,5}(v\d+)?$/, // New format: 1706.03762 or 1706.03762v5
  /^[a-z-]+(\.[A-Z]{2})?\/\d{7}(v\d+)?$/, // Old format: hep-th/9901001 or math.GT/0309136
]);

/**
 * Lower-case substrings that mark a 200 response as an error page.
 * NOTE(review): 'error' and 'not found' are very broad and may flag healthy
 * pages whose markup or scripts merely mention those words — confirm that this
 * matches the validator being simulated.
 */
const ERROR_INDICATORS = Object.freeze([
  'not recognized',
  'might instead try to search',
  'article identifier',
  'not found',
  'error',
]);

const VALIDATOR_USER_AGENT = 'Knowledge-Base-Browser/1.0 (Enhanced Validator)';

/**
 * Extracts the paper ID from an ArXiv abstract URL.
 *
 * @param {URL} urlObj - Parsed URL.
 * @returns {string|null} The ID after `/abs/`, or null when the URL is not an
 *   ArXiv abstract page.
 */
function getArxivPaperId(urlObj) {
  const host = urlObj.hostname;
  // Exact host or subdomain only: a substring test would also accept
  // look-alike hosts such as `notarxiv.org` or `arxiv.org.example.com`.
  const isArxivHost = host === 'arxiv.org' || host.endsWith('.arxiv.org');
  if (!isArxivHost) {
    return null;
  }
  // Match on the pathname so a query string or fragment never leaks into the ID.
  const match = urlObj.pathname.match(/^\/abs\/(.+?)\/?$/);
  return match ? match[1] : null;
}

/**
 * @param {string} paperId - Candidate ArXiv identifier.
 * @returns {boolean} True when the ID matches one of the accepted formats.
 */
function isValidArxivId(paperId) {
  return ARXIV_ID_FORMATS.some((format) => format.test(paperId));
}

/**
 * @param {string} url - URL taken from a search result.
 * @returns {boolean} True when the URL is an ArXiv abstract URL whose ID has an
 *   invalid format. Unparseable and non-ArXiv URLs yield false.
 */
function hasMalformedArxivId(url) {
  let urlObj;
  try {
    urlObj = new URL(url);
  } catch {
    return false;
  }
  const paperId = getArxivPaperId(urlObj);
  return paperId !== null && !isValidArxivId(paperId);
}

/**
 * Formats an error for logging, appending the underlying cause when present
 * (Node's fetch reports only "fetch failed" and puts the detail in `cause`).
 *
 * @param {unknown} error - Caught value.
 * @returns {string} Human-readable description.
 */
function describeError(error) {
  const message = error?.message ?? String(error);
  const causeMessage = error?.cause?.message;
  return causeMessage ? `${message}: ${causeMessage}` : message;
}

/**
 * Validates one URL (format check for ArXiv IDs, then a live GET) and logs the verdict.
 *
 * @param {string} url - URL to validate.
 * @param {{fetchImpl: Function, log: Function, timeoutMs: number}} deps - Injected dependencies.
 * @returns {Promise<void>}
 */
async function checkUrl(url, { fetchImpl, log, timeoutMs }) {
  log(`Testing: ${url}`);

  let urlObj;
  try {
    urlObj = new URL(url);
  } catch {
    // A parse failure is not a network problem, so report it as such.
    log(` Result: ❌ INVALID (malformed URL)`);
    return;
  }

  const paperId = getArxivPaperId(urlObj);
  if (paperId !== null) {
    log(` ArXiv ID: ${paperId}`);
    const hasValidFormat = isValidArxivId(paperId);
    log(` Format valid: ${hasValidFormat}`);
    if (!hasValidFormat) {
      // No point hitting the network for an ID that cannot exist.
      log(` Result: ❌ INVALID (bad format)`);
      return;
    }
  }

  try {
    const response = await fetchImpl(url, {
      method: 'GET',
      signal: AbortSignal.timeout(timeoutMs),
      headers: { 'User-Agent': VALIDATOR_USER_AGENT },
    });
    log(` Status: ${response.status}`);
    if (!response.ok) {
      log(` Result: ❌ INVALID (${response.status})`);
      return;
    }

    // Some sites answer 200 with an error page, so inspect the body as well.
    const content = (await response.text()).toLowerCase();
    const hasError = ERROR_INDICATORS.some((indicator) => content.includes(indicator));
    if (hasError) {
      log(` Content: Contains error messages`);
      log(` Result: ❌ INVALID (error content)`);
    } else {
      log(` Content: Valid`);
      log(` Result: ✅ VALID`);
    }
  } catch (error) {
    log(` Error: ${describeError(error)}`);
    log(` Result: ❌ INVALID (network error)`);
  }
}

/**
 * Queries the search endpoint for "rag" and reports whether any returned URL
 * should have been filtered out by validation.
 *
 * @param {{searchEndpoint: string, fetchImpl: Function, log: Function, searchTimeoutMs: number}} deps
 * @returns {Promise<void>}
 */
async function checkSearchFiltering({ searchEndpoint, fetchImpl, log, searchTimeoutMs }) {
  try {
    const response = await fetchImpl(searchEndpoint, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        query: 'rag',
        searchType: 'semantic',
        limit: 10,
      }),
      // Without a timeout a server that accepts but never answers hangs the script.
      signal: AbortSignal.timeout(searchTimeoutMs),
    });
    if (!response.ok) {
      log(`❌ Search request failed (${response.status})`);
      return;
    }

    const data = await response.json();
    const results = data?.results;
    if (!Array.isArray(results)) {
      log('❌ Search response did not contain a results array');
      return;
    }

    log(`Search for "rag" returned ${results.length} results:`);
    results.forEach((result, index) => {
      const resultUrl = result?.url;
      log(`${index + 1}. ${result?.title}`);
      log(` URL: ${resultUrl}`);
      if (typeof resultUrl !== 'string' || resultUrl === '') {
        log(` ⚠️ Result has no URL`);
      } else if (resultUrl.includes('2024.rag.advances') || hasMalformedArxivId(resultUrl)) {
        // Flags the known offender and any other ArXiv URL with a malformed ID.
        log(` ⚠️ This should have been filtered out!`);
      } else {
        log(` ✅ Valid URL`);
      }
      log('');
    });
  } catch (error) {
    log('❌ Search test failed:', describeError(error));
  }
}

/**
 * Runs the enhanced URL validation smoke test: validates each URL in turn,
 * then checks that the search endpoint does not return problematic URLs.
 * Calling it with no arguments probes the built-in URL list over the real
 * network and the local search API.
 *
 * @param {object} [options]
 * @param {string[]} [options.urls] - URLs to validate (defaults to the built-in list).
 * @param {string} [options.searchEndpoint] - Search API URL.
 * @param {Function} [options.fetchImpl] - fetch-compatible function (injectable for offline runs).
 * @param {Function} [options.log] - Sink for report lines (console.log-compatible).
 * @param {number} [options.timeoutMs] - Timeout for each URL probe, in milliseconds.
 * @param {number} [options.searchTimeoutMs] - Timeout for the search request, in milliseconds.
 * @returns {Promise<void>} Resolves when the report is complete; probe failures are
 *   logged, not thrown.
 */
async function testEnhancedValidation({
  urls = DEFAULT_TEST_URLS,
  searchEndpoint = 'http://localhost:5000/api/search',
  fetchImpl = globalThis.fetch,
  log = console.log,
  timeoutMs = 5000,
  searchTimeoutMs = 30000,
} = {}) {
  log('🔍 Testing Enhanced URL Validation...\n');
  log('🧪 Testing individual URLs with enhanced validation...\n');

  // Sequential on purpose: keeps the report ordered and avoids hammering hosts.
  for (const url of urls) {
    await checkUrl(url, { fetchImpl, log, timeoutMs });
    log('');
  }

  log('🔍 Testing search with enhanced validation...\n');
  // Test the search endpoint to see if problematic URLs are filtered
  await checkSearchFiltering({ searchEndpoint, fetchImpl, log, searchTimeoutMs });

  log('🎯 Enhanced Validation Test Complete!');
}
// Run the smoke test when the script is executed. The promise is handled
// explicitly so an unexpected failure is reported and yields a non-zero exit code.
testEnhancedValidation().catch((error) => {
  console.error('Enhanced validation test crashed:', error);
  process.exitCode = 1;
});