Spaces:
Sleeping
Sleeping
| """ | |
| Network Collector - перехват сетевых запросов | |
| """ | |
| import time | |
| from typing import Dict, List | |
| from .base import BaseCollector | |
class NetworkCollector(BaseCollector):
    """Capture the page's network traffic issued through fetch and XHR.

    A JavaScript snippet is injected into the page; it wraps
    ``window.fetch`` and ``window.XMLHttpRequest`` and appends one record per
    finished request to ``window.__capturedRequests``. Each record holds:

    - id, timestamp, type (``'fetch'`` or ``'xhr'``)
    - url, method, requestHeaders, requestBody (body capped at 5000 chars)
    - status, statusText, responseHeaders, responseBody (body capped at
      10000 chars)
    - duration in milliseconds and an ``error`` message (``None`` on success)

    ``collect()`` drains that buffer into ``session.all_requests`` and into a
    per-step list that ``on_step_end()`` attaches to the step.

    NOTE(review): ``self.page``, ``self.session`` and ``self.log`` are
    presumably provided by ``BaseCollector``; that class is not visible here.
    """

    name = "network"

    # URL substrings that mark a request as worth logging in collect().
    _IMPORTANT_URL_MARKERS = ('api/', 'signin', 'oauth', 'token', 'fingerprint')

    # Returns the buffered records and resets the buffer in the page.
    _DRAIN_JS = '''
        const reqs = window.__capturedRequests || [];
        window.__capturedRequests = [];
        return reqs;
    '''

    # Installed at most once per document (guarded by
    # window.__networkInterceptorInstalled).
    _INTERCEPTOR_JS = '''
    (function() {
        if (window.__networkInterceptorInstalled) return;
        window.__networkInterceptorInstalled = true;
        window.__capturedRequests = [];

        const newId = function() {
            return Date.now() + '_' + Math.random().toString(36).slice(2, 11);
        };

        // Bookkeeping must never break the page: recreate the buffer if the
        // page removed it and swallow any failure while recording.
        const pushRecord = function(rec) {
            try {
                if (!Array.isArray(window.__capturedRequests)) {
                    window.__capturedRequests = [];
                }
                window.__capturedRequests.push(rec);
            } catch(e) {}
        };

        // === FETCH INTERCEPTOR ===
        const origFetch = window.fetch;
        if (typeof origFetch === 'function') {
            window.fetch = async function(input, init) {
                const inputObj = (input && typeof input === 'object') ? input : null;
                // Request objects expose .url; URL objects do not, so fall
                // back to String(input), which yields the href.
                const url = typeof input === 'string'
                    ? input
                    : ((inputObj && inputObj.url) || String(input));
                // A Request passed without init carries its own method.
                const method = init?.method || (inputObj && inputObj.method) || 'GET';
                const startTime = performance.now();
                const requestId = newId();

                // Capture request body
                let requestBody = null;
                if (init?.body) {
                    try {
                        if (typeof init.body === 'string') {
                            requestBody = init.body.substring(0, 5000);
                        } else if (init.body instanceof FormData) {
                            requestBody = '[FormData]';
                        } else {
                            requestBody = String(init.body).substring(0, 5000);
                        }
                    } catch(e) {
                        requestBody = '[Unable to capture]';
                    }
                }

                // Capture request headers (Headers object, array of pairs,
                // or plain object; falls back to the Request's own headers)
                let requestHeaders = {};
                const headerSource = init?.headers || (inputObj ? inputObj.headers : null);
                if (headerSource) {
                    try {
                        if (headerSource instanceof Headers) {
                            headerSource.forEach((v, k) => requestHeaders[k] = v);
                        } else if (Array.isArray(headerSource)) {
                            headerSource.forEach(pair => requestHeaders[pair[0]] = pair[1]);
                        } else {
                            requestHeaders = {...headerSource};
                        }
                    } catch(e) {}
                }

                const record = function(fields) {
                    pushRecord(Object.assign({
                        id: requestId,
                        timestamp: Date.now(),
                        type: 'fetch',
                        url: url,
                        method: method,
                        requestHeaders: requestHeaders,
                        requestBody: requestBody,
                        status: 0,
                        statusText: '',
                        responseHeaders: {},
                        responseBody: null,
                        duration: Math.round(performance.now() - startTime),
                        error: null
                    }, fields));
                };

                // Only the real network call sits in this try block, so a
                // recording failure is never reported as a network error.
                let response;
                try {
                    response = await origFetch.apply(this, arguments);
                } catch(err) {
                    record({error: (err && err.message) || String(err)});
                    throw err;
                }

                // Duration is measured up to the arrival of the response
                // (headers), not to the end of the body download.
                const duration = Math.round(performance.now() - startTime);

                // Capture response headers
                let responseHeaders = {};
                try {
                    response.headers.forEach((v, k) => responseHeaders[k] = v);
                } catch(e) {}

                const finish = function(body) {
                    record({
                        status: response.status,
                        statusText: response.statusText,
                        responseHeaders: responseHeaders,
                        responseBody: body,
                        duration: duration
                    });
                };

                // Read the body from a clone in the background. Awaiting it
                // here would keep the caller from seeing the response until
                // the whole body has arrived, which stalls streamed replies.
                let clone = null;
                try {
                    clone = response.clone();
                } catch(e) {}
                if (clone) {
                    clone.text().then(
                        text => finish(text.substring(0, 10000)),
                        () => finish(null)
                    );
                } else {
                    finish(null);
                }

                return response;
            };
        }

        // === XHR INTERCEPTOR ===
        const origXHR = window.XMLHttpRequest;
        window.XMLHttpRequest = function() {
            const xhr = new origXHR();
            const data = {
                id: newId(),
                type: 'xhr',
                timestamp: 0,
                url: '',
                method: '',
                requestHeaders: {},
                requestBody: null,
                status: 0,
                statusText: '',
                responseHeaders: {},
                responseBody: null,
                duration: 0,
                error: null
            };
            let startTime = 0;

            const origOpen = xhr.open;
            xhr.open = function(method, url) {
                // open() starts a new request; reset per-request state so a
                // reused XHR object does not leak data from its previous run.
                data.id = newId();
                data.method = method;
                data.url = String(url);
                data.timestamp = Date.now();
                data.requestHeaders = {};
                data.requestBody = null;
                data.status = 0;
                data.statusText = '';
                data.responseHeaders = {};
                data.responseBody = null;
                data.duration = 0;
                data.error = null;
                startTime = performance.now();
                return origOpen.apply(this, arguments);
            };

            const origSetHeader = xhr.setRequestHeader;
            xhr.setRequestHeader = function(name, value) {
                data.requestHeaders[name] = value;
                return origSetHeader.apply(this, arguments);
            };

            const origSend = xhr.send;
            xhr.send = function(body) {
                if (body) {
                    try {
                        data.requestBody = String(body).substring(0, 5000);
                    } catch(e) {}
                }
                return origSend.apply(this, arguments);
            };

            // Listeners are registered once per object rather than on every
            // send(), so a reused XHR does not produce duplicate records.
            xhr.addEventListener('load', function() {
                data.status = xhr.status;
                data.statusText = xhr.statusText;
                data.duration = Math.round(performance.now() - startTime);

                // responseText throws for non-text responseType values
                try {
                    data.responseBody = (xhr.responseText || '').substring(0, 10000);
                } catch(e) {}

                try {
                    const headers = xhr.getAllResponseHeaders();
                    headers.split('\\r\\n').forEach(line => {
                        const [name, ...rest] = line.split(': ');
                        if (name) data.responseHeaders[name] = rest.join(': ');
                    });
                } catch(e) {}

                pushRecord({...data});
            });

            xhr.addEventListener('error', function() {
                data.error = 'XHR Error';
                data.duration = Math.round(performance.now() - startTime);
                pushRecord({...data});
            });

            xhr.addEventListener('timeout', function() {
                data.error = 'XHR Timeout';
                data.duration = Math.round(performance.now() - startTime);
                pushRecord({...data});
            });

            return xhr;
        };
        // Keep instanceof checks and the readyState constants
        // (XMLHttpRequest.DONE etc.) working for page code.
        window.XMLHttpRequest.prototype = origXHR.prototype;
        ['UNSENT', 'OPENED', 'HEADERS_RECEIVED', 'LOADING', 'DONE'].forEach(function(key) {
            window.XMLHttpRequest[key] = origXHR[key];
        });

        console.log('[NetworkCollector] Interceptors installed');
    })();
    '''

    def __init__(self, session) -> None:
        """Create the collector with an empty per-step request buffer.

        Args:
            session: Forwarded unchanged to ``BaseCollector.__init__``.
        """
        super().__init__(session)
        # Requests collected since the current step started.
        self._step_requests: List[Dict] = []

    def inject(self) -> None:
        """Install the fetch/XHR interceptors in the page.

        The script is run in the current document and also registered via the
        CDP command ``Page.addScriptToEvaluateOnNewDocument`` so that later
        documents get it too. Does nothing when ``self.page`` is falsy.
        Failures are logged through ``self.log`` and not raised.
        """
        if not self.page:
            return

        try:
            # Inject into the current page
            self.page.run_js(self._INTERCEPTOR_JS)

            # Inject into pages loaded later
            self.page.run_cdp(
                'Page.addScriptToEvaluateOnNewDocument',
                source=self._INTERCEPTOR_JS,
            )

            self.log("Interceptors injected")
        except Exception as e:
            self.log(f"Injection failed: {e}")

    def collect(self) -> List[Dict]:
        """Drain the records captured in the page and store them.

        Every record is appended to ``self.session.all_requests`` and to the
        current step buffer; requests whose URL contains one of
        ``_IMPORTANT_URL_MARKERS`` are also logged.

        Returns:
            The drained records, or ``[]`` when there is no page or the
            page script could not be executed (the failure is logged).
        """
        if not self.page:
            return []

        # Only the browser call is guarded: once it succeeds the in-page
        # buffer is already cleared, so the records must not be dropped by a
        # later error while logging.
        try:
            captured = self.page.run_js(self._DRAIN_JS) or []
        except Exception as e:
            self.log(f"Collect failed: {e}")
            return []

        for req in captured:
            # Add to the session-wide list and to the current step
            self.session.all_requests.append(req)
            self._step_requests.append(req)
            self._log_if_important(req)

        return captured

    def _log_if_important(self, req) -> None:
        """Log a one-line summary when the request URL looks important."""
        if not isinstance(req, dict):
            return

        # The URL may be missing or a non-string value; normalise before
        # the substring checks and the slice below.
        url = req.get('url') or ''
        if not isinstance(url, str):
            url = str(url)

        if any(marker in url for marker in self._IMPORTANT_URL_MARKERS):
            status = req.get('status', 0)
            duration = req.get('duration', 0)
            self.log(f"{req.get('method')} {url[:50]}... -> {status} ({duration}ms)")

    def on_step_start(self, step_name: str) -> None:
        """Clear the per-step request buffer for a new step.

        Args:
            step_name: Name of the step being started (unused here).
        """
        self._step_requests = []

    def on_step_end(self, step) -> None:
        """Attach the requests gathered during the step to ``step.requests``.

        Args:
            step: Object that receives a copy of the buffered requests in its
                ``requests`` attribute; the buffer is then cleared.
        """
        step.requests = self._step_requests.copy()
        self._step_requests = []