# NOTE: residue from the web page this file was copied from (not part of the module):
# StarrySkyWorld's picture / Initial commit / 494c89b
"""
Network Collector - intercepts network requests
"""
import time
from typing import Dict, List
from .base import BaseCollector
class NetworkCollector(BaseCollector):
    """
    Intercept the network requests a page makes through fetch and XHR.

    Recorded for every request:
    - URL, method, headers
    - Request body
    - Response status, headers, body
    - Timing (duration)

    NOTE(review): ``self.page``, ``self.session`` and ``self.log`` are not
    defined in this class; presumably they are provided by ``BaseCollector``
    -- confirm against the base class.
    """

    name = "network"

    # URL fragments that make a captured request worth a log line in collect().
    _LOG_URL_MARKERS = ('api/', 'signin', 'oauth', 'token', 'fingerprint')

    # JavaScript payload that wraps window.fetch and window.XMLHttpRequest and
    # appends one record per finished request to window.__capturedRequests.
    # It is guarded by window.__networkInterceptorInstalled, so evaluating it
    # more than once in the same document is a no-op.
    _INTERCEPTOR_JS = '''
    (function() {
        if (window.__networkInterceptorInstalled) return;
        window.__networkInterceptorInstalled = true;
        window.__capturedRequests = [];

        // === FETCH INTERCEPTOR ===
        const origFetch = window.fetch;
        window.fetch = async function(input, init) {
            // fetch() accepts a string, a Request, or any stringifiable
            // object such as URL; derive url/method for all three forms.
            const isRequest = typeof Request !== 'undefined' && input instanceof Request;
            const url = typeof input === 'string'
                ? input
                : (isRequest ? input.url : String(input));
            const method = init?.method || (isRequest ? input.method : 'GET');
            const startTime = performance.now();
            const requestId = Date.now() + '_' + Math.random().toString(36).slice(2, 11);

            // Capture request body (truncated to 5000 characters)
            let requestBody = null;
            if (init?.body) {
                try {
                    if (typeof init.body === 'string') {
                        requestBody = init.body.substring(0, 5000);
                    } else if (init.body instanceof FormData) {
                        requestBody = '[FormData]';
                    } else {
                        requestBody = String(init.body).substring(0, 5000);
                    }
                } catch(e) {
                    requestBody = '[Unable to capture]';
                }
            }

            // Capture request headers (Headers instance, array of pairs, or plain object)
            let requestHeaders = {};
            if (init?.headers) {
                try {
                    if (init.headers instanceof Headers) {
                        init.headers.forEach((v, k) => requestHeaders[k] = v);
                    } else if (Array.isArray(init.headers)) {
                        init.headers.forEach(([k, v]) => requestHeaders[k] = v);
                    } else {
                        requestHeaders = {...init.headers};
                    }
                } catch(e) {}
            }

            try {
                const response = await origFetch.apply(this, arguments);
                const duration = performance.now() - startTime;

                // Capture response body (truncated to 10000 characters).
                // NOTE(review): awaiting the cloned body delays handing the
                // response back to the page until the body is fully read,
                // which stalls streaming responses -- confirm this is acceptable.
                let responseBody = null;
                try {
                    const clone = response.clone();
                    const text = await clone.text();
                    responseBody = text.substring(0, 10000);
                } catch(e) {}

                // Capture response headers
                let responseHeaders = {};
                try {
                    response.headers.forEach((v, k) => responseHeaders[k] = v);
                } catch(e) {}

                window.__capturedRequests.push({
                    id: requestId,
                    timestamp: Date.now(),
                    type: 'fetch',
                    url: url,
                    method: method,
                    requestHeaders: requestHeaders,
                    requestBody: requestBody,
                    status: response.status,
                    statusText: response.statusText,
                    responseHeaders: responseHeaders,
                    responseBody: responseBody,
                    duration: Math.round(duration),
                    error: null
                });
                return response;
            } catch(err) {
                window.__capturedRequests.push({
                    id: requestId,
                    timestamp: Date.now(),
                    type: 'fetch',
                    url: url,
                    method: method,
                    requestHeaders: requestHeaders,
                    requestBody: requestBody,
                    status: 0,
                    statusText: '',
                    responseHeaders: {},
                    responseBody: null,
                    duration: Math.round(performance.now() - startTime),
                    error: err.message
                });
                throw err;
            }
        };

        // === XHR INTERCEPTOR ===
        const origXHR = window.XMLHttpRequest;
        window.XMLHttpRequest = function() {
            const xhr = new origXHR();
            const data = {
                id: Date.now() + '_' + Math.random().toString(36).slice(2, 11),
                type: 'xhr',
                timestamp: 0,
                url: '',
                method: '',
                requestHeaders: {},
                requestBody: null,
                status: 0,
                statusText: '',
                responseHeaders: {},
                responseBody: null,
                duration: 0,
                error: null
            };
            let startTime = 0;

            const origOpen = xhr.open;
            xhr.open = function(method, url) {
                // open() starts a new request; reset per-request state so a
                // reused XHR object does not leak data from its previous run.
                data.method = method;
                data.url = String(url);
                data.timestamp = Date.now();
                data.requestHeaders = {};
                data.requestBody = null;
                data.status = 0;
                data.statusText = '';
                data.responseHeaders = {};
                data.responseBody = null;
                data.duration = 0;
                data.error = null;
                startTime = performance.now();
                return origOpen.apply(this, arguments);
            };

            const origSetHeader = xhr.setRequestHeader;
            xhr.setRequestHeader = function(name, value) {
                data.requestHeaders[name] = value;
                return origSetHeader.apply(this, arguments);
            };

            const origSend = xhr.send;
            xhr.send = function(body) {
                if (body) {
                    try {
                        data.requestBody = String(body).substring(0, 5000);
                    } catch(e) {}
                }
                return origSend.apply(this, arguments);
            };

            // Listeners are registered once per XHR object (not per send())
            // so a reused object does not produce duplicate records.
            xhr.addEventListener('load', function() {
                data.status = xhr.status;
                data.statusText = xhr.statusText;
                data.duration = Math.round(performance.now() - startTime);
                try {
                    // responseText throws for non-text responseType values
                    data.responseBody = (xhr.responseText || '').substring(0, 10000);
                } catch(e) {}
                try {
                    const headers = xhr.getAllResponseHeaders();
                    headers.split('\\r\\n').forEach(line => {
                        const [name, ...rest] = line.split(': ');
                        if (name) data.responseHeaders[name] = rest.join(': ');
                    });
                } catch(e) {}
                window.__capturedRequests.push({...data});
            });
            xhr.addEventListener('error', function() {
                data.error = 'XHR Error';
                data.duration = Math.round(performance.now() - startTime);
                window.__capturedRequests.push({...data});
            });
            xhr.addEventListener('timeout', function() {
                data.error = 'XHR Timeout';
                data.duration = Math.round(performance.now() - startTime);
                window.__capturedRequests.push({...data});
            });

            return xhr;
        };
        // Keep the wrapper looking like the native constructor: instanceof
        // checks, prototype patching and XMLHttpRequest.DONE etc. keep working.
        window.XMLHttpRequest.prototype = origXHR.prototype;
        ['UNSENT', 'OPENED', 'HEADERS_RECEIVED', 'LOADING', 'DONE'].forEach(function(k) {
            window.XMLHttpRequest[k] = origXHR[k];
        });

        console.log('[NetworkCollector] Interceptors installed');
    })();
    '''

    def __init__(self, session):
        """Create the collector and start with an empty per-step request buffer."""
        super().__init__(session)
        # Requests captured since the current step started.
        self._step_requests = []

    def inject(self) -> None:
        """Inject the fetch/XHR interceptors into the page.

        Does nothing when there is no page. Failures are logged, not raised.
        """
        if not self.page:
            return

        try:
            # Install into the document that is currently loaded.
            self.page.run_js(self._INTERCEPTOR_JS)
            # Register the same script for documents loaded later.
            self.page.run_cdp(
                'Page.addScriptToEvaluateOnNewDocument',
                source=self._INTERCEPTOR_JS,
            )
            self.log("Interceptors injected")
        except Exception as e:
            self.log(f"Injection failed: {e}")

    def collect(self) -> List[Dict]:
        """Drain the requests captured in the page and record them.

        Each captured entry is appended to ``self.session.all_requests`` and
        to the current step buffer; requests whose URL contains one of
        ``_LOG_URL_MARKERS`` are also logged.

        Returns:
            The entries recorded by this call. Empty when there is no page or
            when reading from the page fails; failures are logged, not raised.
        """
        if not self.page:
            return []

        collected: List[Dict] = []
        try:
            # Read and reset the page-side buffer in one script so entries
            # are not handed out twice.
            captured = self.page.run_js('''
                const reqs = window.__capturedRequests || [];
                window.__capturedRequests = [];
                return reqs;
            ''') or []

            for req in captured:
                if not isinstance(req, dict):
                    # Unexpected shape from the page; skip rather than abort the batch.
                    continue

                # Add to the session-wide list and the per-step buffer.
                self.session.all_requests.append(req)
                self._step_requests.append(req)
                collected.append(req)

                # Log the important requests. The URL comes from page-side
                # script, so coerce it instead of trusting it to be a string.
                url = str(req.get('url') or '')
                if any(marker in url for marker in self._LOG_URL_MARKERS):
                    status = req.get('status', 0)
                    duration = req.get('duration', 0)
                    self.log(f"{req.get('method')} {url[:50]}... -> {status} ({duration}ms)")
        except Exception as e:
            # Best-effort collector: report the problem, keep what was recorded.
            self.log(f"Collect failed: {e}")

        return collected

    def on_step_start(self, step_name: str) -> None:
        """Clear the per-step request buffer for a new step."""
        self._step_requests = []

    def on_step_end(self, step) -> None:
        """Attach a copy of the buffered requests to ``step`` and reset the buffer."""
        step.requests = self._step_requests.copy()
        self._step_requests = []