|
|
<!DOCTYPE html> |
|
|
<html lang="ru"> |
|
|
<head> |
|
|
<meta charset="utf-8" /> |
|
|
<meta name="viewport" content="width=device-width,initial-scale=1" /> |
|
|
<title>Advanced Leak-based Username Generator — willhaben/target</title> |
|
|
<style>
/* Theme tokens shared by every rule below. */
:root{--bg:#0b0f14;--card:#0f1720;--muted:#9aa6b2;--text:#e6eef6;--accent:#4fc3f7;--good:#7be495}
*{box-sizing:border-box}
html,body{height:100%;margin:0;font-family:Inter,Segoe UI,Roboto,Arial,sans-serif;background:linear-gradient(180deg,#061018,#07131a);color:var(--text)}

/* Layout shell */
.app{max-width:1100px;margin:24px auto;padding:18px;border-radius:12px;background:linear-gradient(180deg,#08121a,#07101a);box-shadow:0 8px 40px rgba(0,0,0,.6)}
h1{margin:0 0 8px;font-size:20px}
.grid{display:grid;grid-template-columns:360px 1fr;gap:16px}
.card{background:var(--card);padding:14px;border-radius:10px;border:1px solid rgba(255,255,255,0.02)}

/* Form controls.
   input:not([type]) is listed because an attribute selector such as
   input[type="text"] does not match an <input> that has no type attribute. */
label{display:block;font-size:13px;color:var(--muted);margin-top:10px}
input:not([type]), input[type="text"], input[type="number"], select, textarea{
  width:100%;padding:10px;border-radius:8px;border:1px solid rgba(255,255,255,0.04);background:#07111a;color:var(--text);font-size:13px
}
/* Visible keyboard focus (replaces the former blanket outline:none). */
input:focus-visible, select:focus-visible, textarea:focus-visible, button:focus-visible{outline:2px solid var(--accent);outline-offset:2px}
textarea{min-height:90px;resize:vertical;font-family:ui-monospace,Menlo,monospace}
input[type="file"]{color:var(--text)}
.row{display:flex;gap:8px;align-items:center}
.switch{display:flex;gap:8px;align-items:center}

/* Buttons */
.btn{padding:10px 12px;border-radius:8px;border:0;background:linear-gradient(90deg,var(--accent),#7be495);color:#042028;font-weight:700;cursor:pointer}
.btn.alt{background:transparent;border:1px solid rgba(255,255,255,0.04);color:var(--text)}
.export-btn{background:#20303b;border-radius:8px;color:var(--text);padding:8px 10px;border:1px solid rgba(255,255,255,0.03);cursor:pointer}
.controls{display:flex;gap:8px;flex-wrap:wrap;margin-top:10px}
.controls-right{display:flex;gap:8px;align-items:center}

/* Text helpers */
.small{font-size:12px;color:var(--muted)}
.muted{color:var(--muted)}
.chip{padding:6px 8px;border-radius:999px;background:rgba(255,255,255,0.02);font-size:13px}
.footer{margin-top:12px;font-size:12px;color:var(--muted)}

/* Statistics, lists and pattern rows */
.stats{display:grid;grid-template-columns:repeat(2,1fr);gap:8px;margin-top:8px}
.stat{background:linear-gradient(180deg,rgba(255,255,255,0.01),transparent);padding:8px;border-radius:8px;border:1px solid rgba(255,255,255,0.02)}
.list{max-height:60vh;overflow:auto;padding:8px;border-radius:8px;border:1px solid rgba(255,255,255,0.02);background:#07121a}
.pattern-item{display:flex;justify-content:space-between;align-items:center;padding:6px 8px;border-radius:6px;margin-bottom:6px;background:rgba(255,255,255,0.01)}
.pattern-item .bar{height:8px;background:linear-gradient(90deg,var(--accent),#7be495);border-radius:6px}
.table{width:100%;border-collapse:collapse;margin-top:8px}
.table th,.table td{padding:6px;border-bottom:1px solid rgba(255,255,255,0.02);text-align:left;font-size:13px}

/* File-read progress: the script sets the width of the inner <i>. */
.progress{height:10px;background:rgba(255,255,255,0.03);border-radius:6px;overflow:hidden;margin-top:8px}
.progress > i{display:block;height:100%;width:0;background:linear-gradient(90deg,var(--accent),#7be495)}

/* Collapse to a single column on narrow viewports. */
@media (max-width:980px){.grid{grid-template-columns:1fr}}
</style>
|
|
</head> |
|
|
<body>
<!--
  Page markup. Element ids are looked up by the inline script and must stay stable.
  Accessibility notes:
  - role="application" was removed: it switches screen readers out of browse mode
    and is only appropriate for widgets that implement their own keyboard model.
  - Every control has a programmatic label (label[for] or aria-label).
  - The live region is limited to the statistics grid instead of the whole card.
-->
<main class="app" aria-label="Advanced username generator">
  <header style="display:flex;justify-content:space-between;align-items:center;gap:12px">
    <div>
      <h1>Advanced leak-based username generator</h1>
      <div class="small">Загрузите очищенный список логинов (email) — анализируем паттерны, домены и генерируем реалистичные вариации.</div>
    </div>
    <div class="chip">Target: willhaben / custom</div>
  </header>

  <div class="grid" style="margin-top:12px">

    <!-- Left column: input file, analysis options and analysis results -->
    <div class="card">
      <label for="fileInput">1) Загрузить файл с логинами (.txt)</label>
      <input id="fileInput" type="file" accept=".txt">
      <div class="small">Поддерживает большие файлы — обработка в воркере и по чанкам. Один email на строку.</div>
      <div class="progress" title="progress"><i id="fileProgress"></i></div>

      <label id="lblAnalysisOptions">2) Настройки анализа</label>
      <div class="small">Нормализация: диакритика → латинские аналоги, lowercase, удаление пробелов</div>
      <div role="group" aria-labelledby="lblAnalysisOptions" style="display:flex;gap:8px;margin-top:8px">
        <label class="switch"><input id="optNormalize" type="checkbox" checked> <span class="small">Нормализация</span></label>
        <label class="switch"><input id="optExtractNames" type="checkbox" checked> <span class="small">Экстракт имен/фамилий</span></label>
      </div>

      <label id="lblSeeds" style="margin-top:10px">3) Ручной ввод слов-источников (опционально)</label>
      <div class="small">Если оставить пустыми, имена/фамилии будут извлечены из датасета</div>
      <label for="firstSeeds">Имена (через запятую)</label>
      <input id="firstSeeds" type="text" placeholder="daniel,anna,roman,kevin...">
      <label for="lastSeeds">Фамилии (через запятую)</label>
      <input id="lastSeeds" type="text" placeholder="medved,schmidt,pirker...">
      <label for="nickSeeds">Ники / популярные слова (через запятую)</label>
      <input id="nickSeeds" type="text" placeholder="shadow,ranger,stazzor,aligator...">

      <div class="controls" style="margin-top:10px">
        <button id="analyzeBtn" type="button" class="btn alt">Анализировать файл</button>
        <button id="resetBtn" type="button" class="btn alt">Сбросить</button>
      </div>

      <div class="stats" style="margin-top:12px" aria-live="polite">
        <div class="stat"><div class="small">Всего записей</div><div id="statTotal">0</div></div>
        <div class="stat"><div class="small">Уникальных локал-партий</div><div id="statUnique">0</div></div>
        <div class="stat"><div class="small">Популярный домен</div><div id="statTopDomain">—</div></div>
        <div class="stat"><div class="small">Шаблонов распознано</div><div id="statPatterns">—</div></div>
      </div>

      <label id="lblPatternList" style="margin-top:12px">Результат анализа — паттерны (клик для выбора/исключения)</label>
      <div id="patternList" class="list" role="group" aria-labelledby="lblPatternList" style="min-height:120px"></div>

      <div style="display:flex;justify-content:space-between;align-items:center;margin-top:10px">
        <div class="small muted">Авто-расстановка весов паттернов по частоте (можно отредактировать)</div>
        <div class="small muted">Доля доменов и суффиксов вычислена из файла</div>
      </div>
    </div>

    <!-- Right column: generation parameters, pools and output -->
    <div class="card">
      <div style="display:flex;justify-content:space-between;align-items:center">
        <div>
          <label id="lblGenParams">4) Параметры генерации</label>
          <div class="small">Выберите паттерны и домены, затем задайте объём генерации и нажмите "Генерировать".</div>
        </div>
        <div class="controls-right">
          <button id="openSampleBtn" type="button" class="export-btn">Показать пример</button>
          <button id="genBtn" type="button" class="btn">Генерировать</button>
        </div>
      </div>

      <div style="display:flex;gap:8px;margin-top:10px">
        <div style="flex:1">
          <label for="genCount">Кол-во генерируемых</label>
          <input id="genCount" type="number" value="200" min="1">
        </div>
        <div style="width:160px">
          <label for="domainPreset">Домен приоритет</label>
          <select id="domainPreset">
            <option value="auto">Авто (из файла)</option>
            <option value="local">Локальные (gmx.at, aon.at...)</option>
            <option value="global">Global (gmail,yahoo,outlook)</option>
          </select>
        </div>
      </div>

      <label id="lblSuffixes" style="margin-top:10px">Настройки суффиксов / чисел</label>
      <div role="group" aria-labelledby="lblSuffixes" style="display:flex;gap:8px">
        <input id="sufCommon" type="text" aria-label="Популярные суффиксы" placeholder="Популярные суффиксы, через запятую (007,123,84,2005)">
        <input id="sufYears" type="text" aria-label="Годы (диапазон)" placeholder="Годы (префикс/диапазон) например 1960-2005">
      </div>

      <label id="lblPools" style="margin-top:10px">Входные источники (используются при генерации)</label>
      <div role="group" aria-labelledby="lblPools" style="display:grid;grid-template-columns:1fr 1fr;gap:8px">
        <textarea id="firstPool" aria-label="first names pool" placeholder="first names pool (one,comma,or newline separated)"></textarea>
        <textarea id="lastPool" aria-label="last names pool" placeholder="last names pool"></textarea>
      </div>

      <div class="controls" style="margin-top:10px">
        <button id="previewBtn" type="button" class="btn alt">Предпросмотр 25</button>
        <button id="exportTxt" type="button" class="export-btn">Экспорт .txt</button>
        <button id="exportCsv" type="button" class="export-btn">Экспорт .csv</button>
        <button id="exportJson" type="button" class="export-btn">Экспорт .json</button>
      </div>

      <label id="lblOutList" style="margin-top:12px">Сгенерированные логины (дубликаты удалены автоматически)</label>
      <div id="outList" class="list" role="region" aria-labelledby="lblOutList" tabindex="0" style="min-height:200px;white-space:pre-wrap;font-family:ui-monospace,Menlo,monospace"></div>

      <div class="footer">Гарантии: инструмент локально обрабатывает файлы в браузере. Для очень больших файлов (>100MB) рекомендуется использовать современные браузеры и больше памяти. Воркер/чанковая обработка активированы для производительности.</div>
    </div>
  </div>
</main>
|
|
|
|
|
<script> |
|
|
/* |
|
|
Robust client-side implementation: |
|
|
- Uses Web Worker (inline via Blob) for chunked file parsing + pattern extraction. |
|
|
- Defensive programming: try/catch, time-slicing, progress updates. |
|
|
- Pattern extraction: multiple regex classes + domain tally + suffix detection. |
|
|
- Weighted generation by observed frequencies, with user overrides. |
|
|
- Exports: txt, csv, json. Dedup & normalization. |
|
|
- All code contained in single file; no external dependencies. |
|
|
*/ |
|
|
|
|
|
/* ---------- Utils & Normalization ---------- */ |
|
|
/**
 * Explicit transliterations for characters whose preferred ASCII form is not
 * simply "base letter without the accent" (e.g. German umlauts) or that have
 * no Unicode decomposition at all (ß, ł, ø).
 */
const DIACRIT_MAP = {
  'ä':'ae','ö':'oe','ü':'ue','ß':'ss','š':'s','č':'c','ć':'c','ž':'z','á':'a','à':'a','â':'a',
  'é':'e','è':'e','ê':'e','ë':'e','í':'i','ó':'o','ò':'o','ô':'o','ñ':'n','ł':'l','ø':'o','ő':'o'
};

/**
 * Normalizes a free-text token: trim, lowercase, transliterate Latin
 * diacritics and remove all whitespace.
 *
 * @param {*} s Value to normalize; falsy values yield ''.
 * @returns {string} Normalized string.
 */
function normalizeStr(s){
  if(!s) return '';
  // NFC first so that decomposed input ("u" + U+0308) hits the same map entry as "ü".
  let res = String(s).trim().toLowerCase().normalize('NFC');
  // Explicit transliterations take priority over generic accent stripping.
  res = res.replace(/[^ -~]/g, ch => DIACRIT_MAP[ch] || ch);
  // Strip remaining combining marks, but only from Latin base letters, then
  // recompose so that non-Latin scripts (e.g. Cyrillic "й") are left intact.
  res = res.normalize('NFD').replace(/([a-z])[\u0300-\u036f]+/g, '$1').normalize('NFC');
  // Remove every whitespace character, including inner ones.
  res = res.replace(/\s+/g, '');
  return res;
}
|
|
/**
 * Returns the distinct truthy items of `arr`, keeping first-seen order.
 *
 * @param {Array} arr Input list.
 * @returns {Array} New array without falsy values or duplicates.
 */
function uniq(arr){
  const seen = new Set();
  for(const item of arr){
    if(item) seen.add(item);
  }
  return [...seen];
}
|
|
/**
 * Draws one key from a `{key: weight}` map with probability proportional to
 * its weight. Missing or falsy weights count as 0.
 *
 * @param {Object} map Weights keyed by candidate.
 * @returns {?string} Chosen key, or null when the map has no keys.
 */
function sampleWeighted(map){
  const weighted = Object.keys(map).map(key => [key, map[key] || 0]);
  if(weighted.length === 0) return null;

  let sum = 0;
  for(const [, weight] of weighted) sum += weight;

  // Walk the cumulative distribution until the random threshold is used up.
  let remaining = Math.random() * sum;
  for(const [key, weight] of weighted){
    remaining -= weight;
    if(remaining <= 0) return key;
  }
  // Floating-point leftovers: fall back to the last key.
  return weighted[weighted.length - 1][0];
}
|
|
|
|
|
/* ---------- Inline worker creation ---------- */ |
|
|
// Source of the analysis worker. It lives in a template literal, so regex
// backslashes are doubled here (\\d in this file becomes \d in the worker).
//
// Protocol: {action:'analyzeChunk', chunk:string, eof:boolean} in;
// one {action:'chunkResult', ...} out per chunk, followed by {action:'done'}
// when eof is true. All tallies are per chunk; the caller merges them.
const workerCode = `self.onmessage = function(ev){
  const {action, chunk, eof} = ev.data;
  if(action !== 'analyzeChunk') return;

  // Null-prototype maps: keys come from file content, so names such as
  // "constructor" must not collide with Object.prototype members.
  const tally = () => Object.create(null);
  const bump = (map, key) => { map[key] = (map[key] || 0) + 1; };

  const lines = String(chunk || '').split(/\\r?\\n/).map(l => l.trim()).filter(Boolean);
  const domainCounts = tally();
  const domainMap = tally();
  const suffixCounts = tally();
  const nameCandidates = {first: tally(), last: tally()};
  const patternCounts = {fn_dot_ln:0, fi_dot_ln:0, fnln:0, fn_digits:0, nick_digits:0, pure_nick:0, other:0};
  const localSet = new Set();

  for(const raw of lines){
    const lower = raw.toLowerCase();
    // Split on the LAST '@'; skip lines with an empty local part or domain.
    const at = lower.lastIndexOf('@');
    if(at <= 0 || at === lower.length - 1) continue;
    const local = lower.slice(0, at);
    const domain = lower.slice(at + 1);

    localSet.add(local);
    bump(domainCounts, domain);
    bump(domainMap, domain);

    // Pattern heuristics, most specific first. The single-initial form must
    // be tested before the general "first.last" form, which also matches it.
    let m;
    if((m = /^[a-z]\\.([a-z]+)\\d*$/.exec(local))){
      patternCounts.fi_dot_ln++;
      bump(nameCandidates.last, m[1]);
    } else if((m = /^([a-z]+)\\.([a-z]+)\\d*$/.exec(local))){
      patternCounts.fn_dot_ln++;
      bump(nameCandidates.first, m[1]);
      bump(nameCandidates.last, m[2]);
    } else if(/^[a-z]{2,}\\d+$/.test(local)){
      // Two or more letters followed by digits.
      patternCounts.fn_digits++;
    } else if(/^[a-z]\\d+$/.test(local)){
      // A single letter followed by digits.
      patternCounts.nick_digits++;
    } else if(/^[a-z]+$/.test(local)){
      patternCounts.pure_nick++;
      // A bare word may be a first or a last name; record it as both.
      bump(nameCandidates.first, local);
      bump(nameCandidates.last, local);
    } else {
      patternCounts.other++;
    }

    // Trailing digit run (up to 8 digits) is kept as a suffix sample.
    const suffix = /(\\d{1,8})$/.exec(local);
    if(suffix) bump(suffixCounts, suffix[1]);
  }

  // localCount is the number of distinct local parts within THIS chunk only.
  self.postMessage({action:'chunkResult',domainCounts,patternCounts,localCount: localSet.size,suffixCounts,nameCandidates,domainMap});
  if(eof) self.postMessage({action:'done'});
};`;

// Package the worker source as a Blob URL so no separate file is required.
const workerBlob = new Blob([workerCode], {type:'application/javascript'});
const workerUrl = URL.createObjectURL(workerBlob);
|
|
|
|
|
/* ---------- State ---------- */ |
|
|
// Aggregated analysis results; replaced wholesale by resetAnalysis().
let analysisState = {
  totalLines: 0,                 // initialised here; not updated by the visible code
  uniqueLocal: 0,                // sum of per-chunk distinct local-part counts
  domainCounts: {},              // domain -> occurrences
  patternCounts: {               // pattern id -> occurrences
    fn_dot_ln: 0,
    fi_dot_ln: 0,
    fnln: 0,
    fn_digits: 0,
    nick_digits: 0,
    pure_nick: 0,
    other: 0
  },
  suffixCounts: {},              // trailing digit run -> occurrences
  nameCandidates: {              // harvested name tokens -> occurrences
    first: {},
    last: {}
  },
  domainMap: {}                  // second domain tally reported by the worker
};

// Result of the most recent generation run; read by the export buttons.
let lastGenerated = [];
|
|
|
|
|
/* ---------- DOM refs ---------- */ |
|
|
// Element handles, resolved once at script start. The id list and the
// destructuring pattern are kept in the same order.
const [
  fileInput, analyzeBtn, resetBtn, patternList, fileProgress,
  statTotal, statUnique, statTopDomain, statPatterns,
  optNormalize, optExtractNames
] = [
  'fileInput', 'analyzeBtn', 'resetBtn', 'patternList', 'fileProgress',
  'statTotal', 'statUnique', 'statTopDomain', 'statPatterns',
  'optNormalize', 'optExtractNames'
].map(id => document.getElementById(id));

const [
  genBtn, previewBtn, openSampleBtn, genCountInput, domainPreset,
  sufCommon, sufYears, firstPoolTA, lastPoolTA, outList,
  exportTxt, exportCsv, exportJson
] = [
  'genBtn', 'previewBtn', 'openSampleBtn', 'genCount', 'domainPreset',
  'sufCommon', 'sufYears', 'firstPool', 'lastPool', 'outList',
  'exportTxt', 'exportCsv', 'exportJson'
].map(id => document.getElementById(id));

// Alias of the file progress bar element.
const analyzeProgress = fileProgress;
|
|
|
|
|
/* ---------- File parsing & analysis (chunked with worker) ---------- */ |
|
|
/**
 * Restores the analysis state and every dependent UI element (pattern list,
 * statistics, output list, progress bar) to their initial values.
 */
function resetAnalysis(){
  analysisState = {
    totalLines: 0,
    uniqueLocal: 0,
    domainCounts: {},
    patternCounts: {fn_dot_ln:0, fi_dot_ln:0, fnln:0, fn_digits:0, nick_digits:0, pure_nick:0, other:0},
    suffixCounts: {},
    nameCandidates: {first:{}, last:{}},
    domainMap: {}
  };
  lastGenerated = [];

  patternList.innerHTML = '';
  outList.textContent = '';
  fileProgress.style.width = '0%';

  statTotal.textContent = '0';
  statUnique.textContent = '0';
  statTopDomain.textContent = '—';
  statPatterns.textContent = '—';
}
|
|
|
|
|
// "Reset": clear the analysis and forget the selected file.
resetBtn.addEventListener('click', function onResetClick(){
  resetAnalysis();
  fileInput.value = '';
});

// "Analyze": requires a selected file; starts from a clean state.
analyzeBtn.addEventListener('click', function onAnalyzeClick(){
  const chosen = fileInput.files ? fileInput.files[0] : undefined;
  if(chosen){
    resetAnalysis();
    analyzeFileChunked(chosen);
    return;
  }
  alert('Выберите .txt файл с логинами первым.');
});
|
|
|
|
|
// Worker of the analysis run currently in flight (null when idle). Used to
// cancel a previous run when a new one starts and to ignore stale callbacks.
let activeAnalysisWorker = null;

/**
 * Streams `file` to the analysis worker in fixed-size chunks and merges the
 * partial results into `analysisState` as they arrive.
 *
 * - Bytes are decoded with a streaming TextDecoder so a multi-byte UTF-8
 *   sequence that straddles a chunk boundary is not corrupted.
 * - The text after the last newline of a non-final chunk is carried over to
 *   the next chunk so lines are not cut in half.
 * - Starting a new run terminates the previous worker.
 *
 * @param {File} file Text file with one e-mail address per line.
 */
function analyzeFileChunked(file){
  if(activeAnalysisWorker) activeAnalysisWorker.terminate();

  const worker = new Worker(workerUrl);
  activeAnalysisWorker = worker;

  const CHUNK_SIZE = 2 * 1024 * 1024; // 2MB chunk
  const decoder = new TextDecoder('utf-8');
  const reader = new FileReader();
  let offset = 0;
  let partial = '';

  const isCurrent = () => activeAnalysisWorker === worker;
  const stop = () => {
    worker.terminate();
    if(isCurrent()) activeAnalysisWorker = null;
  };

  worker.onmessage = function(ev){
    if(!isCurrent()) return; // message from a superseded run
    const data = ev.data;
    if(data.action === 'chunkResult'){
      mergeCounts(analysisState, data);
      updateStatsUI();
    } else if(data.action === 'done'){
      // Release the worker before the blocking alert in finalizeAnalysis().
      stop();
      finalizeAnalysis();
    }
  };

  worker.onerror = function(ev){
    if(!isCurrent()) return;
    stop();
    alert('Ошибка обработки чанка: ' + (ev && ev.message ? ev.message : ev));
  };

  reader.onerror = function(){
    if(!isCurrent()) return;
    stop();
    // reader.error carries the actual failure; the event object itself does not.
    alert('Ошибка чтения файла: ' + (reader.error ? reader.error.message : ''));
  };

  reader.onload = function(){
    if(!isCurrent()) return;
    try{
      const eof = (offset + CHUNK_SIZE) >= file.size;
      // stream:true keeps an incomplete trailing byte sequence for the next call.
      const text = partial + decoder.decode(reader.result, {stream: !eof});

      let chunkToSend = text;
      partial = '';
      if(!eof){
        const lastNewline = text.lastIndexOf('\n');
        if(lastNewline !== -1){
          chunkToSend = text.slice(0, lastNewline + 1);
          partial = text.slice(lastNewline + 1);
        }
        // No newline in the whole chunk: flush as-is so memory stays bounded.
      }

      worker.postMessage({action:'analyzeChunk', chunk:chunkToSend, eof});
      offset += CHUNK_SIZE;

      const pct = Math.min(100, Math.round((offset / file.size) * 100));
      fileProgress.style.width = pct + '%';

      if(!eof) readSlice();
    }catch(err){
      console.error(err);
      stop();
      alert('Ошибка обработки чанка: ' + err);
    }
  };

  function readSlice(){
    reader.readAsArrayBuffer(file.slice(offset, offset + CHUNK_SIZE));
  }

  readSlice();
}
|
|
|
|
|
/**
 * Adds one worker `chunkResult` payload into the running totals in `state`.
 *
 * Keys originate from file content, so own-property checks are used when
 * reading the previous value: a key such as "constructor" must start from 0,
 * not from the inherited Object.prototype member.
 *
 * @param {Object} state Running totals (same shape as `analysisState`); mutated.
 * @param {Object} data  Payload with domainCounts, patternCounts, suffixCounts,
 *                       nameCandidates, localCount and domainMap.
 */
function mergeCounts(state, data){
  const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
  const addInto = (target, source) => {
    for(const key of Object.keys(source || {})){
      target[key] = (hasOwn(target, key) ? target[key] : 0) + source[key];
    }
  };

  addInto(state.domainCounts, data.domainCounts);

  // Only the pattern ids already known to the state are accumulated.
  for(const k of Object.keys(state.patternCounts)){
    state.patternCounts[k] = (state.patternCounts[k] || 0) + (data.patternCounts?.[k] || 0);
  }

  addInto(state.suffixCounts, data.suffixCounts);

  for(const kind of ['first', 'last']){
    addInto(state.nameCandidates[kind], data.nameCandidates?.[kind]);
  }

  // Approximation: per-chunk distinct counts are summed, so a local part that
  // appears in several chunks is counted once per chunk.
  state.uniqueLocal += data.localCount || 0;

  addInto(state.domainMap, data.domainMap);
}
|
|
|
|
|
/**
 * Refreshes the four statistic tiles from `analysisState` and re-renders the
 * pattern list.
 */
function updateStatsUI(){
  // Total records = sum of all per-domain tallies.
  let recordTotal = 0;
  for(const n of Object.values(analysisState.domainMap)) recordTotal += n;
  statTotal.textContent = recordTotal || '0';
  statUnique.textContent = analysisState.uniqueLocal || '0';

  // Most frequent domain, shown as "domain (count)".
  const ranked = Object.entries(analysisState.domainCounts || {}).sort((x, y) => y[1] - x[1]);
  if(ranked.length){
    const [name, hits] = ranked[0];
    statTopDomain.textContent = `${name} (${hits})`;
  } else {
    statTopDomain.textContent = '—';
  }

  // One-line summary "pattern:count | ..." once at least one hit exists.
  const patterns = analysisState.patternCounts;
  let hitTotal = 0;
  for(const n of Object.values(patterns)) hitTotal += n;
  if(hitTotal){
    statPatterns.textContent = Object.entries(patterns).map(pair => `${pair[0]}:${pair[1]}`).join(' | ');
  } else {
    statPatterns.textContent = '—';
  }

  renderPatternList(patterns);
}
|
|
|
|
|
/**
 * Renders one row per pattern: a checkbox (data-pattern="<id>") that includes
 * or excludes the pattern from generation, the pattern id, its hit count and
 * a bar proportional to its share of all hits.
 *
 * Rows are built with DOM APIs rather than an HTML string. Patterns the user
 * has unchecked stay unchecked across re-renders.
 *
 * @param {Object<string, number>} patternCounts Pattern id -> hit count.
 */
function renderPatternList(patternCounts){
  // Remember exclusions before the list is rebuilt.
  const excluded = new Set(
    Array.from(patternList.querySelectorAll('input[type="checkbox"]:not(:checked)'), cb => cb.dataset.pattern)
  );
  patternList.textContent = '';

  const make = (tag, {className, cssText, text} = {}, children = []) => {
    const node = document.createElement(tag);
    if(className) node.className = className;
    if(cssText) node.style.cssText = cssText;
    if(text !== undefined) node.textContent = text;
    children.forEach(child => node.appendChild(child));
    return node;
  };

  // "|| 1" avoids division by zero when nothing has been counted yet.
  const total = Object.values(patternCounts).reduce((a, b) => a + b, 0) || 1;

  for(const [name, hits] of Object.entries(patternCounts)){
    const pct = Math.round((hits / total) * 100);

    const checkbox = make('input');
    checkbox.type = 'checkbox';
    checkbox.dataset.pattern = name;
    checkbox.checked = !excluded.has(name);
    checkbox.setAttribute('aria-label', name);

    const info = make('div', {cssText: 'display:flex;gap:10px;align-items:center'}, [
      checkbox,
      make('div', {cssText: 'min-width:120px'}, [make('strong', {text: name})]),
      make('div', {className: 'small muted', text: `${hits} hits`})
    ]);

    const meter = make('div', {cssText: 'width:40%'}, [
      make('div', {cssText: 'height:8px;background:rgba(255,255,255,0.03);border-radius:6px;overflow:hidden'}, [
        make('div', {className: 'bar', cssText: `width:${pct}%;`})
      ])
    ]);

    patternList.appendChild(make('div', {className: 'pattern-item'}, [info, meter]));
  }
}
|
|
|
|
|
/**
 * Runs once the worker reports completion: derives the name pools and the
 * common-suffix list from the merged counts, pre-fills the form and refreshes
 * the statistics.
 *
 * The pool textareas are filled only when the "extract names" option is
 * checked and the textarea is empty, so user-provided pools are never
 * overwritten.
 */
function finalizeAnalysis(){
  // Up to 200 most frequent candidates of the given kind, normalized.
  const topNames = kind => Object.entries(analysisState.nameCandidates[kind] || {})
    .sort((a, b) => b[1] - a[1])
    .slice(0, 200)
    .map(([name]) => normalizeStr(name))
    .filter(Boolean);

  if(optExtractNames.checked){
    if(!firstPoolTA.value.trim()){
      firstPoolTA.value = topNames('first').join('\n');
    }
    if(!lastPoolTA.value.trim()){
      lastPoolTA.value = topNames('last').join('\n');
    }
  }

  // The 12 most frequent trailing-digit suffixes become the default list.
  const topSuffixes = Object.entries(analysisState.suffixCounts || {})
    .sort((a, b) => b[1] - a[1])
    .slice(0, 12)
    .map(([suffix]) => suffix);
  sufCommon.value = topSuffixes.join(',');

  updateStatsUI();
  alert('Анализ завершён. Проверьте автоматически заполненные пулы имён и фамилий. Отредактируйте при необходимости и нажмите "Генерировать".');
}
|
|
|
|
|
/* ---------- Generation logic ---------- */ |
|
|
/**
 * Collects the pattern ids of all checked checkboxes in the pattern list.
 *
 * @returns {string[]} Pattern ids in list order.
 */
function getSelectedPatterns(){
  const ids = [];
  patternList
    .querySelectorAll('input[type="checkbox"]:checked')
    .forEach(box => { ids.push(box.dataset.pattern); });
  return ids;
}
|
|
/**
 * Builds the domain weight map for the selected preset.
 *
 * 'auto' uses the domain counts observed in the analysed file; any other
 * value uses a fixed list with equal weights ('local', otherwise the global
 * list).
 *
 * @returns {Object<string, number>} Domain -> weight.
 */
function getDomainDistribution(){
  const mode = domainPreset.value;
  if(mode === 'auto'){
    return normalizeDistribution(analysisState.domainCounts || {});
  }

  const presets = {
    local: ['gmx.at','aon.at','chello.at','liwest.at','inode.at','student.uibk.ac.at','proton.me','protonmail.com','medundmed.at','drei.at','tmo.at'],
    global: ['gmail.com','yahoo.com','outlook.com','hotmail.com','live.com','googlemail.com','msn.com','ymail.com']
  };
  const domains = mode === 'local' ? presets.local : presets.global;

  const weights = {};
  for(const domain of domains) weights[domain] = 1;
  return normalizeDistribution(weights);
}
|
|
/**
 * Returns a numeric copy of a weight map that is safe to sample from.
 *
 * An empty map yields {'gmail.com': 1}; if all weights add up to exactly 0,
 * every key is given weight 1. Weights are not rescaled.
 *
 * @param {Object} map Key -> weight (numbers or numeric strings).
 * @returns {Object<string, number>} New map with numeric weights.
 */
function normalizeDistribution(map){
  const names = Object.keys(map);
  if(names.length === 0) return {'gmail.com':1};

  const weights = {};
  let sum = 0;
  names.forEach(name => {
    const w = Number(map[name] || 0);
    weights[name] = w;
    sum += w;
  });

  if(sum === 0){
    for(const name of names) weights[name] = 1;
  }
  return weights;
}
|
|
|
|
|
/**
 * Parses a user-supplied pool: entries separated by newlines, commas or
 * semicolons are normalized with normalizeStr() and de-duplicated.
 *
 * @param {string} text Raw textarea/input content.
 * @returns {string[]} Distinct normalized entries in input order.
 */
function parsePool(text){
  if(!text) return [];
  // A real newline class: the former pattern also split on backslash and "n".
  const entries = String(text).split(/[\r\n,;]+/).map(s => normalizeStr(s)).filter(Boolean);
  return uniq(entries);
}
|
|
|
|
|
/**
 * Builds one generator function per selected pattern, in the same order as
 * `selectedPatterns` (callers rely on gens[i] matching selectedPatterns[i]).
 * Each generator returns a "local@domain" string.
 *
 * Empty pools fall back to fixed placeholders ('john', 'doe', 'user'), so a
 * generator never emits the text "undefined" and never throws on an empty
 * pool.
 *
 * @param {string[]} selectedPatterns Pattern ids; unknown ids get the generic "first.last" generator.
 * @param {string[]} firstPool   First-name tokens.
 * @param {string[]} lastPool    Last-name tokens.
 * @param {string[]} nickPool    Nickname tokens.
 * @param {string[]} suffixList  Numeric suffix strings.
 * @param {?number[]} yearRange  Inclusive [from, to], or null for no year suffixes.
 * @param {Object<string, number>} domainWeights Domain -> weight for sampleWeighted().
 * @returns {Function[]} At least one generator.
 */
function buildGenerators(selectedPatterns, firstPool, lastPool, nickPool, suffixList, yearRange, domainWeights){
  const rnd = arr => (arr && arr.length) ? arr[Math.floor(Math.random() * arr.length)] : undefined;

  const pickFirst = () => rnd(firstPool) || 'john';
  const pickLast = () => rnd(lastPool) || 'doe';
  const pickNick = () => rnd(nickPool) || rnd(firstPool) || 'user';
  const pickDomain = () => sampleWeighted(domainWeights) || 'gmail.com';
  const pickSuffix = () => rnd(suffixList) || '';
  const pickYearSuffix = () => {
    if(!yearRange) return '';
    const [from, to] = yearRange;
    return String(from + Math.floor(Math.random() * (to - from + 1)));
  };

  // 45% of the time: a common suffix or a year (even odds); otherwise ''.
  const maybeNum = () => {
    if(Math.random() >= 0.45) return '';
    return Math.random() < 0.5 ? pickSuffix() : pickYearSuffix();
  };
  // Gives `local` a chance `p` of receiving whatever maybeNum() yields.
  const withOptionalNum = (local, p) => (Math.random() < p ? local + maybeNum() : local);
  const atDomain = local => `${local}@${pickDomain()}`;

  const firstDotLast = () => atDomain(withOptionalNum(`${pickFirst()}.${pickLast()}`, 0.4));
  const lettersThenDigits = () => {
    const useNick = Math.random() < 0.5;
    const base = useNick
      ? pickNick()
      : pickFirst() + (Math.random() < 0.3 ? '.' : '') + (rnd(lastPool) || '');
    // These patterns are defined by a numeric tail, so try every source.
    const tail = maybeNum() || pickSuffix() || pickYearSuffix();
    return atDomain(base + tail);
  };

  const factories = {
    fn_dot_ln: firstDotLast,
    fi_dot_ln: () => atDomain(withOptionalNum(`${pickFirst()[0]}.${pickLast()}`, 0.3)),
    fnln: () => atDomain(withOptionalNum(`${pickFirst()}${pickLast()}`, 0.35)),
    fn_digits: lettersThenDigits,
    nick_digits: lettersThenDigits,
    pure_nick: () => atDomain(withOptionalNum(pickNick(), 0.35))
  };

  const gens = selectedPatterns.map(p =>
    Object.prototype.hasOwnProperty.call(factories, p) ? factories[p] : firstDotLast
  );

  // Nothing selected: keep the caller working with a single default generator.
  if(gens.length === 0){
    gens.push(() => `${pickFirst()}.${pickLast()}@gmail.com`);
  }
  return gens;
}
|
|
|
|
|
/**
 * Parses a year range such as "1960-2005".
 *
 * The lower bound is clamped to at least 1900 and the upper bound to at most
 * 2100. Returns null when no "NNN(N)-NNN(N)" pair is found or when the
 * clamped bounds are inverted.
 *
 * @param {string} s Raw input.
 * @returns {?number[]} Inclusive [from, to], or null.
 */
function parseYearRange(s){
  if(!s) return null;
  const m = String(s).match(/(\d{3,4})\s*-\s*(\d{3,4})/);
  if(!m) return null;

  const from = Math.max(1900, Number(m[1]));
  const to = Math.min(2100, Number(m[2]));
  return from <= to ? [from, to] : null;
}
|
|
|
|
|
/**
 * Generates up to `count` distinct addresses from the selected patterns,
 * stores them in `lastGenerated` and renders them.
 *
 * Pool resolution, first non-empty source wins:
 *   first/last names: pool textarea -> seed input -> analysis candidates
 *   nicknames:        seed input -> analysis first-name candidates
 * Patterns are drawn in proportion to their observed hit counts (minimum
 * weight 1). The number of attempts is capped, so fewer than `count` results
 * may be returned when the pools cannot produce enough distinct values.
 *
 * @param {number} count Desired number of unique results.
 * @param {Object} [options] Unused by this function; kept for existing callers.
 * @returns {string[]} The generated list (also stored in `lastGenerated`).
 */
function generateList(count, options = {}){
  const wanted = Math.max(0, Math.floor(Number(count) || 0));
  const selectedPatterns = getSelectedPatterns();

  const firstNonEmpty = (...producers) => {
    for(const produce of producers){
      const pool = produce();
      if(pool.length) return pool;
    }
    return [];
  };
  const fieldValue = id => document.getElementById(id).value;
  // Up to 200 most frequent harvested candidates of the given kind.
  const rankedCandidates = kind => Object.entries(analysisState.nameCandidates[kind] || {})
    .sort((a, b) => b[1] - a[1])
    .slice(0, 200)
    .map(([name]) => normalizeStr(name))
    .filter(Boolean);

  const fPool = firstNonEmpty(
    () => parsePool(firstPoolTA.value),
    () => parsePool(fieldValue('firstSeeds')),
    () => rankedCandidates('first')
  );
  const lPool = firstNonEmpty(
    () => parsePool(lastPoolTA.value),
    () => parsePool(fieldValue('lastSeeds')),
    () => rankedCandidates('last')
  );
  const nickPool = firstNonEmpty(
    () => parsePool(fieldValue('nickSeeds')),
    () => rankedCandidates('first')
  );

  const domainWeights = getDomainDistribution();
  const typedSuffixes = uniq(sufCommon.value.split(/[\r\n,;]+/).map(s => s.trim()));
  const suffixList = typedSuffixes.length
    ? typedSuffixes
    : Object.keys(analysisState.suffixCounts || {}).slice(0, 20);
  const yearRange = parseYearRange(sufYears.value) || [1970, 2005];

  // gens[i] is the generator for selectedPatterns[i].
  const gens = buildGenerators(selectedPatterns, fPool, lPool, nickPool, suffixList, yearRange, domainWeights);

  const patternWeightMap = {};
  for(const p of selectedPatterns){
    patternWeightMap[p] = analysisState.patternCounts[p] || 1;
  }

  const out = new Set();
  const CAP = Math.max(wanted * 10, 2000); // attempts cap to avoid infinite loops
  for(let attempts = 0; out.size < wanted && attempts < CAP; attempts++){
    const chosenPattern = sampleWeighted(patternWeightMap);
    const idx = selectedPatterns.indexOf(chosenPattern);
    // With no pattern selected, idx is -1 and the single default generator is used.
    const genFunc = (idx >= 0 && idx < gens.length) ? gens[idx] : gens[Math.floor(Math.random() * gens.length)];
    try{
      const val = genFunc();
      if(val && typeof val === 'string'){
        const cleaned = normalizeStrEmail(val);
        // Skip results whose local part was emptied by normalization.
        if(cleaned.indexOf('@') > 0) out.add(cleaned);
      }
    }catch(e){ console.error('generator error', e); }
  }

  lastGenerated = Array.from(out);
  renderOutput(lastGenerated);
  return lastGenerated;
}
|
|
|
|
|
/**
 * Normalizes an address: lowercases it, transliterates diacritics in the
 * local part via DIACRIT_MAP, drops every local-part character outside
 * a-z 0-9 . _ + - and removes leading/trailing dots. The domain is only
 * trimmed and lowercased.
 *
 * @param {string} email Candidate address.
 * @returns {string} Normalized "local@domain"; the input is returned
 *                   unchanged when it contains no '@'.
 */
function normalizeStrEmail(email){
  const parts = String(email).trim().toLowerCase().split('@');
  if(parts.length < 2) return email;

  // Everything before the last '@' is the local part.
  const domain = parts.pop().trim();
  let local = parts.join('@');

  local = local.replace(/[^ -~]/g, ch => DIACRIT_MAP[ch] || ch);
  // '-' is placed last in the class so it is a literal, not a range operator.
  local = local.replace(/[^a-z0-9._+-]/g, '');
  local = local.replace(/^\.+|\.+$/g, '');
  return local + '@' + domain;
}
|
|
|
|
|
/**
 * Shows the generated list in the output panel, one entry per line.
 *
 * textContent with real newlines is used; the panel is styled with
 * white-space:pre-wrap, so each entry renders on its own line without
 * creating one DOM node per entry.
 *
 * @param {string[]} list Entries to display.
 */
function renderOutput(list){
  outList.textContent = list.join('\n');
}
|
|
|
|
|
/* ---------- UI events ---------- */ |
|
|
// "Preview": generate between 1 and 25 entries, never more than requested.
previewBtn.addEventListener('click', ()=>{
  const requested = Math.floor(Number(genCountInput.value)) || 25;
  const res = generateList(Math.max(1, Math.min(25, requested)));
  alert('Предпросмотр: ' + res.length + ' записей сгенерировано (показаны в списке).');
});

// "Generate": full run with the requested count (at least 1, default 100).
genBtn.addEventListener('click', ()=>{
  const cnt = Math.max(1, Math.floor(Number(genCountInput.value)) || 100);
  generateList(cnt);
  alert('Генерация завершена: ' + lastGenerated.length + ' уникальных записей.');
});

// "Show sample": the ten most frequent domains from the analysis, one per line.
openSampleBtn.addEventListener('click', ()=>{
  const sample = Object.entries(analysisState.domainMap || {})
    .sort((a, b) => b[1] - a[1])
    .slice(0, 10)
    .map(([domain, hits]) => domain + ': ' + (hits || 0))
    .join('\n');
  alert('Top domains samples:\n' + sample);
});
|
|
|
|
|
/**
 * Offers `text` to the user as a file download named `filename`.
 *
 * A temporary object URL and anchor element are created, clicked, and then
 * released after 100 ms.
 *
 * @param {string} filename Suggested file name.
 * @param {string} text     File content (sent as UTF-8 plain text).
 */
function download(filename, text){
  const payload = new Blob([text], {type:'text/plain;charset=utf-8'});
  const link = document.createElement('a');
  link.href = URL.createObjectURL(payload);
  link.download = filename;

  document.body.appendChild(link);
  link.click();

  setTimeout(function cleanup(){
    URL.revokeObjectURL(link.href);
    link.remove();
  }, 100);
}
|
|
// Export buttons. Each one refuses to run while nothing has been generated.

// Plain text: one entry per line.
exportTxt.addEventListener('click', ()=>{
  if(!lastGenerated.length){ alert('Нет сгенерированных данных.'); return; }
  download('generated_usernames.txt', lastGenerated.join('\n'));
});

// CSV: a single "email" column; values are quoted and embedded quotes doubled.
exportCsv.addEventListener('click', ()=>{
  if(!lastGenerated.length){ alert('Нет сгенерированных данных.'); return; }
  const rows = lastGenerated.map(e => `"${e.replace(/"/g, '""')}"`);
  download('generated_usernames.csv', 'email\n' + rows.join('\n'));
});

// JSON: pretty-printed array of strings.
exportJson.addEventListener('click', ()=>{
  if(!lastGenerated.length){ alert('Нет сгенерированных данных.'); return; }
  download('generated_usernames.json', JSON.stringify(lastGenerated, null, 2));
});
|
|
|
|
|
/* ---------- Helpers for UI and sanity ---------- */ |
|
|
/**
 * Parses `v` as a base-10 integer, returning `d` when the result is NaN.
 *
 * @param {*} v Value to parse.
 * @param {*} d Fallback returned for unparsable input.
 * @returns {*} Parsed integer or `d`.
 */
function safeParseInt(v, d){
  const parsed = Number.parseInt(v, 10);
  if(Number.isNaN(parsed)) return d;
  return parsed;
}
|
|
|
|
|
/* ---------- Initialize small defaults ---------- */ |
|
|
// Runs once at load: replaces the placeholder hints of the two suffix fields.
(function initDefaults(){
  const hints = [
    [sufYears, 'Пример: 1960-2005'],
    [sufCommon, 'Например: 007,123,84,2005']
  ];
  for(const [field, hint] of hints){
    field.placeholder = hint;
  }
})();
|
|
|
|
|
/* ---------- End of script ---------- */ |
|
|
</script> |
|
|
</body> |
|
|
</html> |