| <!DOCTYPE html> |
| <html lang="ru"> |
| <head> |
| <meta charset="utf-8" /> |
| <meta name="viewport" content="width=device-width,initial-scale=1" /> |
| <title>Advanced Leak-based Username Generator — willhaben/target</title> |
<style>
/* Colour tokens shared by the rules below. */
:root{--bg:#0b0f14;--card:#0f1720;--muted:#9aa6b2;--text:#e6eef6;--accent:#4fc3f7;--good:#7be495}
*{box-sizing:border-box}
html,body{height:100%;margin:0;font-family:Inter,Segoe UI,Roboto,Arial,sans-serif;background:linear-gradient(180deg,#061018,#07131a);color:var(--text)}

/* Page shell and two-column layout. */
.app{max-width:1100px;margin:24px auto;padding:18px;border-radius:12px;background:linear-gradient(180deg,#08121a,#07101a);box-shadow:0 8px 40px rgba(0,0,0,.6)}
h1{margin:0 0 8px;font-size:20px}
.grid{display:grid;grid-template-columns:360px 1fr;gap:16px}
.card{background:var(--card);padding:14px;border-radius:10px;border:1px solid rgba(255,255,255,0.02)}

/* Form controls.
   input:not([type]) is listed explicitly: an <input> without a type attribute
   is a text field, but it does not match the input[type="text"] selector. */
label{display:block;font-size:13px;color:var(--muted);margin-top:10px}
input[type="text"], input:not([type]), input[type="number"], select, textarea{
  width:100%;padding:10px;border-radius:8px;border:1px solid rgba(255,255,255,0.04);background:#07111a;color:var(--text);outline:none;font-size:13px
}
/* outline:none above removes the default focus ring; give keyboard users a visible replacement. */
input:focus-visible, select:focus-visible, textarea:focus-visible, button:focus-visible{outline:2px solid var(--accent);outline-offset:2px}
textarea{min-height:90px;resize:vertical;font-family:ui-monospace,Menlo,monospace}
input[type="file"]{color:var(--text)}

/* Buttons. */
.btn{padding:10px 12px;border-radius:8px;border:0;background:linear-gradient(90deg,var(--accent),#7be495);color:#042028;font-weight:700;cursor:pointer}
.btn.alt{background:transparent;border:1px solid rgba(255,255,255,0.04);color:var(--text)}
.export-btn{background:#20303b;border-radius:8px;color:var(--text);padding:8px 10px;border:1px solid rgba(255,255,255,0.03);cursor:pointer}

/* Small layout helpers. */
.row{display:flex;gap:8px;align-items:center}
.controls{display:flex;gap:8px;flex-wrap:wrap;margin-top:10px}
.controls-right{display:flex;gap:8px;align-items:center}
.switch{display:flex;gap:8px;align-items:center}
.small{font-size:12px;color:var(--muted)}
.muted{color:var(--muted)}
.chip{padding:6px 8px;border-radius:999px;background:rgba(255,255,255,0.02);font-size:13px}
.footer{margin-top:12px;font-size:12px;color:var(--muted)}

/* Statistics tiles, scrollable lists and pattern rows. */
.stats{display:grid;grid-template-columns:repeat(2,1fr);gap:8px;margin-top:8px}
.stat{background:linear-gradient(180deg,rgba(255,255,255,0.01),transparent);padding:8px;border-radius:8px;border:1px solid rgba(255,255,255,0.02)}
.list{max-height:60vh;overflow:auto;padding:8px;border-radius:8px;border:1px solid rgba(255,255,255,0.02);background:#07121a}
.pattern-item{display:flex;justify-content:space-between;align-items:center;padding:6px 8px;border-radius:6px;margin-bottom:6px;background:rgba(255,255,255,0.01)}
.pattern-item .bar{height:8px;background:linear-gradient(90deg,var(--accent),#7be495);border-radius:6px}

/* File progress bar: the inner <i> is widened by the script. */
.progress{height:10px;background:rgba(255,255,255,0.03);border-radius:6px;overflow:hidden;margin-top:8px}
.progress > i{display:block;height:100%;width:0;background:linear-gradient(90deg,var(--accent),#7be495)}

/* Generic table styling. */
.table{width:100%;border-collapse:collapse;margin-top:8px}
.table th,.table td{padding:6px;border-bottom:1px solid rgba(255,255,255,0.02);text-align:left;font-size:13px}

/* Collapse to a single column on narrow viewports. */
@media (max-width:980px){.grid{grid-template-columns:1fr}}
</style>
| </head> |
| <body> |
| <div class="app" role="application" aria-label="Advanced username generator"> |
<!-- Page header: title and description on the left, target chip on the right. -->
<header style="display:flex;justify-content:space-between;align-items:center;gap:12px">
  <div>
    <h1>Advanced leak-based username generator</h1>
    <div class="small">Загрузите очищенный список логинов (email) — анализируем паттерны, домены и генерируем реалистичные вариации.</div>
  </div>
  <div class="chip">Target: willhaben / custom</div>
</header>
|
|
| <div class="grid" style="margin-top:12px"> |
| |
<!-- Left card: file upload, analysis options, manual seeds, statistics and detected patterns.
     Element ids are read by the script and must stay unchanged. -->
<div class="card">
  <label for="fileInput">1) Загрузить файл с логинами (.txt)</label>
  <input id="fileInput" type="file" accept=".txt" />
  <div class="small">Поддерживает большие файлы — обработка в воркере и по чанкам. Один email на строку.</div>
  <div class="progress" title="progress"><i id="fileProgress"></i></div>

  <label>2) Настройки анализа</label>
  <div class="small">Нормализация: диакритика → латинские аналоги, lowercase, удаление пробелов</div>
  <div style="display:flex;gap:8px;margin-top:8px">
    <label class="switch"><input id="optNormalize" type="checkbox" checked /> <span class="small">Нормализация</span></label>
    <label class="switch"><input id="optExtractNames" type="checkbox" checked /> <span class="small">Экстракт имен/фамилий</span></label>
  </div>

  <label style="margin-top:10px">3) Ручной ввод слов-источников (опционально)</label>
  <div class="small">Если оставить пустыми, имена/фамилии будут извлечены из датасета</div>
  <!-- type="text" is explicit so the input[type="text"] stylesheet rule applies to these fields. -->
  <label for="firstSeeds">Имена (через запятую)</label>
  <input id="firstSeeds" type="text" placeholder="daniel,anna,roman,kevin..." />
  <label for="lastSeeds">Фамилии (через запятую)</label>
  <input id="lastSeeds" type="text" placeholder="medved,schmidt,pirker..." />
  <label for="nickSeeds">Ники / популярные слова (через запятую)</label>
  <input id="nickSeeds" type="text" placeholder="shadow,ranger,stazzor,aligator..." />

  <div class="controls" style="margin-top:10px">
    <button id="analyzeBtn" type="button" class="btn alt">Анализировать файл</button>
    <button id="resetBtn" type="button" class="btn alt">Сбросить</button>
  </div>

  <!-- Live region limited to the statistics so assistive technology is not notified
       about every change inside the surrounding form. -->
  <div class="stats" style="margin-top:12px" aria-live="polite">
    <div class="stat"><div class="small">Всего записей</div><div id="statTotal">0</div></div>
    <div class="stat"><div class="small">Уникальных локал-партий</div><div id="statUnique">0</div></div>
    <div class="stat"><div class="small">Популярный домен</div><div id="statTopDomain">—</div></div>
    <div class="stat"><div class="small">Шаблонов распознано</div><div id="statPatterns">—</div></div>
  </div>

  <label style="margin-top:12px">Результат анализа — паттерны (клик для выбора/исключения)</label>
  <div id="patternList" class="list" style="min-height:120px"></div>

  <div style="display:flex;justify-content:space-between;align-items:center;margin-top:10px">
    <div class="small muted">Авто-расстановка весов паттернов по частоте (можно отредактировать)</div>
    <div class="small muted">Доля доменов и суффиксов вычислена из файла</div>
  </div>
</div>
|
|
| |
<!-- Right card: generation parameters, source pools, export buttons and the output list.
     Element ids are read by the script and must stay unchanged. -->
<div class="card">
  <div style="display:flex;justify-content:space-between;align-items:center">
    <div>
      <label>4) Параметры генерации</label>
      <div class="small">Выберите паттерны и домены, затем задайте объём генерации и нажмите "Генерировать".</div>
    </div>
    <div class="controls-right">
      <button id="openSampleBtn" type="button" class="export-btn">Показать пример</button>
      <button id="genBtn" type="button" class="btn">Генерировать</button>
    </div>
  </div>

  <div style="display:flex;gap:8px;margin-top:10px">
    <div style="flex:1">
      <label for="genCount">Кол-во генерируемых</label>
      <input id="genCount" type="number" value="200" min="1" />
    </div>
    <div style="width:160px">
      <label for="domainPreset">Домен приоритет</label>
      <select id="domainPreset">
        <option value="auto">Авто (из файла)</option>
        <option value="local">Локальные (gmx.at, aon.at...)</option>
        <option value="global">Global (gmail,yahoo,outlook)</option>
      </select>
    </div>
  </div>

  <label style="margin-top:10px">Настройки суффиксов / чисел</label>
  <!-- type="text" makes the input[type="text"] stylesheet rule apply; aria-label names the
       fields because a placeholder alone is not an accessible name. -->
  <div style="display:flex;gap:8px">
    <input id="sufCommon" type="text" aria-label="Популярные суффиксы" placeholder="Популярные суффиксы, через запятую (007,123,84,2005)" />
    <input id="sufYears" type="text" aria-label="Годы" placeholder="Годы (префикс/диапазон) например 1960-2005" />
  </div>

  <label style="margin-top:10px">Входные источники (используются при генерации)</label>
  <div style="display:grid;grid-template-columns:1fr 1fr;gap:8px">
    <textarea id="firstPool" aria-label="first names pool" placeholder="first names pool (one,comma,or newline separated)"></textarea>
    <textarea id="lastPool" aria-label="last names pool" placeholder="last names pool"></textarea>
  </div>

  <div class="controls" style="margin-top:10px">
    <button id="previewBtn" type="button" class="btn alt">Предпросмотр 25</button>
    <button id="exportTxt" type="button" class="export-btn">Экспорт .txt</button>
    <button id="exportCsv" type="button" class="export-btn">Экспорт .csv</button>
    <button id="exportJson" type="button" class="export-btn">Экспорт .json</button>
  </div>

  <label style="margin-top:12px">Сгенерированные логины (дубликаты удалены автоматически)</label>
  <div id="outList" class="list" style="min-height:200px;white-space:pre-wrap;font-family:ui-monospace,Menlo,monospace"></div>

  <div class="footer">Гарантии: инструмент локально обрабатывает файлы в браузере. Для очень больших файлов (>100MB) рекомендуется использовать современные браузеры и больше памяти. Воркер/чанковая обработка активированы для производительности.</div>
</div>
| </div> |
| </div> |
|
|
| <script> |
| /* |
| Robust client-side implementation: |
| - Uses Web Worker (inline via Blob) for chunked file parsing + pattern extraction. |
| - Defensive programming: try/catch, time-slicing, progress updates. |
| - Pattern extraction: multiple regex classes + domain tally + suffix detection. |
| - Weighted generation by observed frequencies, with user overrides. |
| - Exports: txt, csv, json. Dedup & normalization. |
| - All code contained in single file; no external dependencies. |
| */ |
| |
| /* ---------- Utils & Normalization ---------- */ |
/*
  Explicit transliterations that take priority over generic accent stripping
  (for example German umlauts expand to two letters instead of losing the dots).
*/
const DIACRIT_MAP = {
  'ä':'ae','ö':'oe','ü':'ue','ß':'ss','š':'s','č':'c','ć':'c','ž':'z','á':'a','à':'a','â':'a',
  'é':'e','è':'e','ê':'e','ë':'e','í':'i','ó':'o','ò':'o','ô':'o','ñ':'n','ł':'l','ø':'o','ő':'o'
};

/**
 * Normalize a name-like token: trim, lowercase, transliterate accented letters
 * and remove every whitespace / zero-width character.
 *
 * @param {*} s  Value to normalize; falsy values yield ''. Non-strings are stringified.
 * @returns {string} The normalized token.
 */
function normalizeStr(s){
  if(!s) return '';
  // NFC first, so a base letter followed by a combining mark is looked up as one character.
  let res = String(s).trim().toLowerCase().normalize('NFC');
  res = res.replace(/[^ -~]/g, ch => {
    if(Object.prototype.hasOwnProperty.call(DIACRIT_MAP, ch)) return DIACRIT_MAP[ch];
    // Not in the table: decompose and drop the combining marks (e.g. 'ř' -> 'r').
    // Characters without a decomposition are returned unchanged.
    return ch.normalize('NFD').replace(/[\u0300-\u036f]/g, '');
  });
  // \s does not cover the zero-width characters U+200B..U+200D and U+2060.
  res = res.replace(/[\s\u200B-\u200D\u2060]+/g, '');
  return res;
}
/**
 * Return the distinct truthy items of `arr`, in first-seen order.
 */
function uniq(arr){
  const seen = new Set();
  for(const item of arr){
    if(item) seen.add(item);
  }
  return [...seen];
}
/**
 * Pick one key of `map` at random, with probability proportional to its weight.
 *
 * @param {Object<string, number>} map  key -> weight; missing/falsy weights count as 0.
 * @returns {string|null} The chosen key, or null when the map has no keys.
 */
function sampleWeighted(map){
  const weighted = Object.keys(map).map(key => [key, map[key] || 0]);
  if(weighted.length === 0) return null;

  let total = 0;
  for(const [, weight] of weighted) total += weight;

  // Walk the keys, subtracting each weight until the random offset is used up.
  let remaining = Math.random() * total;
  for(const [key, weight] of weighted){
    remaining -= weight;
    if(remaining <= 0) return key;
  }
  // Only reachable through floating-point drift: fall back to the last key.
  return weighted[weighted.length - 1][0];
}
| |
| /* ---------- Inline worker creation ---------- */ |
/*
  Source of the analysis worker, kept as a string so the page stays a single file.
  Backslashes are doubled because this is a template literal: the worker itself
  sees \r, \n, \d and so on.

  Protocol: the page posts {action:'analyzeChunk', chunk, eof}; the worker answers
  with one {action:'chunkResult', ...counts} per chunk and, after the chunk flagged
  eof, a final {action:'done'}.

  `localCount` is the number of local-parts in the chunk that were NOT seen in any
  earlier chunk, so summing it across chunks gives the exact unique total. This
  keeps every distinct local-part in worker memory for the duration of the run.
*/
const workerCode = `// Local-parts already seen in earlier chunks handled by this worker.
const seenLocals = new Set();

self.onmessage = function(ev){
  const {action, chunk, eof} = ev.data;
  if(action !== 'analyzeChunk') return;

  const lines = String(chunk || '').split(/\\r?\\n/).map(l => l.trim()).filter(Boolean);
  const domainCounts = {};
  const patternCounts = {fn_dot_ln:0, fi_dot_ln:0, fnln:0, fn_digits:0, nick_digits:0, pure_nick:0, other:0};
  const suffixCounts = {};
  const nameCandidates = {first:{}, last:{}};
  let newLocals = 0;

  // Own-property check so keys such as "constructor" are counted like any other.
  const bump = (table, key) => {
    table[key] = Object.prototype.hasOwnProperty.call(table, key) ? table[key] + 1 : 1;
  };

  for(const raw of lines){
    try{
      const lower = raw.toLowerCase();
      // Split on the LAST '@' and skip lines with an empty local-part or domain.
      const at = lower.lastIndexOf('@');
      if(at < 1 || at === lower.length - 1) continue;
      const local = lower.slice(0, at);
      const domain = lower.slice(at + 1);

      if(!seenLocals.has(local)){ seenLocals.add(local); newLocals++; }
      bump(domainCounts, domain);

      // Pattern detection (simple heuristics). The initial.lastname test must run
      // before firstname.lastname, whose regex also accepts a one-letter first part.
      if(/^[a-z]\\.[a-z]+\\d*$/.test(local)){
        patternCounts.fi_dot_ln++;
        const ln = local.slice(2).replace(/\\d+$/, '');
        if(ln) bump(nameCandidates.last, ln);
      } else if(/^[a-z]+\\.[a-z]+\\d*$/.test(local)){
        patternCounts.fn_dot_ln++;
        const dot = local.indexOf('.');
        const fn = local.slice(0, dot);
        const ln = local.slice(dot + 1).replace(/\\d+$/, '');
        if(fn) bump(nameCandidates.first, fn);
        if(ln) bump(nameCandidates.last, ln);
      } else if(/^[a-z]{2,}\\d+$/.test(local)){
        // two or more letters followed by digits
        patternCounts.fn_digits++;
      } else if(/^[a-z]\\d+$/.test(local)){
        // a single letter followed by digits
        patternCounts.nick_digits++;
      } else if(/^[a-z]+$/.test(local)){
        patternCounts.pure_nick++;
        // could be a first or a last name: offer it to both pools
        bump(nameCandidates.first, local);
        bump(nameCandidates.last, local);
      } else {
        patternCounts.other++;
      }

      // numeric suffix (1-8 trailing digits)
      const m = local.match(/(\\d{1,8})$/);
      if(m) bump(suffixCounts, m[1]);
    }catch(e){ /* ignore per-line errors */ }
  }

  // domainMap carries the same tallies as domainCounts; both fields are part of the message shape.
  self.postMessage({action:'chunkResult', domainCounts, patternCounts, localCount: newLocals, suffixCounts, nameCandidates, domainMap: domainCounts});
  if(eof){
    seenLocals.clear();
    self.postMessage({action:'done'});
  }
};`;
const workerBlob = new Blob([workerCode], {type:'application/javascript'});
const workerUrl = URL.createObjectURL(workerBlob);
| |
| /* ---------- State ---------- */ |
// Running totals of the current analysis; rebuilt from scratch by resetAnalysis().
let analysisState = {
  totalLines: 0,
  uniqueLocal: 0,                       // sum of the per-chunk localCount values
  domainCounts: {},                     // domain -> occurrences
  patternCounts: {                      // pattern name -> occurrences
    fn_dot_ln: 0,
    fi_dot_ln: 0,
    fnln: 0,
    fn_digits: 0,
    nick_digits: 0,
    pure_nick: 0,
    other: 0
  },
  suffixCounts: {},                     // trailing digit string -> occurrences
  nameCandidates: { first: {}, last: {} },
  domainMap: {}                         // domain -> occurrences (summed for the total counter)
};

// Result of the most recent generateList() call; read by the export buttons.
let lastGenerated = [];
| |
| /* ---------- DOM refs ---------- */ |
// Lookup helper for the fixed element ids declared in the markup.
const byId = id => document.getElementById(id);

// Analysis panel
const fileInput = byId('fileInput');
const analyzeBtn = byId('analyzeBtn');
const resetBtn = byId('resetBtn');
const patternList = byId('patternList');
const fileProgress = byId('fileProgress');
const statTotal = byId('statTotal');
const statUnique = byId('statUnique');
const statTopDomain = byId('statTopDomain');
const statPatterns = byId('statPatterns');
const optNormalize = byId('optNormalize');
const optExtractNames = byId('optExtractNames');

// Generation panel
const genBtn = byId('genBtn');
const previewBtn = byId('previewBtn');
const openSampleBtn = byId('openSampleBtn');
const genCountInput = byId('genCount');
const domainPreset = byId('domainPreset');
const sufCommon = byId('sufCommon');
const sufYears = byId('sufYears');
const firstPoolTA = byId('firstPool');
const lastPoolTA = byId('lastPool');
const outList = byId('outList');
const exportTxt = byId('exportTxt');
const exportCsv = byId('exportCsv');
const exportJson = byId('exportJson');

// Second name for the same progress element.
const analyzeProgress = fileProgress;
| |
| /* ---------- File parsing & analysis (chunked with worker) ---------- */ |
/**
 * Discard all analysis results and generated output and reset the widgets that show them.
 * The seed inputs and the name pools are left as they are.
 */
function resetAnalysis(){
  analysisState = {
    totalLines: 0,
    uniqueLocal: 0,
    domainCounts: {},
    patternCounts: {fn_dot_ln:0, fi_dot_ln:0, fnln:0, fn_digits:0, nick_digits:0, pure_nick:0, other:0},
    suffixCounts: {},
    nameCandidates: {first:{}, last:{}},
    domainMap: {}
  };
  lastGenerated = [];

  patternList.innerHTML = '';
  outList.textContent = '';
  fileProgress.style.width = '0%';

  statTotal.textContent = '0';
  statUnique.textContent = '0';
  statTopDomain.textContent = '—';
  statPatterns.textContent = '—';
}
| |
// "Reset": clear the results and forget the chosen file.
resetBtn.addEventListener('click', function(){
  resetAnalysis();
  fileInput.value = '';
});

// "Analyze": requires a chosen file; earlier results are cleared before the new run.
analyzeBtn.addEventListener('click', function(){
  const file = fileInput.files && fileInput.files[0];
  if(!file){
    alert('Выберите .txt файл с логинами первым.');
    return;
  }
  resetAnalysis();
  analyzeFileChunked(file);
});
| |
/**
 * Read `file` in 2 MB slices, hand complete lines to the analysis worker and merge
 * the partial results into `analysisState` as they arrive.
 *
 * - Bytes are decoded with a streaming TextDecoder, so a multi-byte UTF-8 character
 *   cut by a slice boundary is not corrupted.
 * - The text after the last newline of a slice is carried over to the next slice,
 *   so no line is analysed in two halves.
 * - Starting a new run cancels one that is still in flight, so results of different
 *   files are never mixed.
 *
 * @param {File} file  Text file with one e-mail address per line.
 */
function analyzeFileChunked(file){
  const CHUNK_SIZE = 2 * 1024 * 1024; // 2MB chunk

  // Cancel a previous run that has not finished yet.
  if(typeof analyzeFileChunked.cancelActive === 'function'){
    analyzeFileChunked.cancelActive();
  }

  const worker = new Worker(workerUrl);
  const reader = new FileReader();
  const decoder = new TextDecoder('utf-8');
  let offset = 0;
  let carry = '';      // incomplete last line of the previous slice
  let stopped = false;

  // Release the worker and the reader; safe to call more than once.
  function stop(){
    if(stopped) return;
    stopped = true;
    worker.onmessage = null;
    worker.terminate();
    if(reader.readyState === 1 /* LOADING */) reader.abort();
    if(analyzeFileChunked.cancelActive === stop) analyzeFileChunked.cancelActive = null;
  }
  analyzeFileChunked.cancelActive = stop;

  // Stop the run and report the problem once.
  function fail(prefix, err){
    if(stopped) return;
    stop();
    console.error(err);
    alert(prefix + err);
  }

  function readSlice(){
    reader.readAsArrayBuffer(file.slice(offset, offset + CHUNK_SIZE));
  }

  worker.onmessage = function(ev){
    if(stopped) return;
    const data = ev.data;
    if(data.action === 'chunkResult'){
      mergeCounts(analysisState, data);
      updateStatsUI();
    } else if(data.action === 'done'){
      stop();
      finalizeAnalysis();
    }
  };
  worker.onerror = ev => fail('Ошибка обработки чанка: ', ev.message || ev);

  reader.onerror = () => fail('Ошибка чтения файла: ', reader.error);
  reader.onload = function(){
    if(stopped) return;
    try{
      const isLast = (offset + CHUNK_SIZE) >= file.size;
      // stream:true keeps an unfinished multi-byte sequence inside the decoder until the next slice.
      const text = carry + decoder.decode(reader.result, {stream: !isLast});

      let chunkToSend = text;
      carry = '';
      if(!isLast){
        // Send only complete lines. Without any newline, lastIndexOf is -1 and the
        // whole text is carried over.
        const lastNewline = text.lastIndexOf('\n');
        chunkToSend = text.slice(0, lastNewline + 1);
        carry = text.slice(lastNewline + 1);
      }
      offset += CHUNK_SIZE;

      // The final message is always sent, even if empty, because it carries the eof flag.
      if(chunkToSend || isLast){
        worker.postMessage({action:'analyzeChunk', chunk:chunkToSend, eof:isLast});
      }

      const pct = file.size ? Math.min(100, Math.round((offset / file.size) * 100)) : 100;
      fileProgress.style.width = pct + '%';

      if(!isLast) readSlice();
    }catch(err){
      fail('Ошибка обработки чанка: ', err);
    }
  };

  // start
  readSlice();
}
| |
/**
 * Add one worker `chunkResult` message to the running totals in `state` (mutated in place).
 *
 * @param {Object} state  Same shape as `analysisState`.
 * @param {Object} data   Message with domainCounts, patternCounts, suffixCounts,
 *                        nameCandidates, localCount and domainMap.
 */
function mergeCounts(state, data){
  // target[key] += source[key] for every enumerable key of source
  const accumulate = (target, source) => {
    for(const key in source){
      target[key] = (target[key] || 0) + source[key];
    }
  };

  accumulate(state.domainCounts, data.domainCounts);

  // Only the pattern names already known to the state are merged.
  for(const pattern in state.patternCounts){
    state.patternCounts[pattern] = (state.patternCounts[pattern] || 0) + (data.patternCounts[pattern] || 0);
  }

  accumulate(state.suffixCounts, data.suffixCounts);

  for(const kind of ['first', 'last']){
    accumulate(state.nameCandidates[kind], data.nameCandidates?.[kind] || {});
  }

  // The per-chunk counts are simply summed.
  state.uniqueLocal += data.localCount || 0;

  accumulate(state.domainMap, data.domainMap);
}
| |
/**
 * Refresh the four statistic tiles and the pattern list from `analysisState`.
 */
function updateStatsUI(){
  const sum = values => values.reduce((acc, n) => acc + n, 0);

  // Total records = sum of all per-domain tallies.
  const total = sum(Object.values(analysisState.domainMap));
  statTotal.textContent = total || '0';
  statUnique.textContent = analysisState.uniqueLocal || '0';

  // Most frequent domain, shown with its count.
  const ranked = Object.entries(analysisState.domainCounts || {}).sort((a, b) => b[1] - a[1]);
  if(ranked.length){
    const [domain, hits] = ranked[0];
    statTopDomain.textContent = `${domain} (${hits})`;
  } else {
    statTopDomain.textContent = '—';
  }

  // One-line "name:count" summary, or a dash while nothing has been recognised.
  const pc = analysisState.patternCounts;
  const recognised = sum(Object.values(pc)) || 0;
  statPatterns.textContent = recognised
    ? Object.entries(pc).map(([name, hits]) => `${name}:${hits}`).join(' | ')
    : '—';

  renderPatternList(pc);
}
| |
/**
 * Rebuild the pattern list: one row per pattern with a checkbox, the name, the hit
 * count and a bar showing its share of all hits.
 *
 * Patterns the user had unchecked stay unchecked across re-renders (the list is
 * rebuilt for every analysed chunk). After the list has been emptied, every
 * pattern starts checked again.
 *
 * @param {Object<string, number>} patternCounts  pattern name -> hits
 */
function renderPatternList(patternCounts){
  // Remember the current exclusions before the rows are thrown away.
  const excluded = new Set(
    Array.from(patternList.querySelectorAll('input[type="checkbox"]'))
      .filter(box => !box.checked)
      .map(box => box.dataset.pattern)
  );

  patternList.innerHTML = '';
  // "|| 1" avoids dividing by zero while no pattern has been counted.
  const total = Object.values(patternCounts).reduce((a, b) => a + b, 0) || 1;

  for(const [name, hits] of Object.entries(patternCounts)){
    const pct = Math.round((hits / total) * 100);
    const item = document.createElement('div');
    item.className = 'pattern-item';
    // Static skeleton only; the dynamic values are assigned through DOM properties below.
    item.innerHTML = `
      <div style="display:flex;gap:10px;align-items:center">
        <input type="checkbox" />
        <div style="min-width:120px"><strong></strong></div>
        <div class="small muted"></div>
      </div>
      <div style="width:40%">
        <div style="height:8px;background:rgba(255,255,255,0.03);border-radius:6px;overflow:hidden">
          <div class="bar"></div>
        </div>
      </div>
    `;
    const box = item.querySelector('input[type="checkbox"]');
    box.dataset.pattern = name;
    box.checked = !excluded.has(name);
    box.setAttribute('aria-label', name);
    item.querySelector('strong').textContent = name;
    item.querySelector('.small').textContent = `${hits} hits`;
    item.querySelector('.bar').style.width = pct + '%';
    patternList.appendChild(item);
  }
}
| |
/**
 * Called once the worker reports the end of the file: fill the generation inputs
 * from the analysis and tell the user the analysis is finished.
 *
 * - When the name-extraction option is checked, the 200 most frequent first/last
 *   name candidates are written, one per line, into the pool textareas. A textarea
 *   that already has content is left alone.
 * - The 12 most frequent numeric suffixes become the common-suffix list.
 */
function finalizeAnalysis(){
  // Keys of a count table, most frequent first, limited to `limit` entries.
  const topKeys = (counts, limit) => Object.entries(counts || {})
    .sort((a, b) => b[1] - a[1])
    .slice(0, limit)
    .map(([key]) => key);

  if(optExtractNames.checked){
    if(!firstPoolTA.value.trim()){
      firstPoolTA.value = topKeys(analysisState.nameCandidates.first, 200).map(n => normalizeStr(n)).join('\n');
    }
    if(!lastPoolTA.value.trim()){
      lastPoolTA.value = topKeys(analysisState.nameCandidates.last, 200).map(n => normalizeStr(n)).join('\n');
    }
  }

  // preset suffix common list
  sufCommon.value = topKeys(analysisState.suffixCounts, 12).join(',');

  updateStatsUI();
  alert('Анализ завершён. Проверьте автоматически заполненные пулы имён и фамилий. Отредактируйте при необходимости и нажмите "Генерировать".');
}
| |
| /* ---------- Generation logic ---------- */ |
/**
 * Names of the patterns whose checkbox is currently ticked, in list order.
 * @returns {string[]}
 */
function getSelectedPatterns(){
  const names = [];
  patternList
    .querySelectorAll('input[type="checkbox"]:checked')
    .forEach(box => { names.push(box.dataset.pattern); });
  return names;
}
/**
 * Domain -> weight table for the chosen preset: the analysed file's own domain
 * counts for "auto", otherwise a fixed, equally weighted list.
 * @returns {Object<string, number>}
 */
function getDomainDistribution(){
  const preset = domainPreset.value;
  if(preset === 'auto'){
    return normalizeDistribution(analysisState.domainCounts || {});
  }

  // Fixed domain groups; every value other than "local" selects the global group.
  const local = ['gmx.at','aon.at','chello.at','liwest.at','inode.at','student.uibk.ac.at','proton.me','protonmail.com','medundmed.at','drei.at','tmo.at'];
  const global = ['gmail.com','yahoo.com','outlook.com','hotmail.com','live.com','googlemail.com','msn.com','ymail.com'];
  const chosen = (preset === 'local') ? local : global;

  const dist = {};
  for(const domain of chosen){
    dist[domain] = 1;
  }
  return normalizeDistribution(dist);
}
/**
 * Copy a weight table, converting every weight to a number.
 *
 * - An empty table yields {'gmail.com': 1}.
 * - If all weights add up to 0, every key gets weight 1.
 * The weights are not rescaled to sum to 1.
 *
 * @param {Object} map  key -> weight
 * @returns {Object<string, number>} A new table; `map` is not modified.
 */
function normalizeDistribution(map){
  const names = Object.keys(map);
  if(names.length === 0) return {'gmail.com':1};

  const weights = {};
  let sum = 0;
  names.forEach(name => {
    weights[name] = Number(map[name] || 0);
    sum += weights[name];
  });

  if(sum === 0){
    // nothing carries weight: fall back to equal weights
    for(const name of names){
      weights[name] = 1;
    }
  }
  return weights;
}
| |
/**
 * Split a pool text on commas, semicolons and line breaks, normalize every entry
 * and return the distinct non-empty results in first-seen order.
 *
 * @param {string} text
 * @returns {string[]}
 */
function parsePool(text){
  if(!text) return [];
  // \r is included so Windows line endings do not leave empty entries behind.
  const entries = String(text).split(/[\r\n,;]+/).map(s => normalizeStr(s)).filter(Boolean);
  return uniq(entries);
}
| |
/**
 * Build one address generator per selected pattern.
 *
 * The returned array is index-aligned with `selectedPatterns` (gens[i] belongs to
 * selectedPatterns[i]). When no pattern is selected, it holds a single
 * firstname.lastname@gmail.com generator.
 *
 * Empty pools never leak "undefined" into an address: the built-in fallbacks
 * 'john', 'doe' and 'user' are used instead.
 *
 * @param {string[]} selectedPatterns  Pattern names; unknown names use the firstname.lastname recipe.
 * @param {string[]} firstPool         First names.
 * @param {string[]} lastPool          Last names.
 * @param {string[]} nickPool          Nicknames.
 * @param {string[]} suffixList        Numeric suffix strings.
 * @param {number[]|null} yearRange    [from, to] inclusive, or null for no year suffixes.
 * @param {Object<string, number>} domainWeights  domain -> weight
 * @returns {Array<function(): string>}
 */
function buildGenerators(selectedPatterns, firstPool, lastPool, nickPool, suffixList, yearRange, domainWeights){
  // Random element of an array, or undefined for an empty/missing array.
  const rnd = arr => (arr && arr.length) ? arr[Math.floor(Math.random() * arr.length)] : undefined;
  const pickFirst = () => rnd(firstPool) || 'john';
  const pickLast = () => rnd(lastPool) || 'doe';
  const pickNick = () => rnd(nickPool) || rnd(firstPool) || 'user';
  const pickDomain = () => sampleWeighted(domainWeights || {}) || 'gmail.com';
  const pickSuffix = () => rnd(suffixList) || '';
  const pickYearSuffix = () => {
    if(!yearRange) return '';
    const [from, to] = yearRange;
    return String(from + Math.floor(Math.random() * (to - from + 1)));
  };
  // 45% chance of a number, split evenly between a common suffix and a year.
  const maybeNum = () => {
    if(Math.random() < 0.45){
      return (Math.random() < 0.5) ? pickSuffix() : pickYearSuffix();
    }
    return '';
  };
  // Append maybeNum() with the given probability.
  const withOptionalNum = (local, chance) => (Math.random() < chance) ? local + maybeNum() : local;
  const atDomain = local => `${local}@${pickDomain()}`;

  // Shared by fn_digits and nick_digits: a nickname or name base that always ends in digits.
  const digitsRecipe = () => {
    let base;
    if(Math.random() < 0.5){
      base = pickNick();
    } else {
      const last = rnd(lastPool);
      // Only insert the dot when a last name follows it.
      base = pickFirst() + (last ? ((Math.random() < 0.3 ? '.' : '') + last) : '');
    }
    const digits = maybeNum() || pickSuffix() || pickYearSuffix() || String(Math.floor(Math.random() * 100));
    return atDomain(base + digits);
  };

  const recipes = {
    fn_dot_ln: () => atDomain(withOptionalNum(`${pickFirst()}.${pickLast()}`, 0.4)),
    fi_dot_ln: () => atDomain(withOptionalNum(`${pickFirst()[0]}.${pickLast()}`, 0.3)),
    fnln: () => atDomain(withOptionalNum(`${pickFirst()}${pickLast()}`, 0.35)),
    fn_digits: digitsRecipe,
    nick_digits: digitsRecipe,
    pure_nick: () => atDomain(withOptionalNum(pickNick(), 0.35))
  };

  const gens = selectedPatterns.map(pattern =>
    Object.prototype.hasOwnProperty.call(recipes, pattern) ? recipes[pattern] : recipes.fn_dot_ln
  );

  // ensure at least one generator
  if(gens.length === 0){
    gens.push(() => `${pickFirst()}.${pickLast()}@gmail.com`);
  }
  return gens;
}
| |
/**
 * Parse a year range such as "1960-2005" (a hyphen, en dash or em dash, optionally
 * surrounded by spaces).
 *
 * The lower bound is raised to at least 1900 and the upper bound is capped at 2100.
 *
 * @param {string} s
 * @returns {number[]|null} [from, to], or null when `s` is empty, contains no
 *                          range, or the range is reversed.
 */
function parseYearRange(s){
  if(!s) return null;
  const m = String(s).match(/(\d{3,4})\s*[-–—]\s*(\d{3,4})/);
  if(!m) return null;
  const from = Math.max(1900, Number(m[1]));
  const to = Math.min(2100, Number(m[2]));
  return (from <= to) ? [from, to] : null;
}
| |
/**
 * Generate up to `count` distinct addresses, show them in the output list and
 * remember them in `lastGenerated`.
 *
 * Sources, in order of preference:
 * - names: the pool textareas, then the seed inputs, then the most frequent
 *   candidates found by the analysis;
 * - nicknames: the nickname seeds, then the most frequent first-name candidates;
 * - suffixes: the common-suffix input, then the most frequent analysed suffixes;
 * - years: the year-range input, else 1970-2005.
 *
 * A pattern is drawn for every attempt with a weight equal to its analysed hit
 * count (minimum 1). Attempts are capped, so fewer than `count` addresses are
 * returned when the sources cannot produce enough distinct values.
 *
 * @param {number} count    Number of addresses wanted.
 * @param {Object} options  Accepted for compatibility; currently not used.
 * @returns {string[]} The generated addresses (also stored in `lastGenerated`).
 */
function generateList(count, options = {}){
  const selectedPatterns = getSelectedPatterns();
  const candidates = analysisState.nameCandidates || {};

  // Keys of a count table, most frequent first.
  const topKeys = (counts, limit) => Object.entries(counts || {})
    .sort((a, b) => b[1] - a[1])
    .slice(0, limit)
    .map(([key]) => key);
  // First non-empty pool among: the given text, the seed input, the analysed candidates.
  const resolvePool = (primaryText, seedId, fallbackCounts) => {
    const primary = parsePool(primaryText);
    if(primary.length) return primary;
    const seeds = parsePool(document.getElementById(seedId).value);
    if(seeds.length) return seeds;
    return topKeys(fallbackCounts, 200).map(name => normalizeStr(name));
  };

  const fPool = resolvePool(firstPoolTA.value, 'firstSeeds', candidates.first);
  const lPool = resolvePool(lastPoolTA.value, 'lastSeeds', candidates.last);
  const nickPool = resolvePool('', 'nickSeeds', candidates.first);

  const domainWeights = getDomainDistribution();
  const typedSuffixes = uniq(String(sufCommon.value || '').split(/[\r\n,;]+/).map(s => s.trim()).filter(Boolean));
  const suffixList = typedSuffixes.length ? typedSuffixes : topKeys(analysisState.suffixCounts, 20);
  const yearRange = parseYearRange(sufYears.value) || [1970,2005];
  const gens = buildGenerators(selectedPatterns, fPool, lPool, nickPool, suffixList, yearRange, domainWeights);

  // pattern -> weight and pattern -> generator (gens is index-aligned with selectedPatterns)
  const patternCounts = analysisState.patternCounts || {};
  const patternWeightMap = {};
  const generatorFor = {};
  selectedPatterns.forEach((pattern, i) => {
    patternWeightMap[pattern] = patternCounts[pattern] || 1;
    generatorFor[pattern] = gens[i];
  });

  const wanted = Math.max(0, Math.floor(Number(count) || 0));
  const CAP = Math.max(wanted * 10, 2000); // attempts cap to avoid infinite loops
  const out = new Set();
  let attempts = 0;
  while(out.size < wanted && attempts < CAP){
    attempts++;
    const chosen = sampleWeighted(patternWeightMap);
    // With no pattern selected, `chosen` is null and the single fallback generator is used.
    const genFunc = (chosen !== null && generatorFor[chosen]) || gens[Math.floor(Math.random() * gens.length)];
    try{
      const val = genFunc();
      if(val && typeof val === 'string'){
        const cleaned = normalizeStrEmail(val);
        // Skip results whose local part was stripped down to nothing.
        if(!cleaned.startsWith('@')) out.add(cleaned);
      }
    }catch(e){ console.error('generator error', e); }
  }

  lastGenerated = Array.from(out);
  renderOutput(lastGenerated);
  return lastGenerated;
}
| |
/**
 * Clean a generated address: trim and lowercase it, transliterate the local part
 * through DIACRIT_MAP, keep only a-z 0-9 . _ + - in the local part and remove its
 * leading/trailing dots. The domain is only trimmed and lowercased.
 *
 * The last '@' separates local part and domain.
 *
 * @param {string} email
 * @returns {string} The cleaned address; the argument itself is returned unchanged
 *                   when it contains no '@'.
 */
function normalizeStrEmail(email){
  const parts = String(email).trim().toLowerCase().split('@');
  if(parts.length < 2) return email;
  const domain = parts.pop().trim();
  let local = parts.join('@'); // in case the local part had an '@' (rare)
  // replace diacritics in local
  local = local.replace(/[^ -~]/g, ch => DIACRIT_MAP[ch] || ch);
  // The hyphen is placed last in the class so it is a literal, not a range.
  local = local.replace(/[^a-z0-9._+-]/g, '');
  // avoid leading/trailing dot
  local = local.replace(/^\.+|\.+$/g, '');
  return local + '@' + domain;
}
| |
/**
 * Show the generated addresses, one per line, in the output box.
 * The box is styled with white-space:pre-wrap, so plain newlines in textContent
 * are rendered as line breaks.
 *
 * @param {string[]} list
 */
function renderOutput(list){
  outList.textContent = list.join('\n');
}
| |
| /* ---------- UI events ---------- */ |
// "Preview": generate at most 25 entries (fewer if the requested count is smaller).
previewBtn.addEventListener('click', function(){
  const requested = Number(genCountInput.value) || 25;
  const preview = generateList(Math.min(25, requested));
  alert('Предпросмотр: ' + preview.length + ' записей сгенерировано (показаны в списке).');
});
| |
// "Generate": at least 1 entry; 100 when the count field is empty or not a number.
genBtn.addEventListener('click', function(){
  const requested = Number(genCountInput.value) || 100;
  generateList(Math.max(1, requested));
  alert('Генерация завершена: ' + lastGenerated.length + ' уникальных записей.');
});
| |
// "Show sample": alert the ten most frequent domains of the analysed file, one per line.
openSampleBtn.addEventListener('click', ()=>{
  const sample = Object.entries(analysisState.domainMap || {})
    .sort((a, b) => b[1] - a[1])
    .slice(0, 10)
    .map(([domain, hits]) => domain + ': ' + (hits || 0))
    .join('\n');
  alert('Top domains samples:\n' + sample);
});
| |
| /* exports */ |
/**
 * Offer `text` to the user as a UTF-8 text file download named `filename`.
 * A temporary link is added to the page, clicked, and removed again (together
 * with its object URL) after 100 ms.
 */
function download(filename, text){
  const link = document.createElement('a');
  link.href = URL.createObjectURL(new Blob([text], {type:'text/plain;charset=utf-8'}));
  link.download = filename;
  document.body.appendChild(link);
  link.click();
  setTimeout(() => {
    URL.revokeObjectURL(link.href);
    link.remove();
  }, 100);
}
// Export as plain text: one address per line.
exportTxt.addEventListener('click', ()=> {
  if(!lastGenerated.length){ alert('Нет сгенерированных данных.'); return; }
  download('generated_usernames.txt', lastGenerated.join('\n'));
});
// Export as CSV: a single "email" column, every value quoted and inner quotes doubled.
exportCsv.addEventListener('click', ()=> {
  if(!lastGenerated.length){ alert('Нет сгенерированных данных.'); return; }
  const rows = lastGenerated.map(e => `"${String(e).replace(/"/g, '""')}"`);
  const csv = 'email\n' + rows.join('\n');
  download('generated_usernames.csv', csv);
});
// Export as JSON: an array of strings, pretty-printed with two-space indentation.
exportJson.addEventListener('click', function(){
  if(lastGenerated.length === 0){
    alert('Нет сгенерированных данных.');
    return;
  }
  const json = JSON.stringify(lastGenerated, null, 2);
  download('generated_usernames.json', json);
});
| |
| /* ---------- Helpers for UI and sanity ---------- */ |
/**
 * Parse `v` as a base-10 integer; return `d` when it cannot be parsed.
 */
function safeParseInt(v, d){
  const parsed = Number.parseInt(v, 10);
  if(Number.isNaN(parsed)) return d;
  return parsed;
}
| |
| /* ---------- Initialize small defaults ---------- */ |
// Replace the markup placeholders of the two suffix fields with the example hints below.
(function initDefaults(){
  const hints = [
    [sufYears, 'Пример: 1960-2005'],
    [sufCommon, 'Например: 007,123,84,2005']
  ];
  for(const [field, hint] of hints){
    field.placeholder = hint;
  }
})();
| |
| /* ---------- End of script ---------- */ |
| </script> |
| </body> |
| </html> |