Spaces:
Running
Running
Add Word Count stats block above tabs (General Stats)
Browse files
- 2026-02-07_12-32-45.png +0 -0
- 2026-02-07_13-16-47.png +0 -0
- app.py +16 -1
- logic.py +17 -0
- models.py +2 -2
- templates/index.html +46 -0
2026-02-07_12-32-45.png
ADDED
|
2026-02-07_13-16-47.png
ADDED
|
app.py
CHANGED
|
@@ -27,12 +27,25 @@ async def read_root(request: Request):
|
|
| 27 |
|
| 28 |
@app.post("/analyze", response_model=AnalysisResponse)
|
| 29 |
async def analyze_text(request: AnalysisRequest):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
ngram_stats_result = logic.calculate_ngram_stats(
|
| 31 |
request.target_text,
|
| 32 |
request.competitors,
|
| 33 |
request.language
|
| 34 |
)
|
| 35 |
|
|
|
|
| 36 |
key_phrases, key_words_unigrams = logic.parse_keywords(request.keywords, request.language)
|
| 37 |
|
| 38 |
bm25_recs = logic.calculate_bm25_recommendations(
|
|
@@ -42,6 +55,7 @@ async def analyze_text(request: AnalysisRequest):
|
|
| 42 |
request.language
|
| 43 |
)
|
| 44 |
|
|
|
|
| 45 |
bert_results = logic.perform_bert_analysis(
|
| 46 |
request.target_text,
|
| 47 |
request.competitors,
|
|
@@ -52,7 +66,8 @@ async def analyze_text(request: AnalysisRequest):
|
|
| 52 |
return AnalysisResponse(
|
| 53 |
ngram_stats=ngram_stats_result,
|
| 54 |
bm25_recommendations=bm25_recs,
|
| 55 |
-
bert_analysis=bert_results
|
|
|
|
| 56 |
)
|
| 57 |
|
| 58 |
# Hugging Face Spaces использует порт 7860
|
|
|
|
| 27 |
|
| 28 |
@app.post("/analyze", response_model=AnalysisResponse)
|
| 29 |
async def analyze_text(request: AnalysisRequest):
|
| 30 |
+
# 1. Считаем слова (Word Count)
|
| 31 |
+
wc_target = logic.count_words(request.target_text, request.language)
|
| 32 |
+
wc_competitors = [logic.count_words(t, request.language) for t in request.competitors]
|
| 33 |
+
wc_avg = sum(wc_competitors) / len(wc_competitors) if wc_competitors else 0
|
| 34 |
+
|
| 35 |
+
word_counts_data = {
|
| 36 |
+
"target": wc_target,
|
| 37 |
+
"competitors": wc_competitors,
|
| 38 |
+
"avg": round(wc_avg)
|
| 39 |
+
}
|
| 40 |
+
|
| 41 |
+
# 2. N-gram stats
|
| 42 |
ngram_stats_result = logic.calculate_ngram_stats(
|
| 43 |
request.target_text,
|
| 44 |
request.competitors,
|
| 45 |
request.language
|
| 46 |
)
|
| 47 |
|
| 48 |
+
# 3. BM25
|
| 49 |
key_phrases, key_words_unigrams = logic.parse_keywords(request.keywords, request.language)
|
| 50 |
|
| 51 |
bm25_recs = logic.calculate_bm25_recommendations(
|
|
|
|
| 55 |
request.language
|
| 56 |
)
|
| 57 |
|
| 58 |
+
# 4. BERT
|
| 59 |
bert_results = logic.perform_bert_analysis(
|
| 60 |
request.target_text,
|
| 61 |
request.competitors,
|
|
|
|
| 66 |
return AnalysisResponse(
|
| 67 |
ngram_stats=ngram_stats_result,
|
| 68 |
bm25_recommendations=bm25_recs,
|
| 69 |
+
bert_analysis=bert_results,
|
| 70 |
+
word_counts=word_counts_data
|
| 71 |
)
|
| 72 |
|
| 73 |
# Hugging Face Spaces использует порт 7860
|
logic.py
CHANGED
|
@@ -172,6 +172,23 @@ def generate_ngrams_safe(text: str, lang: str, n: int) -> List[str]:
|
|
| 172 |
|
| 173 |
return all_ngrams
|
| 174 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
# --- ANALYTICS (N-grams & BM25) ---
|
| 176 |
|
| 177 |
def calculate_ngram_stats(target_text: str, competitor_texts: List[str], lang: str) -> Dict:
|
|
|
|
| 172 |
|
| 173 |
return all_ngrams
|
| 174 |
|
| 175 |
+
# --- WORD COUNT ---
|
| 176 |
+
|
| 177 |
+
def count_words(text: str, lang: str) -> int:
    """Count the word tokens in ``text``, skipping punctuation, whitespace and symbols.

    Args:
        text: Raw text to measure. ``None``, an empty string, or a
            whitespace-only string counts as zero words.
        lang: Language code, forwarded unchanged to ``get_doc``.

    Returns:
        The number of tokens yielded by ``get_doc(text, lang)`` that are not
        flagged ``is_punct`` or ``is_space`` and whose ``pos_`` is not ``"SYM"``.
    """
    # Guard blank (or missing) input up front so get_doc is never called for it.
    if not text or not text.strip():
        return 0

    doc = get_doc(text, lang)
    return sum(
        1
        for token in doc
        if not (token.is_punct or token.is_space or token.pos_ == "SYM")
    )
|
| 191 |
+
|
| 192 |
# --- ANALYTICS (N-grams & BM25) ---
|
| 193 |
|
| 194 |
def calculate_ngram_stats(target_text: str, competitor_texts: List[str], lang: str) -> Dict:
|
models.py
CHANGED
|
@@ -1,5 +1,4 @@
|
|
| 1 |
from pydantic import BaseModel
|
| 2 |
-
from typing import List, Optional
|
| 3 |
from typing import List, Dict, Optional, Any
|
| 4 |
|
| 5 |
|
|
@@ -16,4 +15,5 @@ class AnalysisRequest(BaseModel):
|
|
| 16 |
class AnalysisResponse(BaseModel):
|
| 17 |
ngram_stats: dict # Статистика униграм/биграм
|
| 18 |
bm25_recommendations: List[dict] # Рекомендации "добавить/убрать"
|
| 19 |
-
bert_analysis: Dict[str, Any] # Векторный анализ
|
|
|
|
|
|
| 1 |
from pydantic import BaseModel
|
|
|
|
| 2 |
from typing import List, Dict, Optional, Any
|
| 3 |
|
| 4 |
|
|
|
|
| 15 |
class AnalysisResponse(BaseModel):
|
| 16 |
ngram_stats: dict # Статистика униграм/биграм
|
| 17 |
bm25_recommendations: List[dict] # Рекомендации "добавить/убрать"
|
| 18 |
+
bert_analysis: Dict[str, Any] # Векторный анализ
|
| 19 |
+
word_counts: Dict[str, Any] # {'target': 500, 'competitors': [600, 450], 'avg': 525}
|
templates/index.html
CHANGED
|
@@ -76,6 +76,11 @@
|
|
| 76 |
|
| 77 |
<!-- ПРАВАЯ КОЛОНКА: РЕЗУЛЬТАТЫ -->
|
| 78 |
<div class="col-md-7">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
|
| 80 |
<!-- Табы -->
|
| 81 |
<ul class="nav nav-tabs mb-3" id="resultsTab" role="tablist">
|
|
@@ -217,6 +222,47 @@
|
|
| 217 |
}
|
| 218 |
|
| 219 |
function renderResults(data) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 220 |
// 1. BERT Render (ИСПРАВЛЕННЫЙ ПОД НОВУЮ СТРУКТУРУ)
|
| 221 |
const bertContainer = document.getElementById('bertResultsContainer');
|
| 222 |
bertContainer.innerHTML = '';
|
|
|
|
| 76 |
|
| 77 |
<!-- ПРАВАЯ КОЛОНКА: РЕЗУЛЬТАТЫ -->
|
| 78 |
<div class="col-md-7">
|
| 79 |
+
|
| 80 |
+
<!-- Блок общей статистики (Word Count) -->
|
| 81 |
+
<div id="generalStats" class="mb-4">
|
| 82 |
+
<!-- Сюда JS вставит карточки -->
|
| 83 |
+
</div>
|
| 84 |
|
| 85 |
<!-- Табы -->
|
| 86 |
<ul class="nav nav-tabs mb-3" id="resultsTab" role="tablist">
|
|
|
|
| 222 |
}
|
| 223 |
|
| 224 |
function renderResults(data) {
|
| 225 |
+
// 0. General Stats Render (Word Count)
|
| 226 |
+
const statsContainer = document.getElementById('generalStats');
|
| 227 |
+
statsContainer.innerHTML = '';
|
| 228 |
+
|
| 229 |
+
if (data.word_counts) {
|
| 230 |
+
const myWords = data.word_counts.target;
|
| 231 |
+
const avgWords = data.word_counts.avg;
|
| 232 |
+
|
| 233 |
+
let compsHtml = '';
|
| 234 |
+
data.word_counts.competitors.forEach((count, idx) => {
|
| 235 |
+
compsHtml += `<div class="px-2 border-end"><small class="text-muted d-block">K${idx+1}</small><strong>${count}</strong></div>`;
|
| 236 |
+
});
|
| 237 |
+
|
| 238 |
+
const html = `
|
| 239 |
+
<div class="card border-0 shadow-sm">
|
| 240 |
+
<div class="card-body py-2 d-flex align-items-center justify-content-between">
|
| 241 |
+
|
| 242 |
+
<!-- Мой результат -->
|
| 243 |
+
<div class="d-flex align-items-center">
|
| 244 |
+
<div class="me-3">
|
| 245 |
+
<small class="text-muted d-block">Мой текст</small>
|
| 246 |
+
<span class="h4 mb-0 fw-bold ${myWords < avgWords ? 'text-danger' : 'text-success'}">${myWords}</span> <small class="text-muted">слов</small>
|
| 247 |
+
</div>
|
| 248 |
+
</div>
|
| 249 |
+
|
| 250 |
+
<!-- Сравнение -->
|
| 251 |
+
<div class="d-flex text-center">
|
| 252 |
+
<div class="px-3">
|
| 253 |
+
<small class="text-muted d-block">Среднее (Конкуренты)</small>
|
| 254 |
+
<span class="h5 mb-0 fw-bold">${avgWords}</span>
|
| 255 |
+
</div>
|
| 256 |
+
<div class="d-flex align-items-center border-start ps-2">
|
| 257 |
+
${compsHtml}
|
| 258 |
+
</div>
|
| 259 |
+
</div>
|
| 260 |
+
|
| 261 |
+
</div>
|
| 262 |
+
</div>`;
|
| 263 |
+
statsContainer.innerHTML = html;
|
| 264 |
+
}
|
| 265 |
+
|
| 266 |
// 1. BERT Render (ИСПРАВЛЕННЫЙ ПОД НОВУЮ СТРУКТУРУ)
|
| 267 |
const bertContainer = document.getElementById('bertResultsContainer');
|
| 268 |
bertContainer.innerHTML = '';
|