lsdf committed on
Commit
dd3f7c2
·
1 Parent(s): 8adc7df

Add Word Count stats block above tabs (General Stats)

Browse files
Files changed (6) hide show
  1. 2026-02-07_12-32-45.png +0 -0
  2. 2026-02-07_13-16-47.png +0 -0
  3. app.py +16 -1
  4. logic.py +17 -0
  5. models.py +2 -2
  6. templates/index.html +46 -0
2026-02-07_12-32-45.png ADDED
2026-02-07_13-16-47.png ADDED
app.py CHANGED
@@ -27,12 +27,25 @@ async def read_root(request: Request):
27
 
28
  @app.post("/analyze", response_model=AnalysisResponse)
29
  async def analyze_text(request: AnalysisRequest):
 
 
 
 
 
 
 
 
 
 
 
 
30
  ngram_stats_result = logic.calculate_ngram_stats(
31
  request.target_text,
32
  request.competitors,
33
  request.language
34
  )
35
 
 
36
  key_phrases, key_words_unigrams = logic.parse_keywords(request.keywords, request.language)
37
 
38
  bm25_recs = logic.calculate_bm25_recommendations(
@@ -42,6 +55,7 @@ async def analyze_text(request: AnalysisRequest):
42
  request.language
43
  )
44
 
 
45
  bert_results = logic.perform_bert_analysis(
46
  request.target_text,
47
  request.competitors,
@@ -52,7 +66,8 @@ async def analyze_text(request: AnalysisRequest):
52
  return AnalysisResponse(
53
  ngram_stats=ngram_stats_result,
54
  bm25_recommendations=bm25_recs,
55
- bert_analysis=bert_results
 
56
  )
57
 
58
  # Hugging Face Spaces использует порт 7860
 
27
 
28
  @app.post("/analyze", response_model=AnalysisResponse)
29
  async def analyze_text(request: AnalysisRequest):
30
+ # 1. Считаем слова (Word Count)
31
+ wc_target = logic.count_words(request.target_text, request.language)
32
+ wc_competitors = [logic.count_words(t, request.language) for t in request.competitors]
33
+ wc_avg = sum(wc_competitors) / len(wc_competitors) if wc_competitors else 0
34
+
35
+ word_counts_data = {
36
+ "target": wc_target,
37
+ "competitors": wc_competitors,
38
+ "avg": round(wc_avg)
39
+ }
40
+
41
+ # 2. N-gram stats
42
  ngram_stats_result = logic.calculate_ngram_stats(
43
  request.target_text,
44
  request.competitors,
45
  request.language
46
  )
47
 
48
+ # 3. BM25
49
  key_phrases, key_words_unigrams = logic.parse_keywords(request.keywords, request.language)
50
 
51
  bm25_recs = logic.calculate_bm25_recommendations(
 
55
  request.language
56
  )
57
 
58
+ # 4. BERT
59
  bert_results = logic.perform_bert_analysis(
60
  request.target_text,
61
  request.competitors,
 
66
  return AnalysisResponse(
67
  ngram_stats=ngram_stats_result,
68
  bm25_recommendations=bm25_recs,
69
+ bert_analysis=bert_results,
70
+ word_counts=word_counts_data
71
  )
72
 
73
  # Hugging Face Spaces использует порт 7860
logic.py CHANGED
@@ -172,6 +172,23 @@ def generate_ngrams_safe(text: str, lang: str, n: int) -> List[str]:
172
 
173
  return all_ngrams
174
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  # --- ANALYTICS (N-grams & BM25) ---
176
 
177
  def calculate_ngram_stats(target_text: str, competitor_texts: List[str], lang: str) -> Dict:
 
172
 
173
  return all_ngrams
174
 
175
+ # --- WORD COUNT ---
176
+
177
+ def count_words(text: str, lang: str) -> int:
178
+ """
179
+ Считает количество слов (токенов), исключая пунктуацию и пробелы.
180
+ """
181
+ if not text.strip():
182
+ return 0
183
+
184
+ doc = get_doc(text, lang)
185
+
186
+ count = 0
187
+ for t in doc:
188
+ if not t.is_punct and not t.is_space and not t.pos_ == "SYM":
189
+ count += 1
190
+ return count
191
+
192
  # --- ANALYTICS (N-grams & BM25) ---
193
 
194
  def calculate_ngram_stats(target_text: str, competitor_texts: List[str], lang: str) -> Dict:
models.py CHANGED
@@ -1,5 +1,4 @@
1
  from pydantic import BaseModel
2
- from typing import List, Optional
3
  from typing import List, Dict, Optional, Any
4
 
5
 
@@ -16,4 +15,5 @@ class AnalysisRequest(BaseModel):
16
  class AnalysisResponse(BaseModel):
17
  ngram_stats: dict # Статистика униграм/биграм
18
  bm25_recommendations: List[dict] # Рекомендации "добавить/убрать"
19
- bert_analysis: Dict[str, Any] # Векторный анализ
 
 
1
  from pydantic import BaseModel
 
2
  from typing import List, Dict, Optional, Any
3
 
4
 
 
15
  class AnalysisResponse(BaseModel):
16
  ngram_stats: dict # Статистика униграм/биграм
17
  bm25_recommendations: List[dict] # Рекомендации "добавить/убрать"
18
+ bert_analysis: Dict[str, Any] # Векторный анализ
19
+ word_counts: Dict[str, Any] # {'target': 500, 'competitors': [600, 450], 'avg': 525}
templates/index.html CHANGED
@@ -76,6 +76,11 @@
76
 
77
  <!-- ПРАВАЯ КОЛОНКА: РЕЗУЛЬТАТЫ -->
78
  <div class="col-md-7">
 
 
 
 
 
79
 
80
  <!-- Табы -->
81
  <ul class="nav nav-tabs mb-3" id="resultsTab" role="tablist">
@@ -217,6 +222,47 @@
217
  }
218
 
219
  function renderResults(data) {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
  // 1. BERT Render (ИСПРАВЛЕННЫЙ ПОД НОВУЮ СТРУКТУРУ)
221
  const bertContainer = document.getElementById('bertResultsContainer');
222
  bertContainer.innerHTML = '';
 
76
 
77
  <!-- ПРАВАЯ КОЛОНКА: РЕЗУЛЬТАТЫ -->
78
  <div class="col-md-7">
79
+
80
+ <!-- Блок общей статистики (Word Count) -->
81
+ <div id="generalStats" class="mb-4">
82
+ <!-- Сюда JS вставит карточки -->
83
+ </div>
84
 
85
  <!-- Табы -->
86
  <ul class="nav nav-tabs mb-3" id="resultsTab" role="tablist">
 
222
  }
223
 
224
  function renderResults(data) {
225
+ // 0. General Stats Render (Word Count)
226
+ const statsContainer = document.getElementById('generalStats');
227
+ statsContainer.innerHTML = '';
228
+
229
+ if (data.word_counts) {
230
+ const myWords = data.word_counts.target;
231
+ const avgWords = data.word_counts.avg;
232
+
233
+ let compsHtml = '';
234
+ data.word_counts.competitors.forEach((count, idx) => {
235
+ compsHtml += `<div class="px-2 border-end"><small class="text-muted d-block">K${idx+1}</small><strong>${count}</strong></div>`;
236
+ });
237
+
238
+ const html = `
239
+ <div class="card border-0 shadow-sm">
240
+ <div class="card-body py-2 d-flex align-items-center justify-content-between">
241
+
242
+ <!-- Мой результат -->
243
+ <div class="d-flex align-items-center">
244
+ <div class="me-3">
245
+ <small class="text-muted d-block">Мой текст</small>
246
+ <span class="h4 mb-0 fw-bold ${myWords < avgWords ? 'text-danger' : 'text-success'}">${myWords}</span> <small class="text-muted">слов</small>
247
+ </div>
248
+ </div>
249
+
250
+ <!-- Сравнение -->
251
+ <div class="d-flex text-center">
252
+ <div class="px-3">
253
+ <small class="text-muted d-block">Среднее (Конкуренты)</small>
254
+ <span class="h5 mb-0 fw-bold">${avgWords}</span>
255
+ </div>
256
+ <div class="d-flex align-items-center border-start ps-2">
257
+ ${compsHtml}
258
+ </div>
259
+ </div>
260
+
261
+ </div>
262
+ </div>`;
263
+ statsContainer.innerHTML = html;
264
+ }
265
+
266
  // 1. BERT Render (ИСПРАВЛЕННЫЙ ПОД НОВУЮ СТРУКТУРУ)
267
  const bertContainer = document.getElementById('bertResultsContainer');
268
  bertContainer.innerHTML = '';