coingimp committed on
Commit
c7cc5da
·
verified ·
1 Parent(s): ea43e93

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +536 -85
app.py CHANGED
@@ -3,6 +3,7 @@ import json
3
  import os
4
  import re
5
  import csv
 
6
  from datetime import datetime
7
  from pathlib import Path
8
 
@@ -17,7 +18,80 @@ KB_DIR = "knowledge_base"
17
  os.makedirs(DATASET_DIR, exist_ok=True)
18
  os.makedirs(KB_DIR, exist_ok=True)
19
 
20
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  def extract_text_from_docx(file_path):
22
  if not DOCX_AVAILABLE:
23
  return ""
@@ -25,7 +99,6 @@ def extract_text_from_docx(file_path):
25
  paragraphs = [p.text for p in doc.paragraphs if p.text.strip()]
26
  return "\n\n".join(paragraphs)
27
 
28
-
29
  def split_into_chunks(text, chunk_size=512, overlap=64):
30
  words = text.split()
31
  chunks = []
@@ -36,7 +109,6 @@ def split_into_chunks(text, chunk_size=512, overlap=64):
36
  i += chunk_size - overlap
37
  return chunks
38
 
39
-
40
  def text_to_qa_pairs(text, source_name=""):
41
  paragraphs = [p.strip() for p in text.split("\n\n") if len(p.strip()) > 60]
42
  pairs = []
@@ -56,23 +128,18 @@ def text_to_qa_pairs(text, source_name=""):
56
  })
57
  return pairs
58
 
59
-
60
  dataset_store = []
61
 
62
-
63
  def process_files(files, chunk_size, overlap, format_choice, add_qa):
64
  global dataset_store
65
  if not files:
66
  return "⚠️ Файлы не выбраны.", gr.update(interactive=False), ""
67
-
68
  new_records = []
69
  log_lines = []
70
-
71
  for file_obj in files:
72
  path = file_obj
73
  name = os.path.basename(path)
74
  ext = Path(path).suffix.lower()
75
-
76
  if ext == ".docx":
77
  if not DOCX_AVAILABLE:
78
  log_lines.append(f"❌ {name}: python-docx не установлен")
@@ -84,11 +151,9 @@ def process_files(files, chunk_size, overlap, format_choice, add_qa):
84
  else:
85
  log_lines.append(f"⏭ {name}: неподдерживаемый формат")
86
  continue
87
-
88
  if not text.strip():
89
  log_lines.append(f"⚠️ {name}: пустой файл")
90
  continue
91
-
92
  chunks = split_into_chunks(text, int(chunk_size), int(overlap))
93
  for chunk in chunks:
94
  new_records.append({
@@ -97,13 +162,10 @@ def process_files(files, chunk_size, overlap, format_choice, add_qa):
97
  "output": chunk,
98
  "source": name,
99
  })
100
-
101
  if add_qa:
102
  qa_pairs = text_to_qa_pairs(text, name)
103
  new_records.extend(qa_pairs)
104
-
105
  log_lines.append(f"✅ {name}: {len(chunks)} чанков")
106
-
107
  dataset_store.extend(new_records)
108
  preview = "\n".join(
109
  f"[{i+1}] {r['output'][:120]}..." for i, r in enumerate(dataset_store[:5])
@@ -114,16 +176,13 @@ def process_files(files, chunk_size, overlap, format_choice, add_qa):
114
  preview,
115
  )
116
 
117
-
118
  def save_dataset(format_choice, dataset_name):
119
  global dataset_store
120
  if not dataset_store:
121
  return "⚠️ Буфер пустой.", None
122
-
123
  ts = datetime.now().strftime("%Y%m%d_%H%M%S")
124
  safe_name = re.sub(r"[^a-zA-Z0-9_\-]", "_", dataset_name or "dataset")
125
  fname_base = f"{safe_name}_{ts}"
126
-
127
  if format_choice == "JSONL":
128
  out_path = os.path.join(DATASET_DIR, fname_base + ".jsonl")
129
  with open(out_path, "w", encoding="utf-8") as f:
@@ -144,12 +203,10 @@ def save_dataset(format_choice, dataset_name):
144
  with open(out_path, "w", encoding="utf-8") as f:
145
  for rec in dataset_store:
146
  f.write(json.dumps(rec, ensure_ascii=False) + "\n")
147
-
148
  count = len(dataset_store)
149
  dataset_store = []
150
  return f"✅ Сохранено: {out_path} ({count} записей)", out_path
151
 
152
-
153
  def list_datasets():
154
  files = list(Path(DATASET_DIR).glob("*.*"))
155
  if not files:
@@ -157,20 +214,17 @@ def list_datasets():
157
  lines = [f"• {f.name} ({f.stat().st_size // 1024} KB)" for f in sorted(files)]
158
  return "\n".join(lines)
159
 
160
-
161
  def clear_buffer():
162
  global dataset_store
163
  dataset_store = []
164
  return "🗑️ Буфер очищен."
165
 
166
 
167
- COLAB_TEMPLATE = """## 🚀 Fine-tuning (LoRA) через Google Colab — Бесплатно
168
-
169
  ### 1. Установка
170
  ```python
171
  !pip install -q unsloth transformers peft datasets trl accelerate bitsandbytes
172
  ```
173
-
174
  ### 2. Загрузка модели
175
  ```python
176
  from unsloth import FastLanguageModel
@@ -186,53 +240,38 @@ model = FastLanguageModel.get_peft_model(
186
  use_gradient_checkpointing="unsloth",
187
  )
188
  ```
189
-
190
  ### 3. Загрузка датасета
191
  ```python
192
  from datasets import load_dataset
193
  dataset = load_dataset("json", data_files="DATASET_PLACEHOLDER", split="train")
194
-
195
  def format_prompt(ex):
196
  return {"text": f"### Instruction:\n{ex['instruction']}\n\n### Input:\n{ex['input']}\n\n### Response:\n{ex['output']}"}
197
-
198
  dataset = dataset.map(format_prompt)
199
  ```
200
-
201
  ### 4. Обучение
202
  ```python
203
  from trl import SFTTrainer
204
  from transformers import TrainingArguments
205
  import torch
206
-
207
  trainer = SFTTrainer(
208
  model=model, tokenizer=tokenizer,
209
  train_dataset=dataset, dataset_text_field="text",
210
  max_seq_length=2048,
211
  args=TrainingArguments(
212
- per_device_train_batch_size=2,
213
- gradient_accumulation_steps=4,
214
- num_train_epochs=3,
215
- learning_rate=2e-4,
216
- fp16=not torch.cuda.is_bf16_supported(),
217
- bf16=torch.cuda.is_bf16_supported(),
218
- output_dir="outputs",
219
- optim="adamw_8bit",
220
  ),
221
  )
222
  trainer.train()
223
  ```
224
-
225
  ### 5. Сохранение
226
  ```python
227
  model.save_pretrained_merged("my_style_model", tokenizer, save_method="merged_16bit")
228
- # Или на HuggingFace Hub:
229
- # model.push_to_hub("username/my-style-model", token="HF_TOKEN")
230
  ```
231
-
232
  ---
233
- **Бесплатные GPU:** Google Colab T4 | Kaggle (30ч/нед) | HuggingFace Spaces
234
- """
235
-
236
 
237
  def get_colab_guide(model_choice, dataset_path):
238
  model_map = {
@@ -248,27 +287,23 @@ def get_colab_guide(model_choice, dataset_path):
248
  guide = guide.replace("DATASET_PLACEHOLDER", dataset_path or "your_dataset.jsonl")
249
  return guide
250
 
251
-
252
  kb_store = {}
253
  KB_FILE = os.path.join(KB_DIR, "knowledge_base.json")
254
 
255
-
256
  def load_kb():
257
  global kb_store
258
  if os.path.exists(KB_FILE):
259
  with open(KB_FILE, encoding="utf-8") as f:
260
  kb_store = json.load(f)
261
 
262
-
263
  def save_kb_to_disk():
264
  with open(KB_FILE, "w", encoding="utf-8") as f:
265
  json.dump(kb_store, f, ensure_ascii=False, indent=2)
266
 
267
-
268
  def get_kb_choices():
269
  return list(kb_store.keys())
270
 
271
-
272
  def add_kb_entry(name, text, tags_raw, files):
273
  if not name.strip():
274
  return "⚠️ Введите название.", gr.update(choices=get_kb_choices())
@@ -283,19 +318,17 @@ def add_kb_entry(name, text, tags_raw, files):
283
  with open(fpath, encoding="utf-8", errors="ignore") as f:
284
  content += "\n\n" + f.read()
285
  kb_store[name.strip()] = {
286
- "text": content, "tags": tags,
 
287
  "created": datetime.now().isoformat(),
288
  }
289
  save_kb_to_disk()
290
  choices = get_kb_choices()
291
  return f"✅ '{name}' добавлена в базу.", gr.update(choices=choices)
292
 
293
-
294
  def refresh_kb():
295
  load_kb()
296
- choices = get_kb_choices()
297
- return gr.update(choices=choices)
298
-
299
 
300
  def get_kb_entry_content(name):
301
  if not name:
@@ -306,7 +339,6 @@ def get_kb_entry_content(name):
306
  return "", "", ""
307
  return entry.get("text", ""), ", ".join(entry.get("tags", [])), entry.get("created", "")
308
 
309
-
310
  def delete_kb_entry(name):
311
  if not name:
312
  return "⚠️ Выберите запись.", gr.update(choices=get_kb_choices())
@@ -317,7 +349,6 @@ def delete_kb_entry(name):
317
  return f"🗑️ '{name}' удалена.", gr.update(choices=get_kb_choices())
318
  return "⚠️ Не найдено.", gr.update(choices=get_kb_choices())
319
 
320
-
321
  def search_kb(query, selected_entries):
322
  load_kb()
323
  results = []
@@ -332,7 +363,6 @@ def search_kb(query, selected_entries):
332
  results.append(f"**{name}** [{', '.join(entry.get('tags', []))}]\n{snippet}...")
333
  return "\n\n---\n\n".join(results) if results else "🔍 Ничего не найдено."
334
 
335
-
336
  def compose_context_for_writing(selected_entries, writing_task):
337
  load_kb()
338
  context_parts = []
@@ -351,22 +381,427 @@ def compose_context_for_writing(selected_entries, writing_task):
351
 
352
  [Начало текста в стиле автора]:"""
353
 
354
-
355
  load_kb()
356
 
357
- css = "footer { display: none !important; }"
358
 
359
- with gr.Blocks(title="Writing Style AI", css=css, theme=gr.themes.Soft()) as demo:
360
- gr.Markdown("# ✍️ Writing Style AI\n### Клонирование авторского стиля: Dataset → Fine-tune → Knowledge Base")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
361
 
362
  with gr.Tabs():
363
- # ── TAB 1: Dataset Builder ──────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
364
  with gr.Tab("📦 Dataset Builder"):
365
- gr.Markdown("### Загрузите ваши тексты (DOCX / TXT) для создания датасета")
366
  with gr.Row():
367
  with gr.Column(scale=2):
368
  file_input = gr.File(
369
- label="📂 DOCX или TXT файлы",
370
  file_count="multiple",
371
  file_types=[".docx", ".txt"],
372
  )
@@ -378,13 +813,13 @@ with gr.Blocks(title="Writing Style AI", css=css, theme=gr.themes.Soft()) as dem
378
  dataset_name = gr.Textbox(label="Название датасета", value="my_dataset")
379
  with gr.Row():
380
  process_btn = gr.Button("⚙️ Обработать файлы", variant="primary")
381
- clear_btn = gr.Button("🗑️ Очистить буфер")
382
- save_btn = gr.Button("💾 Сохранить датасет", variant="primary", interactive=False)
383
  with gr.Column(scale=1):
384
  status_box = gr.Textbox(label="Статус", lines=8)
385
  preview_box = gr.Textbox(label="Превью записей", lines=8)
386
- file_output = gr.File(label="📥 Скачать датасет")
387
- datasets_list = gr.Textbox(label="📚 Сохранённые датасеты", lines=4, value=list_datasets)
388
 
389
  process_btn.click(
390
  process_files,
@@ -398,15 +833,15 @@ with gr.Blocks(title="Writing Style AI", css=css, theme=gr.themes.Soft()) as dem
398
  ).then(lambda: list_datasets(), outputs=datasets_list)
399
  clear_btn.click(clear_buffer, outputs=status_box)
400
 
401
- # ── TAB 2: Fine-tuning Guide ────────────────────────────────────
402
  with gr.Tab("🧠 Fine-tuning (LoRA)"):
403
- gr.Markdown("### Бесплатное обучение LoRA через Google Colab")
404
  with gr.Row():
405
  with gr.Column():
406
  model_choice = gr.Radio(
407
  ["Mistral 7B (рекомендуется)", "Llama 3.1 8B", "Gemma 2 9B", "Phi-3 Mini (быстрее)"],
408
  value="Mistral 7B (рекомендуется)",
409
- label="🤖 Базовая модель",
410
  )
411
  dataset_path_input = gr.Textbox(
412
  label="Путь к датасету",
@@ -416,11 +851,11 @@ with gr.Blocks(title="Writing Style AI", css=css, theme=gr.themes.Soft()) as dem
416
  with gr.Column(scale=2):
417
  guide_output = gr.Markdown()
418
  guide_btn.click(get_colab_guide, inputs=[model_choice, dataset_path_input], outputs=guide_output)
419
- gr.Markdown("**Ресурсы:** [Google Colab](https://colab.research.google.com) | [Unsloth](https://github.com/unslothai/unsloth) | [HuggingFace](https://huggingface.co) | [Kaggle](https://www.kaggle.com/code)")
420
 
421
- # ── TAB 3: Knowledge Base ───────────────────────────────────────
422
  with gr.Tab("📚 Knowledge Base"):
423
- gr.Markdown("### База знаний — добавляйте контекст для генерации текстов")
424
  with gr.Tabs():
425
  with gr.Tab("➕ Добавить запись"):
426
  with gr.Row():
@@ -429,7 +864,7 @@ with gr.Blocks(title="Writing Style AI", css=css, theme=gr.themes.Soft()) as dem
429
  kb_tags = gr.Textbox(label="Теги (через запятую)", placeholder="стиль, технический")
430
  kb_text = gr.Textbox(label="Текст / Контекст", lines=8)
431
  kb_files = gr.File(
432
- label="📂 Загрузить DOCX/TXT",
433
  file_count="multiple",
434
  file_types=[".docx", ".txt"],
435
  )
@@ -440,15 +875,15 @@ with gr.Blocks(title="Writing Style AI", css=css, theme=gr.themes.Soft()) as dem
440
 
441
  with gr.Tab("🔍 Просмотр и поиск"):
442
  with gr.Row():
443
- kb_list_view = gr.CheckboxGroup(label="Записи в базе (выберите для поиска)", choices=get_kb_choices())
444
- refresh_btn = gr.Button("🔄 Обновить список")
445
  with gr.Row():
446
- search_query = gr.Textbox(label="🔍 Поиск", placeholder="ключевое слово...")
447
- search_btn = gr.Button("Найти")
448
  search_results = gr.Markdown()
449
  with gr.Row():
450
  view_entry = gr.Dropdown(label="Просмотреть запись", choices=get_kb_choices())
451
- view_btn = gr.Button("👁 Показать")
452
  del_btn = gr.Button("🗑️ Удалить", variant="stop")
453
  kb_del_status = gr.Textbox(label="Статус", lines=2)
454
  with gr.Row():
@@ -456,36 +891,52 @@ with gr.Blocks(title="Writing Style AI", css=css, theme=gr.themes.Soft()) as dem
456
  entry_tags = gr.Textbox(label="Теги", interactive=False)
457
  entry_created = gr.Textbox(label="Создано", interactive=False)
458
 
459
- with gr.Tab("✍️ Генерация с контекстом"):
 
460
  gen_entries = gr.CheckboxGroup(label="Выберите записи из KB", choices=get_kb_choices())
461
- refresh_gen_btn = gr.Button("🔄 Обновить")
462
  writing_task = gr.Textbox(
463
  label="Задание для генерации",
464
  placeholder="Напиши статью про... / Создай пост о...",
465
  lines=3,
466
  )
467
- compose_btn = gr.Button("📝 Сформировать промпт", variant="primary")
 
 
468
  composed_prompt = gr.Textbox(
469
- label="Готовый промпт (скопируйте в вашу модель)",
 
 
 
 
 
470
  lines=12,
471
  show_copy_button=True,
472
  )
473
 
474
- # Wire up events
475
  kb_add_btn.click(
476
  add_kb_entry,
477
  inputs=[kb_name, kb_text, kb_tags, kb_files],
478
  outputs=[kb_status, kb_list_add],
479
  )
480
-
481
  refresh_btn.click(refresh_kb, outputs=kb_list_view)
482
  search_btn.click(search_kb, inputs=[search_query, kb_list_view], outputs=search_results)
483
  view_btn.click(get_kb_entry_content, inputs=[view_entry], outputs=[entry_text, entry_tags, entry_created])
484
  del_btn.click(delete_kb_entry, inputs=[view_entry], outputs=[kb_del_status, kb_list_view])
485
  refresh_gen_btn.click(refresh_kb, outputs=gen_entries)
486
  compose_btn.click(compose_context_for_writing, inputs=[gen_entries, writing_task], outputs=composed_prompt)
 
 
 
 
 
487
 
488
- gr.Markdown("---\n<center>✍️ <b>Writing Style AI</b> — Dataset Builder • LoRA Fine-tuning • Knowledge Base</center>")
 
 
 
 
489
 
490
  if __name__ == "__main__":
491
  demo.launch()
 
3
  import os
4
  import re
5
  import csv
6
+ import requests
7
  from datetime import datetime
8
  from pathlib import Path
9
 
 
18
  os.makedirs(DATASET_DIR, exist_ok=True)
19
  os.makedirs(KB_DIR, exist_ok=True)
20
 
21
+ # ─── AI Providers ───────────────────────────────────────────────────────────
22
def call_openai(prompt, model="gpt-3.5-turbo", api_key=""):
    """Send *prompt* as a single user message to the OpenAI chat API.

    Returns the assistant reply text, or a human-readable warning/error
    string (the UI displays whatever comes back, so errors are not raised).
    """
    if not api_key:
        return "⚠️ Введите API ключ OpenAI"
    try:
        # Imported lazily so the app still starts when the package is absent.
        from openai import OpenAI
        completion = OpenAI(api_key=api_key).chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=2000,
        )
        return completion.choices[0].message.content
    except Exception as exc:  # missing package / auth / network — report as text
        return f"❌ OpenAI error: {exc}"
36
+
37
def call_hf_inference(prompt, model="mistralai/Mistral-7B-Instruct-v0.3", api_key=""):
    """Query the Hugging Face Inference API for *model*.

    The token is optional (anonymous calls are rate-limited harder).
    Returns generated text or a warning/error string — never raises.
    """
    endpoint = f"https://api-inference.huggingface.co/models/{model}"
    auth_headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
    body = {"inputs": prompt, "parameters": {"max_new_tokens": 800, "temperature": 0.7}}
    try:
        # Response shape varies: list of generations, error dict, or raw dict.
        data = requests.post(endpoint, headers=auth_headers, json=body, timeout=60).json()
        if isinstance(data, list) and data:
            return data[0].get("generated_text", str(data))
        if isinstance(data, dict):
            if "error" in data:
                return f"⚠️ HF: {data['error']}"
            return data.get("generated_text", str(data))
        return str(data)
    except Exception as exc:
        return f"❌ HF error: {exc}"
55
+
56
def call_ollama(prompt, model="llama3.2", host="http://localhost:11434"):
    """Generate text via a locally running Ollama server.

    Uses the non-streaming /api/generate endpoint; returns the reply text
    or an error string when the server is unreachable.
    """
    payload = {"model": model, "prompt": prompt, "stream": False}
    try:
        reply = requests.post(f"{host}/api/generate", json=payload, timeout=120)
        return reply.json().get("response", "No response")
    except Exception as exc:
        return f"❌ Ollama error: {exc}"
66
+
67
def call_groq(prompt, model="llama-3.1-8b-instant", api_key=""):
    """Send *prompt* to Groq via its OpenAI-compatible endpoint.

    Returns the assistant reply, or a warning/error string on failure.
    """
    if not api_key:
        return "⚠️ Введите API ключ Groq"
    try:
        # Groq speaks the OpenAI wire protocol, so the same client works.
        from openai import OpenAI
        client = OpenAI(api_key=api_key, base_url="https://api.groq.com/openai/v1")
        completion = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=2000,
        )
        return completion.choices[0].message.content
    except Exception as exc:
        return f"❌ Groq error: {exc}"
81
+
82
def call_ai(prompt, provider, model, api_key, ollama_host):
    """Route *prompt* to the backend whose UI label matches *provider*.

    Labels must match the choices in PROVIDER_MODELS; an unknown label
    yields a warning string instead of raising.
    """
    # Lambdas keep backend lookups lazy: only the chosen one is invoked.
    dispatch = {
        "🤗 HuggingFace (бесплатно)": lambda: call_hf_inference(prompt, model, api_key),
        "⚡ Groq (бесплатно)": lambda: call_groq(prompt, model, api_key),
        "🏠 Ollama (локально)": lambda: call_ollama(prompt, model, ollama_host),
        "🔵 OpenAI": lambda: call_openai(prompt, model, api_key),
    }
    handler = dispatch.get(provider)
    if handler is None:
        return "⚠️ Выберите провайдера"
    return handler()
92
+
93
+
94
+ # ─── Dataset functions ────────────────────────────────────────────────────────
95
  def extract_text_from_docx(file_path):
96
  if not DOCX_AVAILABLE:
97
  return ""
 
99
  paragraphs = [p.text for p in doc.paragraphs if p.text.strip()]
100
  return "\n\n".join(paragraphs)
101
 
 
102
  def split_into_chunks(text, chunk_size=512, overlap=64):
103
  words = text.split()
104
  chunks = []
 
109
  i += chunk_size - overlap
110
  return chunks
111
 
 
112
  def text_to_qa_pairs(text, source_name=""):
113
  paragraphs = [p.strip() for p in text.split("\n\n") if len(p.strip()) > 60]
114
  pairs = []
 
128
  })
129
  return pairs
130
 
 
131
  dataset_store = []
132
 
 
133
  def process_files(files, chunk_size, overlap, format_choice, add_qa):
134
  global dataset_store
135
  if not files:
136
  return "⚠️ Файлы не выбраны.", gr.update(interactive=False), ""
 
137
  new_records = []
138
  log_lines = []
 
139
  for file_obj in files:
140
  path = file_obj
141
  name = os.path.basename(path)
142
  ext = Path(path).suffix.lower()
 
143
  if ext == ".docx":
144
  if not DOCX_AVAILABLE:
145
  log_lines.append(f"❌ {name}: python-docx не установлен")
 
151
  else:
152
  log_lines.append(f"⏭ {name}: неподдерживаемый формат")
153
  continue
 
154
  if not text.strip():
155
  log_lines.append(f"⚠️ {name}: пустой файл")
156
  continue
 
157
  chunks = split_into_chunks(text, int(chunk_size), int(overlap))
158
  for chunk in chunks:
159
  new_records.append({
 
162
  "output": chunk,
163
  "source": name,
164
  })
 
165
  if add_qa:
166
  qa_pairs = text_to_qa_pairs(text, name)
167
  new_records.extend(qa_pairs)
 
168
  log_lines.append(f"✅ {name}: {len(chunks)} чанков")
 
169
  dataset_store.extend(new_records)
170
  preview = "\n".join(
171
  f"[{i+1}] {r['output'][:120]}..." for i, r in enumerate(dataset_store[:5])
 
176
  preview,
177
  )
178
 
 
179
  def save_dataset(format_choice, dataset_name):
180
  global dataset_store
181
  if not dataset_store:
182
  return "⚠️ Буфер пустой.", None
 
183
  ts = datetime.now().strftime("%Y%m%d_%H%M%S")
184
  safe_name = re.sub(r"[^a-zA-Z0-9_\-]", "_", dataset_name or "dataset")
185
  fname_base = f"{safe_name}_{ts}"
 
186
  if format_choice == "JSONL":
187
  out_path = os.path.join(DATASET_DIR, fname_base + ".jsonl")
188
  with open(out_path, "w", encoding="utf-8") as f:
 
203
  with open(out_path, "w", encoding="utf-8") as f:
204
  for rec in dataset_store:
205
  f.write(json.dumps(rec, ensure_ascii=False) + "\n")
 
206
  count = len(dataset_store)
207
  dataset_store = []
208
  return f"✅ Сохранено: {out_path} ({count} записей)", out_path
209
 
 
210
  def list_datasets():
211
  files = list(Path(DATASET_DIR).glob("*.*"))
212
  if not files:
 
214
  lines = [f"• {f.name} ({f.stat().st_size // 1024} KB)" for f in sorted(files)]
215
  return "\n".join(lines)
216
 
 
217
def clear_buffer():
    """Drop every buffered dataset record and report the reset to the UI."""
    global dataset_store
    # Rebind (not mutate) so no stale alias keeps the old records alive.
    dataset_store = []
    return "🗑️ Буфер очищен."
221
 
222
 
223
+ COLAB_TEMPLATE = """## 🚀 Fine-tuning (LoRA) — Бесплатно через Google Colab
 
224
  ### 1. Установка
225
  ```python
226
  !pip install -q unsloth transformers peft datasets trl accelerate bitsandbytes
227
  ```
 
228
  ### 2. Загрузка модели
229
  ```python
230
  from unsloth import FastLanguageModel
 
240
  use_gradient_checkpointing="unsloth",
241
  )
242
  ```
 
243
  ### 3. Загрузка датасета
244
  ```python
245
  from datasets import load_dataset
246
  dataset = load_dataset("json", data_files="DATASET_PLACEHOLDER", split="train")
 
247
  def format_prompt(ex):
248
  return {"text": f"### Instruction:\n{ex['instruction']}\n\n### Input:\n{ex['input']}\n\n### Response:\n{ex['output']}"}
 
249
  dataset = dataset.map(format_prompt)
250
  ```
 
251
  ### 4. Обучение
252
  ```python
253
  from trl import SFTTrainer
254
  from transformers import TrainingArguments
255
  import torch
 
256
  trainer = SFTTrainer(
257
  model=model, tokenizer=tokenizer,
258
  train_dataset=dataset, dataset_text_field="text",
259
  max_seq_length=2048,
260
  args=TrainingArguments(
261
+ per_device_train_batch_size=2, gradient_accumulation_steps=4,
262
+ num_train_epochs=3, learning_rate=2e-4,
263
+ fp16=not torch.cuda.is_bf16_supported(), bf16=torch.cuda.is_bf16_supported(),
264
+ output_dir="outputs", optim="adamw_8bit",
 
 
 
 
265
  ),
266
  )
267
  trainer.train()
268
  ```
 
269
  ### 5. Сохранение
270
  ```python
271
  model.save_pretrained_merged("my_style_model", tokenizer, save_method="merged_16bit")
 
 
272
  ```
 
273
  ---
274
+ **Бесплатные GPU:** Google Colab T4 | Kaggle (30ч/нед) | HuggingFace Spaces"""
 
 
275
 
276
  def get_colab_guide(model_choice, dataset_path):
277
  model_map = {
 
287
  guide = guide.replace("DATASET_PLACEHOLDER", dataset_path or "your_dataset.jsonl")
288
  return guide
289
 
290
+ # ─── Knowledge Base ──────────────────────────────────────────────────────────
291
  kb_store = {}
292
  KB_FILE = os.path.join(KB_DIR, "knowledge_base.json")
293
 
 
294
def load_kb():
    """Reload the knowledge base from KB_FILE into the module-level store.

    Leaves kb_store untouched when the file does not exist yet.
    """
    global kb_store
    if not os.path.exists(KB_FILE):
        return
    with open(KB_FILE, encoding="utf-8") as fh:
        kb_store = json.load(fh)
299
 
 
300
def save_kb_to_disk():
    """Persist the in-memory KB store to KB_FILE as pretty-printed UTF-8 JSON."""
    with open(KB_FILE, "w", encoding="utf-8") as fh:
        json.dump(kb_store, fh, ensure_ascii=False, indent=2)
303
 
 
304
def get_kb_choices():
    """Return KB entry names (insertion order preserved) for UI choice lists."""
    return [name for name in kb_store]
306
 
 
307
  def add_kb_entry(name, text, tags_raw, files):
308
  if not name.strip():
309
  return "⚠️ Введите название.", gr.update(choices=get_kb_choices())
 
318
  with open(fpath, encoding="utf-8", errors="ignore") as f:
319
  content += "\n\n" + f.read()
320
  kb_store[name.strip()] = {
321
+ "text": content,
322
+ "tags": tags,
323
  "created": datetime.now().isoformat(),
324
  }
325
  save_kb_to_disk()
326
  choices = get_kb_choices()
327
  return f"✅ '{name}' добавлена в базу.", gr.update(choices=choices)
328
 
 
329
def refresh_kb():
    """Re-read the KB from disk and push the fresh entry names to the widget."""
    load_kb()
    names = get_kb_choices()
    return gr.update(choices=names)
 
 
332
 
333
  def get_kb_entry_content(name):
334
  if not name:
 
339
  return "", "", ""
340
  return entry.get("text", ""), ", ".join(entry.get("tags", [])), entry.get("created", "")
341
 
 
342
  def delete_kb_entry(name):
343
  if not name:
344
  return "⚠️ Выберите запись.", gr.update(choices=get_kb_choices())
 
349
  return f"🗑️ '{name}' удалена.", gr.update(choices=get_kb_choices())
350
  return "⚠️ Не найдено.", gr.update(choices=get_kb_choices())
351
 
 
352
  def search_kb(query, selected_entries):
353
  load_kb()
354
  results = []
 
363
  results.append(f"**{name}** [{', '.join(entry.get('tags', []))}]\n{snippet}...")
364
  return "\n\n---\n\n".join(results) if results else "🔍 Ничего не найдено."
365
 
 
366
  def compose_context_for_writing(selected_entries, writing_task):
367
  load_kb()
368
  context_parts = []
 
381
 
382
  [Начало текста в стиле автора]:"""
383
 
 
384
  load_kb()
385
 
 
386
 
387
+ # ─── macOS CSS ─────────────────────────────────────────────────────────────
388
+ macos_css = """
389
+ /* === macOS System Font === */
390
+ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');
391
+
392
+ * { font-family: -apple-system, BlinkMacSystemFont, 'SF Pro Display', 'SF Pro Text', 'Inter', 'Helvetica Neue', Arial, sans-serif !important; }
393
+
394
+ /* === Base === */
395
+ body, .gradio-container {
396
+ background: #f2f2f7 !important;
397
+ color: #1c1c1e !important;
398
+ }
399
+
400
+ footer { display: none !important; }
401
+
402
+ /* === Window chrome === */
403
+ .gradio-container > .main {
404
+ background: #f2f2f7 !important;
405
+ }
406
+
407
+ /* === Panels === */
408
+ .panel, .block, .form {
409
+ background: rgba(255,255,255,0.85) !important;
410
+ border: 1px solid rgba(0,0,0,0.08) !important;
411
+ border-radius: 12px !important;
412
+ box-shadow: 0 1px 3px rgba(0,0,0,0.08), 0 4px 16px rgba(0,0,0,0.04) !important;
413
+ backdrop-filter: blur(20px) !important;
414
+ }
415
+
416
+ /* === Tab bar === */
417
+ .tabs > .tab-nav {
418
+ background: rgba(255,255,255,0.7) !important;
419
+ border-radius: 10px !important;
420
+ padding: 4px !important;
421
+ border: 1px solid rgba(0,0,0,0.08) !important;
422
+ backdrop-filter: blur(20px) !important;
423
+ }
424
+
425
+ .tabs > .tab-nav button {
426
+ border-radius: 7px !important;
427
+ font-size: 13px !important;
428
+ font-weight: 500 !important;
429
+ color: #8e8e93 !important;
430
+ padding: 6px 14px !important;
431
+ transition: all 0.15s ease !important;
432
+ background: transparent !important;
433
+ border: none !important;
434
+ }
435
+
436
+ .tabs > .tab-nav button.selected {
437
+ background: #ffffff !important;
438
+ color: #1c1c1e !important;
439
+ font-weight: 600 !important;
440
+ box-shadow: 0 1px 4px rgba(0,0,0,0.15) !important;
441
+ }
442
+
443
+ /* === Buttons === */
444
+ button.primary, .btn-primary, button[variant="primary"] {
445
+ background: #007aff !important;
446
+ color: #ffffff !important;
447
+ border: none !important;
448
+ border-radius: 8px !important;
449
+ font-weight: 600 !important;
450
+ font-size: 13px !important;
451
+ padding: 8px 18px !important;
452
+ transition: all 0.15s ease !important;
453
+ box-shadow: 0 1px 3px rgba(0,122,255,0.3) !important;
454
+ }
455
+
456
+ button.primary:hover, button[variant="primary"]:hover {
457
+ background: #0071eb !important;
458
+ transform: translateY(-0.5px) !important;
459
+ box-shadow: 0 2px 8px rgba(0,122,255,0.4) !important;
460
+ }
461
+
462
+ button.stop, button[variant="stop"] {
463
+ background: #ff3b30 !important;
464
+ color: #fff !important;
465
+ border: none !important;
466
+ border-radius: 8px !important;
467
+ font-weight: 600 !important;
468
+ font-size: 13px !important;
469
+ }
470
+
471
+ button.secondary, button[variant="secondary"] {
472
+ background: rgba(0,0,0,0.05) !important;
473
+ color: #1c1c1e !important;
474
+ border: 1px solid rgba(0,0,0,0.12) !important;
475
+ border-radius: 8px !important;
476
+ font-weight: 500 !important;
477
+ font-size: 13px !important;
478
+ }
479
+
480
+ /* === Inputs === */
481
+ input[type="text"], textarea, .input-text, select {
482
+ background: rgba(255,255,255,0.9) !important;
483
+ border: 1px solid rgba(0,0,0,0.15) !important;
484
+ border-radius: 8px !important;
485
+ color: #1c1c1e !important;
486
+ font-size: 13px !important;
487
+ padding: 8px 10px !important;
488
+ transition: border 0.15s ease, box-shadow 0.15s ease !important;
489
+ }
490
+
491
+ input[type="text"]:focus, textarea:focus {
492
+ border-color: #007aff !important;
493
+ box-shadow: 0 0 0 3px rgba(0,122,255,0.15) !important;
494
+ outline: none !important;
495
+ }
496
+
497
+ /* === Labels === */
498
+ label, .label-wrap span, .block > label > span {
499
+ font-size: 12px !important;
500
+ font-weight: 600 !important;
501
+ color: #6e6e73 !important;
502
+ letter-spacing: 0.3px !important;
503
+ text-transform: uppercase !important;
504
+ }
505
+
506
+ /* === Headings === */
507
+ h1 { font-size: 28px !important; font-weight: 700 !important; letter-spacing: -0.5px !important; color: #1c1c1e !important; }
508
+ h2 { font-size: 20px !important; font-weight: 600 !important; color: #1c1c1e !important; }
509
+ h3 { font-size: 15px !important; font-weight: 600 !important; color: #1c1c1e !important; }
510
+
511
+ /* === Sliders === */
512
+ input[type="range"] {
513
+ accent-color: #007aff !important;
514
+ }
515
+
516
+ /* === Checkboxes === */
517
+ input[type="checkbox"] {
518
+ accent-color: #34c759 !important;
519
+ width: 16px !important;
520
+ height: 16px !important;
521
+ }
522
+
523
+ /* === Radio === */
524
+ input[type="radio"] {
525
+ accent-color: #007aff !important;
526
+ }
527
+
528
+ /* === File upload === */
529
+ .upload-container, [data-testid="file-upload"] {
530
+ background: rgba(0,122,255,0.04) !important;
531
+ border: 1.5px dashed rgba(0,122,255,0.3) !important;
532
+ border-radius: 12px !important;
533
+ transition: all 0.2s ease !important;
534
+ }
535
+
536
+ .upload-container:hover {
537
+ background: rgba(0,122,255,0.08) !important;
538
+ border-color: #007aff !important;
539
+ }
540
+
541
+ /* === Sidebar / columns === */
542
+ .gap {
543
+ gap: 12px !important;
544
+ }
545
+
546
+ /* === macOS window title bar decoration === */
547
+ .app-header {
548
+ display: flex;
549
+ align-items: center;
550
+ gap: 8px;
551
+ padding: 0 0 16px 4px;
552
+ }
553
+
554
+ .traffic-lights {
555
+ display: flex;
556
+ gap: 6px;
557
+ align-items: center;
558
+ }
559
+
560
+ .tl { width:12px; height:12px; border-radius:50%; display:inline-block; }
561
+ .tl-red { background:#ff5f57; border:1px solid #e0443e; }
562
+ .tl-yellow { background:#febc2e; border:1px solid #d4a000; }
563
+ .tl-green { background:#28c840; border:1px solid #1aab29; }
564
+
565
+ /* === Status boxes === */
566
+ .textbox textarea {
567
+ font-family: 'SF Mono', 'Menlo', 'Monaco', monospace !important;
568
+ font-size: 12px !important;
569
+ line-height: 1.5 !important;
570
+ }
571
+
572
+ /* === Markdown output === */
573
+ .prose, .markdown-text {
574
+ font-size: 14px !important;
575
+ line-height: 1.6 !important;
576
+ color: #1c1c1e !important;
577
+ }
578
+
579
+ /* === AI Chat bubbles === */
580
+ .message-wrap .message {
581
+ border-radius: 16px !important;
582
+ font-size: 14px !important;
583
+ }
584
+
585
+ /* === Scrollbar === */
586
+ ::-webkit-scrollbar { width: 6px; height: 6px; }
587
+ ::-webkit-scrollbar-track { background: transparent; }
588
+ ::-webkit-scrollbar-thumb { background: rgba(0,0,0,0.2); border-radius: 3px; }
589
+ ::-webkit-scrollbar-thumb:hover { background: rgba(0,0,0,0.35); }
590
+
591
+ /* === AI Chat area === */
592
+ .chatbot { border-radius: 12px !important; }
593
+
594
+ /* === Provider selector === */
595
+ .provider-pill {
596
+ display: inline-flex;
597
+ align-items: center;
598
+ padding: 4px 10px;
599
+ border-radius: 20px;
600
+ font-size: 12px;
601
+ font-weight: 600;
602
+ margin: 2px;
603
+ }
604
+ """
605
+
606
+
607
+ # ─── Gradio UI ───────────────────────────────────────────────────────────────
608
+
609
# Provider label (exactly as shown in the UI radio group) → list of model
# identifiers selectable for that provider.  The first entry of each list is
# used as the default selection by update_models().
PROVIDER_MODELS = {
    # HuggingFace Inference API models (labelled free in the UI).
    "🤗 HuggingFace (бесплатно)": [
        "mistralai/Mistral-7B-Instruct-v0.3",
        "meta-llama/Meta-Llama-3-8B-Instruct",
        "HuggingFaceH4/zephyr-7b-beta",
        "google/gemma-7b-it",
        "tiiuae/falcon-7b-instruct",
    ],
    # Groq-hosted models (labelled free in the UI).
    "⚡ Groq (бесплатно)": [
        "llama-3.1-8b-instant",
        "llama-3.3-70b-versatile",
        "mixtral-8x7b-32768",
        "gemma2-9b-it",
    ],
    # Models served by a local Ollama daemon.
    "🏠 Ollama (локально)": ["llama3.2", "llama3.1", "mistral", "phi3", "gemma2", "qwen2.5"],
    # OpenAI API models (API key required).
    "🔵 OpenAI": ["gpt-4o-mini", "gpt-4o", "gpt-3.5-turbo"],
}
637
+
638
def update_models(provider):
    """Return a Gradio update that swaps the model dropdown to *provider*'s
    model list, pre-selecting the first model (or "" when the list is empty).
    """
    available = PROVIDER_MODELS.get(provider, [])
    default = available[0] if available else ""
    return gr.update(choices=available, value=default)
641
+
642
# AI chat history
# NOTE(review): this module-level list appears unused — conversation state is
# threaded through the Chatbot component into ai_chat() instead; confirm no
# external caller touches it before removing.
chat_history = []
644
+
645
def ai_chat(message, history, provider, model, api_key, ollama_host, system_prompt):
    """Handle one chat turn for the AI tab.

    Builds a prompt from the optional system prompt, up to the last three
    [user, assistant] exchanges, and the new message; queries the selected
    provider via call_ai(); appends the exchange to the history.

    Returns:
        (updated_history, ""): the "" clears the input textbox.

    Fix vs. original: the first prompt assembly (message + system prompt) was
    unconditionally discarded and rebuilt whenever history was non-empty —
    dead work and confusing double application of the system prompt.  The
    branches below are mutually exclusive and produce byte-identical prompts.
    """
    if not message.strip():
        # Ignore empty submissions; leave the history untouched.
        return history, ""

    if history:
        # Include up to the last 3 exchanges as conversational context.
        # NOTE(review): this branch labels turns "User:/Assistant:" while the
        # no-history branch uses "Пользователь:" — kept as-is to preserve the
        # exact prompt text sent to the model.
        ctx = "\n".join(f"User: {h[0]}\nAssistant: {h[1]}" for h in history[-3:])
        full_prompt = ctx + "\n\nUser: " + message
        if system_prompt.strip():
            full_prompt = system_prompt + "\n\n" + full_prompt
    elif system_prompt.strip():
        full_prompt = f"{system_prompt}\n\nПользователь: {message}"
    else:
        full_prompt = message

    response = call_ai(full_prompt, provider, model, api_key, ollama_host)
    return history + [[message, response]], ""
660
+
661
def ai_generate_text(prompt, provider, model, api_key, ollama_host):
    """One-shot generation: forward *prompt* to the selected provider.

    A blank prompt short-circuits with a warning string instead of hitting
    the backend.
    """
    if not prompt.strip():
        return "⚠️ Введите запрос"
    return call_ai(prompt, provider, model, api_key, ollama_host)
665
+
666
def ai_generate_with_kb(selected_entries, writing_task, provider, model, api_key, ollama_host):
    """Compose a prompt from the chosen knowledge-base entries plus the
    writing task, then send it to the AI provider.

    compose_context_for_writing() signals errors by returning a string that
    starts with "⚠️"; such messages are surfaced to the user unchanged.
    """
    composed = compose_context_for_writing(selected_entries, writing_task)
    if composed.startswith("⚠️"):
        return composed
    return call_ai(composed, provider, model, api_key, ollama_host)
671
+
672
def clear_chat():
    """Reset the chat UI: empty history for the Chatbot, "" for the input box."""
    empty_history = []
    empty_input = ""
    return empty_history, empty_input
674
+
675
+
676
+ with gr.Blocks(title="Writing Style AI", css=macos_css, theme=gr.themes.Default(
677
+ font=gr.themes.GoogleFont("Inter"),
678
+ primary_hue="blue",
679
+ secondary_hue="gray",
680
+ neutral_hue="gray",
681
+ )) as demo:
682
+
683
+ # macOS window title
684
+ gr.HTML("""
685
+ <div class="app-header">
686
+ <div class="traffic-lights">
687
+ <span class="tl tl-red"></span>
688
+ <span class="tl tl-yellow"></span>
689
+ <span class="tl tl-green"></span>
690
+ </div>
691
+ <div style="display:flex;align-items:center;gap:10px;margin-left:12px;">
692
+ <span style="font-size:22px;">✍️</span>
693
+ <div>
694
+ <div style="font-size:17px;font-weight:700;color:#1c1c1e;letter-spacing:-0.3px;">Writing Style AI</div>
695
+ <div style="font-size:11px;color:#8e8e93;font-weight:500;">Dataset · Fine-tune · Knowledge Base · AI Chat</div>
696
+ </div>
697
+ </div>
698
+ </div>
699
+ """)
700
 
701
  with gr.Tabs():
702
+
703
+ # ── TAB 0: AI Assistant ──────────────────────────────────────────────
704
+ with gr.Tab("🤖 AI Ассистент"):
705
+
706
+ # Provider config row
707
+ with gr.Row():
708
+ with gr.Column(scale=1):
709
+ gr.HTML('<div style="font-size:11px;font-weight:700;color:#8e8e93;letter-spacing:0.5px;text-transform:uppercase;margin-bottom:6px;">ПРОВАЙДЕР</div>')
710
+ ai_provider = gr.Radio(
711
+ choices=list(PROVIDER_MODELS.keys()),
712
+ value="🤗 HuggingFace (бесплатно)",
713
+ label="",
714
+ elem_id="provider_radio",
715
+ )
716
+ with gr.Column(scale=1):
717
+ ai_model = gr.Dropdown(
718
+ choices=PROVIDER_MODELS["🤗 HuggingFace (бесплатно)"],
719
+ value="mistralai/Mistral-7B-Instruct-v0.3",
720
+ label="Модель",
721
+ )
722
+ ai_api_key = gr.Textbox(
723
+ label="API ключ",
724
+ placeholder="sk-... или hf_... (необязательно для HF)",
725
+ type="password",
726
+ )
727
+ ai_ollama_host = gr.Textbox(
728
+ label="Ollama Host",
729
+ value="http://localhost:11434",
730
+ visible=False,
731
+ )
732
+
733
+ # Chat area
734
+ with gr.Row():
735
+ with gr.Column(scale=2):
736
+ ai_system_prompt = gr.Textbox(
737
+ label="System Prompt (необязательно)",
738
+ placeholder="Ты — профессиональный писатель в стиле Хемингуэя...",
739
+ lines=2,
740
+ )
741
+ ai_chatbot = gr.Chatbot(
742
+ label="",
743
+ height=420,
744
+ show_copy_button=True,
745
+ avatar_images=["🧑", "🤖"],
746
+ elem_id="macos_chat",
747
+ )
748
+ with gr.Row():
749
+ ai_input = gr.Textbox(
750
+ label="",
751
+ placeholder="Напишите сообщение... (Enter для отправки)",
752
+ lines=2,
753
+ scale=4,
754
+ )
755
+ with gr.Column(scale=1, min_width=100):
756
+ ai_send_btn = gr.Button("Отправить ↑", variant="primary")
757
+ ai_clear_btn = gr.Button("Очистить", variant="secondary")
758
+
759
+ with gr.Column(scale=1):
760
+ gr.HTML('<div style="font-size:11px;font-weight:700;color:#8e8e93;letter-spacing:0.5px;text-transform:uppercase;margin-bottom:8px;">БЫСТРЫЕ ЗАПРОСЫ</div>')
761
+ gr.HTML("""
762
+ <div style="display:flex;flex-direction:column;gap:6px;">
763
+ <div style="background:rgba(0,122,255,0.08);border:1px solid rgba(0,122,255,0.2);border-radius:8px;padding:10px;font-size:13px;cursor:pointer;" onclick="document.querySelector('#macos_chat input, textarea').value='Напиши статью про ИИ в стиле научпоп'">📄 Статья в стиле научпоп</div>
764
+ <div style="background:rgba(52,199,89,0.08);border:1px solid rgba(52,199,89,0.2);border-radius:8px;padding:10px;font-size:13px;" >💡 Генерация идей</div>
765
+ <div style="background:rgba(255,149,0,0.08);border:1px solid rgba(255,149,0,0.2);border-radius:8px;padding:10px;font-size:13px;" >✏️ Редактирование текста</div>
766
+ <div style="background:rgba(175,82,222,0.08);border:1px solid rgba(175,82,222,0.2);border-radius:8px;padding:10px;font-size:13px;" >🔄 Перевод и локализация</div>
767
+ </div>
768
+ """)
769
+
770
+ gr.HTML('<div style="font-size:11px;font-weight:700;color:#8e8e93;letter-spacing:0.5px;text-transform:uppercase;margin:16px 0 8px;">ОДИНОЧНЫЙ ЗАПРОС</div>')
771
+ single_prompt = gr.Textbox(label="Промпт", lines=4, placeholder="Введите промпт...")
772
+ single_generate_btn = gr.Button("⚡ Генерировать", variant="primary")
773
+ single_output = gr.Textbox(label="Результат", lines=8, show_copy_button=True)
774
+
775
+ # Wire AI tab events
776
+ ai_provider.change(update_models, inputs=[ai_provider], outputs=[ai_model])
777
+ ai_provider.change(
778
+ lambda p: gr.update(visible=p == "🏠 Ollama (локально)"),
779
+ inputs=[ai_provider], outputs=[ai_ollama_host]
780
+ )
781
+ ai_send_btn.click(
782
+ ai_chat,
783
+ inputs=[ai_input, ai_chatbot, ai_provider, ai_model, ai_api_key, ai_ollama_host, ai_system_prompt],
784
+ outputs=[ai_chatbot, ai_input],
785
+ )
786
+ ai_input.submit(
787
+ ai_chat,
788
+ inputs=[ai_input, ai_chatbot, ai_provider, ai_model, ai_api_key, ai_ollama_host, ai_system_prompt],
789
+ outputs=[ai_chatbot, ai_input],
790
+ )
791
+ ai_clear_btn.click(clear_chat, outputs=[ai_chatbot, ai_input])
792
+ single_generate_btn.click(
793
+ ai_generate_text,
794
+ inputs=[single_prompt, ai_provider, ai_model, ai_api_key, ai_ollama_host],
795
+ outputs=[single_output],
796
+ )
797
+
798
+ # ── TAB 1: Dataset Builder ───────────────────────────────────────────
799
  with gr.Tab("📦 Dataset Builder"):
800
+ gr.HTML('<h3 style="margin:0 0 12px;font-size:15px;font-weight:600;color:#1c1c1e;">Загрузите ваши тексты (DOCX / TXT) для создания датасета</h3>')
801
  with gr.Row():
802
  with gr.Column(scale=2):
803
  file_input = gr.File(
804
+ label="DOCX или TXT файлы",
805
  file_count="multiple",
806
  file_types=[".docx", ".txt"],
807
  )
 
813
  dataset_name = gr.Textbox(label="Название датасета", value="my_dataset")
814
  with gr.Row():
815
  process_btn = gr.Button("⚙️ Обработать файлы", variant="primary")
816
+ clear_btn = gr.Button("🗑️ Очистить буфер", variant="secondary")
817
+ save_btn = gr.Button("💾 Сохранить датасет", variant="primary", interactive=False)
818
  with gr.Column(scale=1):
819
  status_box = gr.Textbox(label="Статус", lines=8)
820
  preview_box = gr.Textbox(label="Превью записей", lines=8)
821
+ file_output = gr.File(label="Скачать датасет")
822
+ datasets_list = gr.Textbox(label="Сохранённые датасеты", lines=4, value=list_datasets)
823
 
824
  process_btn.click(
825
  process_files,
 
833
  ).then(lambda: list_datasets(), outputs=datasets_list)
834
  clear_btn.click(clear_buffer, outputs=status_box)
835
 
836
+ # ── TAB 2: Fine-tuning Guide ─────────────────────────────────────────
837
  with gr.Tab("🧠 Fine-tuning (LoRA)"):
838
+ gr.HTML('<h3 style="margin:0 0 12px;font-size:15px;font-weight:600;color:#1c1c1e;">Бесплатное обучение LoRA через Google Colab</h3>')
839
  with gr.Row():
840
  with gr.Column():
841
  model_choice = gr.Radio(
842
  ["Mistral 7B (рекомендуется)", "Llama 3.1 8B", "Gemma 2 9B", "Phi-3 Mini (быстрее)"],
843
  value="Mistral 7B (рекомендуется)",
844
+ label="Базовая модель",
845
  )
846
  dataset_path_input = gr.Textbox(
847
  label="Путь к датасету",
 
851
  with gr.Column(scale=2):
852
  guide_output = gr.Markdown()
853
  guide_btn.click(get_colab_guide, inputs=[model_choice, dataset_path_input], outputs=guide_output)
854
+ gr.HTML('<div style="margin-top:12px;font-size:13px;color:#6e6e73;"><b>Ресурсы:</b> <a href="https://colab.research.google.com" target="_blank" style="color:#007aff;">Google Colab</a> · <a href="https://github.com/unslothai/unsloth" target="_blank" style="color:#007aff;">Unsloth</a> · <a href="https://huggingface.co" target="_blank" style="color:#007aff;">HuggingFace</a> · <a href="https://www.kaggle.com/code" target="_blank" style="color:#007aff;">Kaggle</a></div>')
855
 
856
+ # ── TAB 3: Knowledge Base ────────────────────────────────────────────
857
  with gr.Tab("📚 Knowledge Base"):
858
+ gr.HTML('<h3 style="margin:0 0 12px;font-size:15px;font-weight:600;color:#1c1c1e;">База знаний — контекст для генерации текстов</h3>')
859
  with gr.Tabs():
860
  with gr.Tab("➕ Добавить запись"):
861
  with gr.Row():
 
864
  kb_tags = gr.Textbox(label="Теги (через запятую)", placeholder="стиль, технический")
865
  kb_text = gr.Textbox(label="Текст / Контекст", lines=8)
866
  kb_files = gr.File(
867
+ label="Загрузить DOCX/TXT",
868
  file_count="multiple",
869
  file_types=[".docx", ".txt"],
870
  )
 
875
 
876
  with gr.Tab("🔍 Просмотр и поиск"):
877
  with gr.Row():
878
+ kb_list_view = gr.CheckboxGroup(label="Записи (выберите для поиска)", choices=get_kb_choices())
879
+ refresh_btn = gr.Button("🔄 Обновить список", variant="secondary")
880
  with gr.Row():
881
+ search_query = gr.Textbox(label="Поиск", placeholder="ключевое слово...")
882
+ search_btn = gr.Button("Найти", variant="primary")
883
  search_results = gr.Markdown()
884
  with gr.Row():
885
  view_entry = gr.Dropdown(label="Просмотреть запись", choices=get_kb_choices())
886
+ view_btn = gr.Button("👁 Показать", variant="secondary")
887
  del_btn = gr.Button("🗑️ Удалить", variant="stop")
888
  kb_del_status = gr.Textbox(label="Статус", lines=2)
889
  with gr.Row():
 
891
  entry_tags = gr.Textbox(label="Теги", interactive=False)
892
  entry_created = gr.Textbox(label="Создано", interactive=False)
893
 
894
+ with gr.Tab("✍️ Генерация с KB + AI"):
895
+ gr.HTML('<div style="font-size:13px;color:#6e6e73;margin-bottom:12px;">Выберите записи из базы знаний, задайте задание — AI напишет текст в нужном стиле</div>')
896
  gen_entries = gr.CheckboxGroup(label="Выберите записи из KB", choices=get_kb_choices())
897
+ refresh_gen_btn = gr.Button("🔄 Обновить", variant="secondary")
898
  writing_task = gr.Textbox(
899
  label="Задание для генерации",
900
  placeholder="Напиши статью про... / Создай пост о...",
901
  lines=3,
902
  )
903
+ with gr.Row():
904
+ compose_btn = gr.Button("📝 Сформировать промпт", variant="secondary")
905
+ kb_ai_generate_btn = gr.Button("🤖 Сгенерировать через AI", variant="primary")
906
  composed_prompt = gr.Textbox(
907
+ label="Готовый промпт",
908
+ lines=8,
909
+ show_copy_button=True,
910
+ )
911
+ kb_ai_result = gr.Textbox(
912
+ label="Результат AI",
913
  lines=12,
914
  show_copy_button=True,
915
  )
916
 
917
+ # Wire KB events
918
  kb_add_btn.click(
919
  add_kb_entry,
920
  inputs=[kb_name, kb_text, kb_tags, kb_files],
921
  outputs=[kb_status, kb_list_add],
922
  )
 
923
  refresh_btn.click(refresh_kb, outputs=kb_list_view)
924
  search_btn.click(search_kb, inputs=[search_query, kb_list_view], outputs=search_results)
925
  view_btn.click(get_kb_entry_content, inputs=[view_entry], outputs=[entry_text, entry_tags, entry_created])
926
  del_btn.click(delete_kb_entry, inputs=[view_entry], outputs=[kb_del_status, kb_list_view])
927
  refresh_gen_btn.click(refresh_kb, outputs=gen_entries)
928
  compose_btn.click(compose_context_for_writing, inputs=[gen_entries, writing_task], outputs=composed_prompt)
929
+ kb_ai_generate_btn.click(
930
+ ai_generate_with_kb,
931
+ inputs=[gen_entries, writing_task, ai_provider, ai_model, ai_api_key, ai_ollama_host],
932
+ outputs=[kb_ai_result],
933
+ )
934
 
935
+ gr.HTML("""
936
+ <div style="text-align:center;padding:16px 0 4px;font-size:12px;color:#8e8e93;">
937
+ ✍️ <b>Writing Style AI</b> — Dataset Builder · LoRA Fine-tuning · Knowledge Base · AI Chat
938
+ </div>
939
+ """)
940
 
941
# Script entry point: start the Gradio server only when run directly,
# not when the module is imported.
if __name__ == "__main__":
    demo.launch()