# dataset / app.py — "Writing Style AI" Hugging Face Space (author: coingimp, rev c7cc5da)
import gradio as gr
import json
import os
import re
import csv
import requests
from datetime import datetime
from pathlib import Path
# python-docx is optional: DOCX support degrades gracefully when it is missing.
try:
    from docx import Document
    DOCX_AVAILABLE = True
except ImportError:
    DOCX_AVAILABLE = False

# Output folders for generated datasets and the persisted knowledge base.
DATASET_DIR = "datasets"
KB_DIR = "knowledge_base"
os.makedirs(DATASET_DIR, exist_ok=True)
os.makedirs(KB_DIR, exist_ok=True)
# ─── AI Providers ───────────────────────────────────────────────────────────
def call_openai(prompt, model="gpt-3.5-turbo", api_key=""):
    """Send a single-turn prompt to the OpenAI chat API and return the reply text.

    Returns a user-facing warning/error string instead of raising.
    """
    if not api_key:
        return "⚠️ Введите API ключ OpenAI"
    try:
        from openai import OpenAI
        completion = OpenAI(api_key=api_key).chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=2000,
        )
        return completion.choices[0].message.content
    except Exception as e:
        return f"❌ OpenAI error: {e}"
def call_hf_inference(prompt, model="mistralai/Mistral-7B-Instruct-v0.3", api_key=""):
    """Query the HuggingFace Inference API; the API key is optional for public models."""
    endpoint = f"https://api-inference.huggingface.co/models/{model}"
    headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
    body = {"inputs": prompt, "parameters": {"max_new_tokens": 800, "temperature": 0.7}}
    try:
        resp = requests.post(endpoint, headers=headers, json=body, timeout=60)
        data = resp.json()
        # The API answers with either a list of generations or an error dict.
        if isinstance(data, list) and data:
            return data[0].get("generated_text", str(data))
        if isinstance(data, dict):
            if "error" in data:
                return f"⚠️ HF: {data['error']}"
            return data.get("generated_text", str(data))
        return str(data)
    except Exception as e:
        return f"❌ HF error: {e}"
def call_ollama(prompt, model="llama3.2", host="http://localhost:11434"):
    """Generate text via a local Ollama server (non-streaming endpoint)."""
    try:
        reply = requests.post(
            f"{host}/api/generate",
            json={"model": model, "prompt": prompt, "stream": False},
            timeout=120,
        )
        return reply.json().get("response", "No response")
    except Exception as e:
        return f"❌ Ollama error: {e}"
def call_groq(prompt, model="llama-3.1-8b-instant", api_key=""):
    """Call Groq's OpenAI-compatible chat endpoint and return the reply text."""
    if not api_key:
        return "⚠️ Введите API ключ Groq"
    try:
        # Groq exposes an OpenAI-compatible API, so the openai client is reused.
        from openai import OpenAI
        client = OpenAI(api_key=api_key, base_url="https://api.groq.com/openai/v1")
        completion = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=2000,
        )
        return completion.choices[0].message.content
    except Exception as e:
        return f"❌ Groq error: {e}"
def call_ai(prompt, provider, model, api_key, ollama_host):
    """Route the prompt to whichever backend the UI provider radio selected."""
    if provider == "🤗 HuggingFace (бесплатно)":
        return call_hf_inference(prompt, model, api_key)
    if provider == "⚡ Groq (бесплатно)":
        return call_groq(prompt, model, api_key)
    if provider == "🏠 Ollama (локально)":
        return call_ollama(prompt, model, ollama_host)
    if provider == "🔵 OpenAI":
        return call_openai(prompt, model, api_key)
    return "⚠️ Выберите провайдера"
# ─── Dataset functions ────────────────────────────────────────────────────────
def extract_text_from_docx(file_path):
    """Return the non-empty paragraphs of a .docx file joined by blank lines.

    Returns "" when python-docx is not installed.
    """
    if not DOCX_AVAILABLE:
        return ""
    kept = []
    for para in Document(file_path).paragraphs:
        if para.text.strip():
            kept.append(para.text)
    return "\n\n".join(kept)
def split_into_chunks(text, chunk_size=512, overlap=64):
    """Split *text* into word chunks of up to ``chunk_size`` words with ``overlap``.

    The advance step is clamped to at least one word, so an overlap greater
    than or equal to the chunk size can no longer cause an infinite loop
    (the original ``while`` advanced by ``chunk_size - overlap``, which could
    be zero or negative). Empty text yields an empty list.
    """
    words = text.split()
    step = max(1, chunk_size - overlap)
    return [
        " ".join(words[start:start + chunk_size])
        for start in range(0, len(words), step)
    ]
def text_to_qa_pairs(text, source_name=""):
    """Turn paragraphs longer than 60 chars into instruction/output training pairs.

    Every paragraph yields a "continue in the author's style" pair; each
    paragraph after the first additionally yields a "write on this topic" pair.
    """
    pairs = []
    paragraphs = [p.strip() for p in text.split("\n\n") if len(p.strip()) > 60]
    for idx, para in enumerate(paragraphs):
        prompt_input = para[:200] if len(para) > 200 else ""
        pairs.append({
            "instruction": "Продолжи текст в стиле автора.",
            "input": prompt_input,
            "output": para,
            "source": source_name,
        })
        if idx:
            pairs.append({
                "instruction": "Напиши текст на тему: " + para[:80],
                "input": "",
                "output": para,
                "source": source_name,
            })
    return pairs
dataset_store = []  # in-memory buffer of records between "process" and "save"

def process_files(files, chunk_size, overlap, format_choice, add_qa):
    """Read the uploaded DOCX/TXT files, chunk them and append records to the buffer.

    Returns (status log, gr.update for the save button, preview of first records).
    """
    global dataset_store
    if not files:
        return "⚠️ Файлы не выбраны.", gr.update(interactive=False), ""
    fresh = []
    log = []
    for path in files:
        name = os.path.basename(path)
        suffix = Path(path).suffix.lower()
        if suffix == ".docx":
            if not DOCX_AVAILABLE:
                log.append(f"❌ {name}: python-docx не установлен")
                continue
            text = extract_text_from_docx(path)
        elif suffix == ".txt":
            with open(path, encoding="utf-8", errors="ignore") as f:
                text = f.read()
        else:
            log.append(f"⏭ {name}: неподдерживаемый формат")
            continue
        if not text.strip():
            log.append(f"⚠️ {name}: пустой файл")
            continue
        chunks = split_into_chunks(text, int(chunk_size), int(overlap))
        fresh.extend(
            {"instruction": "Напиши в стиле автора.", "input": "", "output": chunk, "source": name}
            for chunk in chunks
        )
        if add_qa:
            fresh.extend(text_to_qa_pairs(text, name))
        log.append(f"✅ {name}: {len(chunks)} чанков")
    dataset_store.extend(fresh)
    preview = "\n".join(
        f"[{i+1}] {rec['output'][:120]}..." for i, rec in enumerate(dataset_store[:5])
    )
    return (
        "\n".join(log) + f"\n\n📦 Всего записей: {len(dataset_store)}",
        gr.update(interactive=True),
        preview,
    )
def _write_jsonl(out_path, records):
    # One JSON object per line, UTF-8, non-ASCII characters kept readable.
    with open(out_path, "w", encoding="utf-8") as f:
        for rec in records:
            f.write(json.dumps(rec, ensure_ascii=False) + "\n")

def save_dataset(format_choice, dataset_name):
    """Flush the in-memory buffer to disk in the chosen format and clear it.

    The original duplicated the JSONL writer verbatim in both the "JSONL"
    branch and the unknown-format fallback; both now share one helper.
    Returns (status message, saved file path) — or (warning, None) when empty.
    """
    global dataset_store
    if not dataset_store:
        return "⚠️ Буфер пустой.", None
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    # Keep file names portable: anything outside [a-zA-Z0-9_-] becomes "_".
    safe_name = re.sub(r"[^a-zA-Z0-9_\-]", "_", dataset_name or "dataset")
    fname_base = f"{safe_name}_{ts}"
    if format_choice == "JSON":
        out_path = os.path.join(DATASET_DIR, fname_base + ".json")
        with open(out_path, "w", encoding="utf-8") as f:
            json.dump(dataset_store, f, ensure_ascii=False, indent=2)
    elif format_choice == "CSV":
        out_path = os.path.join(DATASET_DIR, fname_base + ".csv")
        with open(out_path, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=["instruction", "input", "output", "source"])
            writer.writeheader()
            writer.writerows(dataset_store)
    else:
        # "JSONL" and any unrecognized choice fall back to JSONL, as before.
        out_path = os.path.join(DATASET_DIR, fname_base + ".jsonl")
        _write_jsonl(out_path, dataset_store)
    count = len(dataset_store)
    dataset_store = []
    return f"✅ Сохранено: {out_path} ({count} записей)", out_path
def list_datasets():
    """List saved dataset files with their sizes in whole KB."""
    found = sorted(Path(DATASET_DIR).glob("*.*"))
    if not found:
        return "📭 Нет датасетов."
    return "\n".join(f"• {f.name} ({f.stat().st_size // 1024} KB)" for f in found)
def clear_buffer():
    """Drop every record accumulated in the in-memory dataset buffer."""
    global dataset_store
    dataset_store = []
    return "🗑️ Буфер очищен."
# Markdown guide shown on the Fine-tuning tab. MODEL_PLACEHOLDER and
# DATASET_PLACEHOLDER are substituted by get_colab_guide(). The literal is
# raw: the embedded format_prompt snippet contains "\n" escapes that must be
# displayed verbatim — a plain triple-quoted string turned them into real
# newlines and rendered broken copy-paste code.
COLAB_TEMPLATE = r"""## 🚀 Fine-tuning (LoRA) — Бесплатно через Google Colab
### 1. Установка
```python
!pip install -q unsloth transformers peft datasets trl accelerate bitsandbytes
```
### 2. Загрузка модели
```python
from unsloth import FastLanguageModel
model, tokenizer = FastLanguageModel.from_pretrained(
model_name="MODEL_PLACEHOLDER",
max_seq_length=2048,
load_in_4bit=True,
)
model = FastLanguageModel.get_peft_model(
model, r=16,
target_modules=["q_proj","k_proj","v_proj","o_proj","gate_proj","up_proj","down_proj"],
lora_alpha=16, lora_dropout=0, bias="none",
use_gradient_checkpointing="unsloth",
)
```
### 3. Загрузка датасета
```python
from datasets import load_dataset
dataset = load_dataset("json", data_files="DATASET_PLACEHOLDER", split="train")
def format_prompt(ex):
return {"text": f"### Instruction:\n{ex['instruction']}\n\n### Input:\n{ex['input']}\n\n### Response:\n{ex['output']}"}
dataset = dataset.map(format_prompt)
```
### 4. Обучение
```python
from trl import SFTTrainer
from transformers import TrainingArguments
import torch
trainer = SFTTrainer(
model=model, tokenizer=tokenizer,
train_dataset=dataset, dataset_text_field="text",
max_seq_length=2048,
args=TrainingArguments(
per_device_train_batch_size=2, gradient_accumulation_steps=4,
num_train_epochs=3, learning_rate=2e-4,
fp16=not torch.cuda.is_bf16_supported(), bf16=torch.cuda.is_bf16_supported(),
output_dir="outputs", optim="adamw_8bit",
),
)
trainer.train()
```
### 5. Сохранение
```python
model.save_pretrained_merged("my_style_model", tokenizer, save_method="merged_16bit")
```
---
**Бесплатные GPU:** Google Colab T4 | Kaggle (30ч/нед) | HuggingFace Spaces"""
def get_colab_guide(model_choice, dataset_path):
    """Fill the Colab template with the chosen base model and dataset path."""
    model_map = {
        "Mistral 7B (рекомендуется)": "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
        "Llama 3.1 8B": "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
        "Gemma 2 9B": "unsloth/gemma-2-9b-it-bnb-4bit",
        "Phi-3 Mini (быстрее)": "unsloth/Phi-3-mini-4k-instruct",
    }
    # Unknown choices fall back to the recommended Mistral checkpoint.
    model_id = model_map.get(model_choice, "unsloth/mistral-7b-instruct-v0.3-bnb-4bit")
    return (
        COLAB_TEMPLATE
        .replace("MODEL_PLACEHOLDER", model_id)
        .replace("DATASET_PLACEHOLDER", dataset_path or "your_dataset.jsonl")
    )
# ─── Knowledge Base ──────────────────────────────────────────────────────────
kb_store = {}  # name -> {"text", "tags", "created"}; mirrored to KB_FILE on disk
KB_FILE = os.path.join(KB_DIR, "knowledge_base.json")

def load_kb():
    """Reload the knowledge base from disk into the global dict, if the file exists."""
    global kb_store
    if os.path.exists(KB_FILE):
        with open(KB_FILE, encoding="utf-8") as f:
            kb_store = json.load(f)
def save_kb_to_disk():
    """Persist the whole knowledge base dict to its JSON file (pretty-printed UTF-8)."""
    payload = json.dumps(kb_store, ensure_ascii=False, indent=2)
    Path(KB_FILE).write_text(payload, encoding="utf-8")
def get_kb_choices():
    """Names of all knowledge-base entries, for populating UI selectors."""
    return [*kb_store]
def add_kb_entry(name, text, tags_raw, files):
    """Create or overwrite a KB entry from free text plus optional DOCX/TXT uploads.

    Returns (status message, gr.update refreshing the entry checklist).
    """
    key = name.strip()
    if not key:
        return "⚠️ Введите название.", gr.update(choices=get_kb_choices())
    tags = [t.strip() for t in tags_raw.split(",") if t.strip()]
    content = text.strip()
    for fpath in files or []:
        suffix = Path(fpath).suffix.lower()
        if suffix == ".docx" and DOCX_AVAILABLE:
            content += "\n\n" + extract_text_from_docx(fpath)
        elif suffix == ".txt":
            with open(fpath, encoding="utf-8", errors="ignore") as f:
                content += "\n\n" + f.read()
    kb_store[key] = {
        "text": content,
        "tags": tags,
        "created": datetime.now().isoformat(),
    }
    save_kb_to_disk()
    return f"✅ '{name}' добавлена в базу.", gr.update(choices=get_kb_choices())
def refresh_kb():
    """Re-read the KB from disk and return refreshed choices for a CheckboxGroup."""
    load_kb()
    return gr.update(choices=get_kb_choices())
def get_kb_entry_content(name):
    """Return (text, comma-joined tags, created timestamp) for one KB entry.

    Empty strings are returned when no name is given or the entry is missing.
    """
    if not name:
        return "", "", ""
    load_kb()
    entry = kb_store.get(name, {})
    if not entry:
        return "", "", ""
    return (
        entry.get("text", ""),
        ", ".join(entry.get("tags", [])),
        entry.get("created", ""),
    )
def delete_kb_entry(name):
    """Remove the named entry (if present) and persist; returns status + choices."""
    if not name:
        return "⚠️ Выберите запись.", gr.update(choices=get_kb_choices())
    load_kb()
    if name not in kb_store:
        return "⚠️ Не найдено.", gr.update(choices=get_kb_choices())
    del kb_store[name]
    save_kb_to_disk()
    return f"🗑️ '{name}' удалена.", gr.update(choices=get_kb_choices())
def search_kb(query, selected_entries):
    """Case-insensitive substring search over entry names, texts and tags.

    Searches only the selected entries, or the whole KB when none are selected.
    """
    load_kb()
    names = selected_entries or list(kb_store.keys())
    needle = query.lower()
    hits = []
    for name in names:
        entry = kb_store.get(name, {})
        text = entry.get("text", "")
        tag_line = " ".join(entry.get("tags", []))
        if needle in text.lower() or needle in name.lower() or needle in tag_line.lower():
            snippet = text[:300].replace("\n", " ")
            hits.append(f"**{name}** [{', '.join(entry.get('tags', []))}]\n{snippet}...")
    if not hits:
        return "🔍 Ничего не найдено."
    return "\n\n---\n\n".join(hits)
def compose_context_for_writing(selected_entries, writing_task):
    """Build a style-imitation prompt from the selected KB entries and a task.

    Each entry contributes at most its first 800 characters of context.
    Returns a warning string when no valid entries are selected.
    """
    load_kb()
    sections = []
    for name in (selected_entries or []):
        entry = kb_store.get(name, {})
        if entry:
            sections.append(f"=== {name} ===\n{entry['text'][:800]}")
    if not sections:
        return "⚠️ Выберите записи из Knowledge Base."
    context = "\n\n".join(sections)
    return f"""Ты пишешь текст в точном стиле автора, используя следующие знания:
{context}
Задание: {writing_task}
[Начало текста в стиле автора]:"""

# Populate the in-memory KB once at import time.
load_kb()
# ─── macOS CSS ─────────────────────────────────────────────────────────────
# Custom stylesheet injected via gr.Blocks(css=...): system font stack,
# translucent panels, pill-shaped tabs, blue accent buttons, traffic-light
# window dots and thin scrollbars to mimic the macOS look.
macos_css = """
/* === macOS System Font === */
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');
* { font-family: -apple-system, BlinkMacSystemFont, 'SF Pro Display', 'SF Pro Text', 'Inter', 'Helvetica Neue', Arial, sans-serif !important; }
/* === Base === */
body, .gradio-container {
background: #f2f2f7 !important;
color: #1c1c1e !important;
}
footer { display: none !important; }
/* === Window chrome === */
.gradio-container > .main {
background: #f2f2f7 !important;
}
/* === Panels === */
.panel, .block, .form {
background: rgba(255,255,255,0.85) !important;
border: 1px solid rgba(0,0,0,0.08) !important;
border-radius: 12px !important;
box-shadow: 0 1px 3px rgba(0,0,0,0.08), 0 4px 16px rgba(0,0,0,0.04) !important;
backdrop-filter: blur(20px) !important;
}
/* === Tab bar === */
.tabs > .tab-nav {
background: rgba(255,255,255,0.7) !important;
border-radius: 10px !important;
padding: 4px !important;
border: 1px solid rgba(0,0,0,0.08) !important;
backdrop-filter: blur(20px) !important;
}
.tabs > .tab-nav button {
border-radius: 7px !important;
font-size: 13px !important;
font-weight: 500 !important;
color: #8e8e93 !important;
padding: 6px 14px !important;
transition: all 0.15s ease !important;
background: transparent !important;
border: none !important;
}
.tabs > .tab-nav button.selected {
background: #ffffff !important;
color: #1c1c1e !important;
font-weight: 600 !important;
box-shadow: 0 1px 4px rgba(0,0,0,0.15) !important;
}
/* === Buttons === */
button.primary, .btn-primary, button[variant="primary"] {
background: #007aff !important;
color: #ffffff !important;
border: none !important;
border-radius: 8px !important;
font-weight: 600 !important;
font-size: 13px !important;
padding: 8px 18px !important;
transition: all 0.15s ease !important;
box-shadow: 0 1px 3px rgba(0,122,255,0.3) !important;
}
button.primary:hover, button[variant="primary"]:hover {
background: #0071eb !important;
transform: translateY(-0.5px) !important;
box-shadow: 0 2px 8px rgba(0,122,255,0.4) !important;
}
button.stop, button[variant="stop"] {
background: #ff3b30 !important;
color: #fff !important;
border: none !important;
border-radius: 8px !important;
font-weight: 600 !important;
font-size: 13px !important;
}
button.secondary, button[variant="secondary"] {
background: rgba(0,0,0,0.05) !important;
color: #1c1c1e !important;
border: 1px solid rgba(0,0,0,0.12) !important;
border-radius: 8px !important;
font-weight: 500 !important;
font-size: 13px !important;
}
/* === Inputs === */
input[type="text"], textarea, .input-text, select {
background: rgba(255,255,255,0.9) !important;
border: 1px solid rgba(0,0,0,0.15) !important;
border-radius: 8px !important;
color: #1c1c1e !important;
font-size: 13px !important;
padding: 8px 10px !important;
transition: border 0.15s ease, box-shadow 0.15s ease !important;
}
input[type="text"]:focus, textarea:focus {
border-color: #007aff !important;
box-shadow: 0 0 0 3px rgba(0,122,255,0.15) !important;
outline: none !important;
}
/* === Labels === */
label, .label-wrap span, .block > label > span {
font-size: 12px !important;
font-weight: 600 !important;
color: #6e6e73 !important;
letter-spacing: 0.3px !important;
text-transform: uppercase !important;
}
/* === Headings === */
h1 { font-size: 28px !important; font-weight: 700 !important; letter-spacing: -0.5px !important; color: #1c1c1e !important; }
h2 { font-size: 20px !important; font-weight: 600 !important; color: #1c1c1e !important; }
h3 { font-size: 15px !important; font-weight: 600 !important; color: #1c1c1e !important; }
/* === Sliders === */
input[type="range"] {
accent-color: #007aff !important;
}
/* === Checkboxes === */
input[type="checkbox"] {
accent-color: #34c759 !important;
width: 16px !important;
height: 16px !important;
}
/* === Radio === */
input[type="radio"] {
accent-color: #007aff !important;
}
/* === File upload === */
.upload-container, [data-testid="file-upload"] {
background: rgba(0,122,255,0.04) !important;
border: 1.5px dashed rgba(0,122,255,0.3) !important;
border-radius: 12px !important;
transition: all 0.2s ease !important;
}
.upload-container:hover {
background: rgba(0,122,255,0.08) !important;
border-color: #007aff !important;
}
/* === Sidebar / columns === */
.gap {
gap: 12px !important;
}
/* === macOS window title bar decoration === */
.app-header {
display: flex;
align-items: center;
gap: 8px;
padding: 0 0 16px 4px;
}
.traffic-lights {
display: flex;
gap: 6px;
align-items: center;
}
.tl { width:12px; height:12px; border-radius:50%; display:inline-block; }
.tl-red { background:#ff5f57; border:1px solid #e0443e; }
.tl-yellow { background:#febc2e; border:1px solid #d4a000; }
.tl-green { background:#28c840; border:1px solid #1aab29; }
/* === Status boxes === */
.textbox textarea {
font-family: 'SF Mono', 'Menlo', 'Monaco', monospace !important;
font-size: 12px !important;
line-height: 1.5 !important;
}
/* === Markdown output === */
.prose, .markdown-text {
font-size: 14px !important;
line-height: 1.6 !important;
color: #1c1c1e !important;
}
/* === AI Chat bubbles === */
.message-wrap .message {
border-radius: 16px !important;
font-size: 14px !important;
}
/* === Scrollbar === */
::-webkit-scrollbar { width: 6px; height: 6px; }
::-webkit-scrollbar-track { background: transparent; }
::-webkit-scrollbar-thumb { background: rgba(0,0,0,0.2); border-radius: 3px; }
::-webkit-scrollbar-thumb:hover { background: rgba(0,0,0,0.35); }
/* === AI Chat area === */
.chatbot { border-radius: 12px !important; }
/* === Provider selector === */
.provider-pill {
display: inline-flex;
align-items: center;
padding: 4px 10px;
border-radius: 20px;
font-size: 12px;
font-weight: 600;
margin: 2px;
}
"""
# ─── Gradio UI ───────────────────────────────────────────────────────────────
# Model choices offered per provider. The dict keys are the exact strings
# shown in the provider radio and matched by call_ai(); the first model of
# each list becomes the dropdown default (see update_models).
PROVIDER_MODELS = {
    "🤗 HuggingFace (бесплатно)": [
        "mistralai/Mistral-7B-Instruct-v0.3",
        "meta-llama/Meta-Llama-3-8B-Instruct",
        "HuggingFaceH4/zephyr-7b-beta",
        "google/gemma-7b-it",
        "tiiuae/falcon-7b-instruct",
    ],
    "⚡ Groq (бесплатно)": [
        "llama-3.1-8b-instant",
        "llama-3.3-70b-versatile",
        "mixtral-8x7b-32768",
        "gemma2-9b-it",
    ],
    "🏠 Ollama (локально)": [
        "llama3.2",
        "llama3.1",
        "mistral",
        "phi3",
        "gemma2",
        "qwen2.5",
    ],
    "🔵 OpenAI": [
        "gpt-4o-mini",
        "gpt-4o",
        "gpt-3.5-turbo",
    ],
}
def update_models(provider):
    """Swap the model dropdown's choices when the provider radio changes."""
    models = PROVIDER_MODELS.get(provider, [])
    default = models[0] if models else ""
    return gr.update(choices=models, value=default)
# AI chat history
# NOTE(review): chat_history appears unused — the Chatbot component carries
# the conversation state; kept so the module's attributes stay unchanged.
chat_history = []

def ai_chat(message, history, provider, model, api_key, ollama_host, system_prompt):
    """Handle one chat turn and return (updated history, "" to clear the input).

    The prompt includes up to the last three exchanges as context and the
    optional system prompt. The original built a no-history prompt first and
    then unconditionally rebuilt it whenever history existed; each variant is
    now constructed exactly once, with identical results.
    """
    if not message.strip():
        return history, ""
    if history:
        ctx = "\n".join(f"User: {h[0]}\nAssistant: {h[1]}" for h in history[-3:])
        full_prompt = ctx + "\n\nUser: " + message
        if system_prompt.strip():
            full_prompt = system_prompt + "\n\n" + full_prompt
    elif system_prompt.strip():
        full_prompt = f"{system_prompt}\n\nПользователь: {message}"
    else:
        full_prompt = message
    response = call_ai(full_prompt, provider, model, api_key, ollama_host)
    return history + [[message, response]], ""
def ai_generate_text(prompt, provider, model, api_key, ollama_host):
    """One-shot generation used by the 'single request' panel."""
    if not prompt.strip():
        return "⚠️ Введите запрос"
    # The prompt is forwarded unstripped, exactly as typed.
    return call_ai(prompt, provider, model, api_key, ollama_host)
def ai_generate_with_kb(selected_entries, writing_task, provider, model, api_key, ollama_host):
    """Compose a KB-grounded prompt and send it to the selected AI provider."""
    prompt = compose_context_for_writing(selected_entries, writing_task)
    if prompt.startswith("⚠️"):
        # No KB entries selected — surface the composer's warning as-is.
        return prompt
    return call_ai(prompt, provider, model, api_key, ollama_host)
def clear_chat():
    """Reset the chat: empty history plus an empty input box."""
    return [], ""
# Top-level Gradio application: macOS-styled UI with four tabs
# (AI chat, dataset builder, fine-tuning guide, knowledge base).
with gr.Blocks(title="Writing Style AI", css=macos_css, theme=gr.themes.Default(
    font=gr.themes.GoogleFont("Inter"),
    primary_hue="blue",
    secondary_hue="gray",
    neutral_hue="gray",
)) as demo:
    # macOS window title
    gr.HTML("""
<div class="app-header">
<div class="traffic-lights">
<span class="tl tl-red"></span>
<span class="tl tl-yellow"></span>
<span class="tl tl-green"></span>
</div>
<div style="display:flex;align-items:center;gap:10px;margin-left:12px;">
<span style="font-size:22px;">✍️</span>
<div>
<div style="font-size:17px;font-weight:700;color:#1c1c1e;letter-spacing:-0.3px;">Writing Style AI</div>
<div style="font-size:11px;color:#8e8e93;font-weight:500;">Dataset · Fine-tune · Knowledge Base · AI Chat</div>
</div>
</div>
</div>
""")
    with gr.Tabs():
        # ── TAB 0: AI Assistant ──────────────────────────────────────────────
        with gr.Tab("🤖 AI Ассистент"):
            # Provider config row
            with gr.Row():
                with gr.Column(scale=1):
                    gr.HTML('<div style="font-size:11px;font-weight:700;color:#8e8e93;letter-spacing:0.5px;text-transform:uppercase;margin-bottom:6px;">ПРОВАЙДЕР</div>')
                    ai_provider = gr.Radio(
                        choices=list(PROVIDER_MODELS.keys()),
                        value="🤗 HuggingFace (бесплатно)",
                        label="",
                        elem_id="provider_radio",
                    )
                with gr.Column(scale=1):
                    ai_model = gr.Dropdown(
                        choices=PROVIDER_MODELS["🤗 HuggingFace (бесплатно)"],
                        value="mistralai/Mistral-7B-Instruct-v0.3",
                        label="Модель",
                    )
                    ai_api_key = gr.Textbox(
                        label="API ключ",
                        placeholder="sk-... или hf_... (необязательно для HF)",
                        type="password",
                    )
                    # Only shown when the Ollama provider is selected (see .change below).
                    ai_ollama_host = gr.Textbox(
                        label="Ollama Host",
                        value="http://localhost:11434",
                        visible=False,
                    )
            # Chat area
            with gr.Row():
                with gr.Column(scale=2):
                    ai_system_prompt = gr.Textbox(
                        label="System Prompt (необязательно)",
                        placeholder="Ты — профессиональный писатель в стиле Хемингуэя...",
                        lines=2,
                    )
                    ai_chatbot = gr.Chatbot(
                        label="",
                        height=420,
                        show_copy_button=True,
                        avatar_images=["🧑", "🤖"],
                        elem_id="macos_chat",
                    )
                    with gr.Row():
                        ai_input = gr.Textbox(
                            label="",
                            placeholder="Напишите сообщение... (Enter для отправки)",
                            lines=2,
                            scale=4,
                        )
                        with gr.Column(scale=1, min_width=100):
                            ai_send_btn = gr.Button("Отправить ↑", variant="primary")
                            ai_clear_btn = gr.Button("Очистить", variant="secondary")
                with gr.Column(scale=1):
                    gr.HTML('<div style="font-size:11px;font-weight:700;color:#8e8e93;letter-spacing:0.5px;text-transform:uppercase;margin-bottom:8px;">БЫСТРЫЕ ЗАПРОСЫ</div>')
                    gr.HTML("""
<div style="display:flex;flex-direction:column;gap:6px;">
<div style="background:rgba(0,122,255,0.08);border:1px solid rgba(0,122,255,0.2);border-radius:8px;padding:10px;font-size:13px;cursor:pointer;" onclick="document.querySelector('#macos_chat input, textarea').value='Напиши статью про ИИ в стиле научпоп'">📄 Статья в стиле научпоп</div>
<div style="background:rgba(52,199,89,0.08);border:1px solid rgba(52,199,89,0.2);border-radius:8px;padding:10px;font-size:13px;" >💡 Генерация идей</div>
<div style="background:rgba(255,149,0,0.08);border:1px solid rgba(255,149,0,0.2);border-radius:8px;padding:10px;font-size:13px;" >✏️ Редактирование текста</div>
<div style="background:rgba(175,82,222,0.08);border:1px solid rgba(175,82,222,0.2);border-radius:8px;padding:10px;font-size:13px;" >🔄 Перевод и локализация</div>
</div>
""")
                    gr.HTML('<div style="font-size:11px;font-weight:700;color:#8e8e93;letter-spacing:0.5px;text-transform:uppercase;margin:16px 0 8px;">ОДИНОЧНЫЙ ЗАПРОС</div>')
                    single_prompt = gr.Textbox(label="Промпт", lines=4, placeholder="Введите промпт...")
                    single_generate_btn = gr.Button("⚡ Генерировать", variant="primary")
                    single_output = gr.Textbox(label="Результат", lines=8, show_copy_button=True)
            # Wire AI tab events
            ai_provider.change(update_models, inputs=[ai_provider], outputs=[ai_model])
            # Toggle the Ollama host field visibility with the provider choice.
            ai_provider.change(
                lambda p: gr.update(visible=p == "🏠 Ollama (локально)"),
                inputs=[ai_provider], outputs=[ai_ollama_host]
            )
            ai_send_btn.click(
                ai_chat,
                inputs=[ai_input, ai_chatbot, ai_provider, ai_model, ai_api_key, ai_ollama_host, ai_system_prompt],
                outputs=[ai_chatbot, ai_input],
            )
            # Enter in the textbox behaves like pressing the send button.
            ai_input.submit(
                ai_chat,
                inputs=[ai_input, ai_chatbot, ai_provider, ai_model, ai_api_key, ai_ollama_host, ai_system_prompt],
                outputs=[ai_chatbot, ai_input],
            )
            ai_clear_btn.click(clear_chat, outputs=[ai_chatbot, ai_input])
            single_generate_btn.click(
                ai_generate_text,
                inputs=[single_prompt, ai_provider, ai_model, ai_api_key, ai_ollama_host],
                outputs=[single_output],
            )
        # ── TAB 1: Dataset Builder ───────────────────────────────────────────
        with gr.Tab("📦 Dataset Builder"):
            gr.HTML('<h3 style="margin:0 0 12px;font-size:15px;font-weight:600;color:#1c1c1e;">Загрузите ваши тексты (DOCX / TXT) для создания датасета</h3>')
            with gr.Row():
                with gr.Column(scale=2):
                    file_input = gr.File(
                        label="DOCX или TXT файлы",
                        file_count="multiple",
                        file_types=[".docx", ".txt"],
                    )
                    with gr.Row():
                        chunk_size = gr.Slider(128, 1024, value=512, step=64, label="Размер чанка (слов)")
                        overlap = gr.Slider(0, 128, value=64, step=16, label="Перекрытие (слов)")
                    add_qa = gr.Checkbox(value=True, label="Генерировать QA-пары")
                    format_choice = gr.Radio(["JSONL", "JSON", "CSV"], value="JSONL", label="Формат")
                    dataset_name = gr.Textbox(label="Название датасета", value="my_dataset")
                    with gr.Row():
                        process_btn = gr.Button("⚙️ Обработать файлы", variant="primary")
                        clear_btn = gr.Button("🗑️ Очистить буфер", variant="secondary")
                        # Save stays disabled until process_files enables it.
                        save_btn = gr.Button("💾 Сохранить датасет", variant="primary", interactive=False)
                with gr.Column(scale=1):
                    status_box = gr.Textbox(label="Статус", lines=8)
                    preview_box = gr.Textbox(label="Превью записей", lines=8)
                    file_output = gr.File(label="Скачать датасет")
                    datasets_list = gr.Textbox(label="Сохранённые датасеты", lines=4, value=list_datasets)
            process_btn.click(
                process_files,
                inputs=[file_input, chunk_size, overlap, format_choice, add_qa],
                outputs=[status_box, save_btn, preview_box],
            )
            # After saving, refresh the list of files on disk.
            save_btn.click(
                save_dataset,
                inputs=[format_choice, dataset_name],
                outputs=[status_box, file_output],
            ).then(lambda: list_datasets(), outputs=datasets_list)
            clear_btn.click(clear_buffer, outputs=status_box)
        # ── TAB 2: Fine-tuning Guide ─────────────────────────────────────────
        with gr.Tab("🧠 Fine-tuning (LoRA)"):
            gr.HTML('<h3 style="margin:0 0 12px;font-size:15px;font-weight:600;color:#1c1c1e;">Бесплатное обучение LoRA через Google Colab</h3>')
            with gr.Row():
                with gr.Column():
                    model_choice = gr.Radio(
                        ["Mistral 7B (рекомендуется)", "Llama 3.1 8B", "Gemma 2 9B", "Phi-3 Mini (быстрее)"],
                        value="Mistral 7B (рекомендуется)",
                        label="Базовая модель",
                    )
                    dataset_path_input = gr.Textbox(
                        label="Путь к датасету",
                        placeholder="datasets/my_dataset.jsonl",
                    )
                    guide_btn = gr.Button("📋 Получить инструкции", variant="primary")
                with gr.Column(scale=2):
                    guide_output = gr.Markdown()
            guide_btn.click(get_colab_guide, inputs=[model_choice, dataset_path_input], outputs=guide_output)
            gr.HTML('<div style="margin-top:12px;font-size:13px;color:#6e6e73;"><b>Ресурсы:</b> <a href="https://colab.research.google.com" target="_blank" style="color:#007aff;">Google Colab</a> · <a href="https://github.com/unslothai/unsloth" target="_blank" style="color:#007aff;">Unsloth</a> · <a href="https://huggingface.co" target="_blank" style="color:#007aff;">HuggingFace</a> · <a href="https://www.kaggle.com/code" target="_blank" style="color:#007aff;">Kaggle</a></div>')
        # ── TAB 3: Knowledge Base ────────────────────────────────────────────
        with gr.Tab("📚 Knowledge Base"):
            gr.HTML('<h3 style="margin:0 0 12px;font-size:15px;font-weight:600;color:#1c1c1e;">База знаний — контекст для генерации текстов</h3>')
            with gr.Tabs():
                with gr.Tab("➕ Добавить запись"):
                    with gr.Row():
                        with gr.Column():
                            kb_name = gr.Textbox(label="Название записи", placeholder="Мой стиль / Тема...")
                            kb_tags = gr.Textbox(label="Теги (через запятую)", placeholder="стиль, технический")
                            kb_text = gr.Textbox(label="Текст / Контекст", lines=8)
                            kb_files = gr.File(
                                label="Загрузить DOCX/TXT",
                                file_count="multiple",
                                file_types=[".docx", ".txt"],
                            )
                            kb_add_btn = gr.Button("✅ Добавить в базу", variant="primary")
                        with gr.Column():
                            kb_status = gr.Textbox(label="Статус", lines=3)
                            kb_list_add = gr.CheckboxGroup(label="Записи в базе", choices=get_kb_choices())
                with gr.Tab("🔍 Просмотр и поиск"):
                    with gr.Row():
                        kb_list_view = gr.CheckboxGroup(label="Записи (выберите для поиска)", choices=get_kb_choices())
                        refresh_btn = gr.Button("🔄 Обновить список", variant="secondary")
                    with gr.Row():
                        search_query = gr.Textbox(label="Поиск", placeholder="ключевое слово...")
                        search_btn = gr.Button("Найти", variant="primary")
                    search_results = gr.Markdown()
                    with gr.Row():
                        view_entry = gr.Dropdown(label="Просмотреть запись", choices=get_kb_choices())
                        view_btn = gr.Button("👁 Показать", variant="secondary")
                        del_btn = gr.Button("🗑️ Удалить", variant="stop")
                    kb_del_status = gr.Textbox(label="Статус", lines=2)
                    with gr.Row():
                        entry_text = gr.Textbox(label="Текст", lines=6, interactive=False)
                        entry_tags = gr.Textbox(label="Теги", interactive=False)
                        entry_created = gr.Textbox(label="Создано", interactive=False)
                with gr.Tab("✍️ Генерация с KB + AI"):
                    gr.HTML('<div style="font-size:13px;color:#6e6e73;margin-bottom:12px;">Выберите записи из базы знаний, задайте задание — AI напишет текст в нужном стиле</div>')
                    gen_entries = gr.CheckboxGroup(label="Выберите записи из KB", choices=get_kb_choices())
                    refresh_gen_btn = gr.Button("🔄 Обновить", variant="secondary")
                    writing_task = gr.Textbox(
                        label="Задание для генерации",
                        placeholder="Напиши статью про... / Создай пост о...",
                        lines=3,
                    )
                    with gr.Row():
                        compose_btn = gr.Button("📝 Сформировать промпт", variant="secondary")
                        kb_ai_generate_btn = gr.Button("🤖 Сгенерировать через AI", variant="primary")
                    composed_prompt = gr.Textbox(
                        label="Готовый промпт",
                        lines=8,
                        show_copy_button=True,
                    )
                    kb_ai_result = gr.Textbox(
                        label="Результат AI",
                        lines=12,
                        show_copy_button=True,
                    )
            # Wire KB events
            kb_add_btn.click(
                add_kb_entry,
                inputs=[kb_name, kb_text, kb_tags, kb_files],
                outputs=[kb_status, kb_list_add],
            )
            refresh_btn.click(refresh_kb, outputs=kb_list_view)
            search_btn.click(search_kb, inputs=[search_query, kb_list_view], outputs=search_results)
            view_btn.click(get_kb_entry_content, inputs=[view_entry], outputs=[entry_text, entry_tags, entry_created])
            del_btn.click(delete_kb_entry, inputs=[view_entry], outputs=[kb_del_status, kb_list_view])
            refresh_gen_btn.click(refresh_kb, outputs=gen_entries)
            compose_btn.click(compose_context_for_writing, inputs=[gen_entries, writing_task], outputs=composed_prompt)
            # Generation reuses the provider settings configured on the AI tab.
            kb_ai_generate_btn.click(
                ai_generate_with_kb,
                inputs=[gen_entries, writing_task, ai_provider, ai_model, ai_api_key, ai_ollama_host],
                outputs=[kb_ai_result],
            )
    gr.HTML("""
<div style="text-align:center;padding:16px 0 4px;font-size:12px;color:#8e8e93;">
✍️ <b>Writing Style AI</b> — Dataset Builder · LoRA Fine-tuning · Knowledge Base · AI Chat
</div>
""")
if __name__ == "__main__":
    # Launch the Gradio server when the file is run as a script.
    demo.launch()