| import gradio as gr |
| import json |
| import os |
| import re |
| import csv |
| import requests |
| from datetime import datetime |
| from pathlib import Path |
|
|
| try: |
| from docx import Document |
| DOCX_AVAILABLE = True |
| except ImportError: |
| DOCX_AVAILABLE = False |
|
|
| DATASET_DIR = "datasets" |
| KB_DIR = "knowledge_base" |
| os.makedirs(DATASET_DIR, exist_ok=True) |
| os.makedirs(KB_DIR, exist_ok=True) |
|
|
| |
def call_openai(prompt, model="gpt-3.5-turbo", api_key=""):
    """Send a single-turn chat completion to OpenAI.

    Returns the model's reply text, or a human-readable warning/error string
    (the UI displays whatever comes back, so errors are returned, not raised).
    """
    if not api_key:
        return "⚠️ Введите API ключ OpenAI"
    try:
        # Imported lazily so the app still starts when openai is not installed.
        from openai import OpenAI
        completion = OpenAI(api_key=api_key).chat.completions.create(
            model=model,
            max_tokens=2000,
            messages=[{"role": "user", "content": prompt}],
        )
        return completion.choices[0].message.content
    except Exception as exc:
        return f"❌ OpenAI error: {exc}"
|
|
def call_hf_inference(prompt, model="mistralai/Mistral-7B-Instruct-v0.3", api_key=""):
    """Query the HuggingFace Inference API; the bearer token is optional.

    Returns generated text, an API error message, or a local error string.
    """
    endpoint = f"https://api-inference.huggingface.co/models/{model}"
    headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
    body = {"inputs": prompt, "parameters": {"max_new_tokens": 800, "temperature": 0.7}}
    try:
        data = requests.post(endpoint, headers=headers, json=body, timeout=60).json()
    except Exception as exc:
        return f"❌ HF error: {exc}"
    # The API may answer with a list of generations or a dict (error / single result).
    if isinstance(data, list) and data:
        return data[0].get("generated_text", str(data))
    if isinstance(data, dict):
        if "error" in data:
            return f"⚠️ HF: {data['error']}"
        return data.get("generated_text", str(data))
    return str(data)
|
|
def call_ollama(prompt, model="llama3.2", host="http://localhost:11434"):
    """Generate a completion through a local Ollama server (non-streaming)."""
    payload = {"model": model, "prompt": prompt, "stream": False}
    try:
        reply = requests.post(f"{host}/api/generate", json=payload, timeout=120)
        return reply.json().get("response", "No response")
    except Exception as exc:
        return f"❌ Ollama error: {exc}"
|
|
def call_groq(prompt, model="llama-3.1-8b-instant", api_key=""):
    """Chat completion via Groq's OpenAI-compatible endpoint.

    Returns reply text or a warning/error string for the UI.
    """
    if not api_key:
        return "⚠️ Введите API ключ Groq"
    try:
        # Groq speaks the OpenAI wire protocol, so the same client is reused.
        from openai import OpenAI
        groq = OpenAI(api_key=api_key, base_url="https://api.groq.com/openai/v1")
        completion = groq.chat.completions.create(
            model=model,
            max_tokens=2000,
            messages=[{"role": "user", "content": prompt}],
        )
        return completion.choices[0].message.content
    except Exception as exc:
        return f"❌ Groq error: {exc}"
|
|
def call_ai(prompt, provider, model, api_key, ollama_host):
    """Route the prompt to the backend matching the provider label from the UI."""
    if provider == "🔵 OpenAI":
        return call_openai(prompt, model, api_key)
    if provider == "🏠 Ollama (локально)":
        return call_ollama(prompt, model, ollama_host)
    if provider == "⚡ Groq (бесплатно)":
        return call_groq(prompt, model, api_key)
    if provider == "🤗 HuggingFace (бесплатно)":
        return call_hf_inference(prompt, model, api_key)
    return "⚠️ Выберите провайдера"
|
|
|
|
| |
def extract_text_from_docx(file_path):
    """Return the non-empty paragraphs of a .docx joined by blank lines.

    Returns "" when python-docx is not installed.
    """
    if not DOCX_AVAILABLE:
        return ""
    paragraphs = (p.text for p in Document(file_path).paragraphs if p.text.strip())
    return "\n\n".join(paragraphs)
|
|
def split_into_chunks(text, chunk_size=512, overlap=64):
    """Split *text* into chunks of ``chunk_size`` words sharing ``overlap`` words.

    The step is clamped to at least 1 word: the UI sliders allow
    chunk_size=128 with overlap=128, which previously made the step 0 and
    hung the app in an infinite loop.

    Returns a list of space-joined word chunks ([] for empty/whitespace text).
    """
    words = text.split()
    step = max(1, chunk_size - overlap)
    chunks = []
    i = 0
    while i < len(words):
        chunks.append(" ".join(words[i: i + chunk_size]))
        i += step
    return chunks
|
|
def text_to_qa_pairs(text, source_name=""):
    """Turn substantial paragraphs (>60 chars) into instruction/output pairs.

    Every paragraph yields a "continue in the author's style" pair; all but
    the first additionally yield a "write on this topic" pair.
    """
    paragraphs = [p.strip() for p in text.split("\n\n") if len(p.strip()) > 60]
    pairs = []
    for idx, paragraph in enumerate(paragraphs):
        # Long paragraphs seed the input with their first 200 chars.
        prompt_input = paragraph[:200] if len(paragraph) > 200 else ""
        pairs.append({
            "instruction": "Продолжи текст в стиле автора.",
            "input": prompt_input,
            "output": paragraph,
            "source": source_name,
        })
        if idx:
            pairs.append({
                "instruction": "Напиши текст на тему: " + paragraph[:80],
                "input": "",
                "output": paragraph,
                "source": source_name,
            })
    return pairs
|
|
| dataset_store = [] |
|
|
def process_files(files, chunk_size, overlap, format_choice, add_qa):
    """Read uploaded DOCX/TXT files, chunk them, and append records to the buffer.

    Returns (status log, gr.update enabling the save button, preview string).
    """
    global dataset_store
    if not files:
        return "⚠️ Файлы не выбраны.", gr.update(interactive=False), ""
    added = []
    log = []
    for uploaded in files:
        fname = os.path.basename(uploaded)
        suffix = Path(uploaded).suffix.lower()
        if suffix == ".docx":
            if not DOCX_AVAILABLE:
                log.append(f"❌ {fname}: python-docx не установлен")
                continue
            raw = extract_text_from_docx(uploaded)
        elif suffix == ".txt":
            with open(uploaded, encoding="utf-8", errors="ignore") as fh:
                raw = fh.read()
        else:
            log.append(f"⏭ {fname}: неподдерживаемый формат")
            continue
        if not raw.strip():
            log.append(f"⚠️ {fname}: пустой файл")
            continue
        pieces = split_into_chunks(raw, int(chunk_size), int(overlap))
        added.extend(
            {"instruction": "Напиши в стиле автора.", "input": "", "output": piece, "source": fname}
            for piece in pieces
        )
        if add_qa:
            added.extend(text_to_qa_pairs(raw, fname))
        log.append(f"✅ {fname}: {len(pieces)} чанков")
    dataset_store.extend(added)
    # Preview the first five buffered records (truncated).
    preview = "\n".join(
        f"[{n+1}] {rec['output'][:120]}..." for n, rec in enumerate(dataset_store[:5])
    )
    summary = "\n".join(log) + f"\n\n📦 Всего записей: {len(dataset_store)}"
    return summary, gr.update(interactive=True), preview
|
|
def save_dataset(format_choice, dataset_name):
    """Write the buffered records to disk in the chosen format and clear the buffer.

    The JSONL writer was previously duplicated verbatim in the fallback
    branch; unknown format choices now share the single JSONL path.

    Returns (status message, output path) — path is None when the buffer is empty.
    """
    global dataset_store
    if not dataset_store:
        return "⚠️ Буфер пустой.", None
    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    # Sanitize the user-supplied name so it is always a safe filename.
    safe_name = re.sub(r"[^a-zA-Z0-9_\-]", "_", dataset_name or "dataset")
    fname_base = f"{safe_name}_{ts}"
    if format_choice == "JSON":
        out_path = os.path.join(DATASET_DIR, fname_base + ".json")
        with open(out_path, "w", encoding="utf-8") as f:
            json.dump(dataset_store, f, ensure_ascii=False, indent=2)
    elif format_choice == "CSV":
        out_path = os.path.join(DATASET_DIR, fname_base + ".csv")
        with open(out_path, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=["instruction", "input", "output", "source"])
            writer.writeheader()
            writer.writerows(dataset_store)
    else:
        # "JSONL" and any unknown choice both fall through to JSON Lines.
        out_path = os.path.join(DATASET_DIR, fname_base + ".jsonl")
        with open(out_path, "w", encoding="utf-8") as f:
            for rec in dataset_store:
                f.write(json.dumps(rec, ensure_ascii=False) + "\n")
    count = len(dataset_store)
    dataset_store = []
    return f"✅ Сохранено: {out_path} ({count} записей)", out_path
|
|
def list_datasets():
    """Return a bullet list of saved dataset files with sizes, or a placeholder."""
    found = sorted(Path(DATASET_DIR).glob("*.*"))
    if not found:
        return "📭 Нет датасетов."
    return "\n".join(f"• {p.name} ({p.stat().st_size // 1024} KB)" for p in found)
|
|
def clear_buffer():
    """Drop every buffered record and report it to the status box."""
    global dataset_store
    dataset_store = []
    return "🗑️ Буфер очищен."
|
|
|
|
# Raw string: the embedded Colab snippet contains literal "\n" escapes inside
# its f-string example. In a non-raw string Python interpreted them as real
# newlines, breaking the displayed code (copy-pasting it was a SyntaxError).
COLAB_TEMPLATE = r"""## 🚀 Fine-tuning (LoRA) — Бесплатно через Google Colab
### 1. Установка
```python
!pip install -q unsloth transformers peft datasets trl accelerate bitsandbytes
```
### 2. Загрузка модели
```python
from unsloth import FastLanguageModel
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="MODEL_PLACEHOLDER",
    max_seq_length=2048,
    load_in_4bit=True,
)
model = FastLanguageModel.get_peft_model(
    model, r=16,
    target_modules=["q_proj","k_proj","v_proj","o_proj","gate_proj","up_proj","down_proj"],
    lora_alpha=16, lora_dropout=0, bias="none",
    use_gradient_checkpointing="unsloth",
)
```
### 3. Загрузка датасета
```python
from datasets import load_dataset
dataset = load_dataset("json", data_files="DATASET_PLACEHOLDER", split="train")
def format_prompt(ex):
    return {"text": f"### Instruction:\n{ex['instruction']}\n\n### Input:\n{ex['input']}\n\n### Response:\n{ex['output']}"}
dataset = dataset.map(format_prompt)
```
### 4. Обучение
```python
from trl import SFTTrainer
from transformers import TrainingArguments
import torch
trainer = SFTTrainer(
    model=model, tokenizer=tokenizer,
    train_dataset=dataset, dataset_text_field="text",
    max_seq_length=2048,
    args=TrainingArguments(
        per_device_train_batch_size=2, gradient_accumulation_steps=4,
        num_train_epochs=3, learning_rate=2e-4,
        fp16=not torch.cuda.is_bf16_supported(), bf16=torch.cuda.is_bf16_supported(),
        output_dir="outputs", optim="adamw_8bit",
    ),
)
trainer.train()
```
### 5. Сохранение
```python
model.save_pretrained_merged("my_style_model", tokenizer, save_method="merged_16bit")
```
---
**Бесплатные GPU:** Google Colab T4 | Kaggle (30ч/нед) | HuggingFace Spaces"""
|
|
def get_colab_guide(model_choice, dataset_path):
    """Fill the Colab template with the chosen base model and dataset path."""
    model_map = {
        "Mistral 7B (рекомендуется)": "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
        "Llama 3.1 8B": "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
        "Gemma 2 9B": "unsloth/gemma-2-9b-it-bnb-4bit",
        "Phi-3 Mini (быстрее)": "unsloth/Phi-3-mini-4k-instruct",
    }
    # Unknown UI labels fall back to the recommended Mistral build.
    hf_model = model_map.get(model_choice, "unsloth/mistral-7b-instruct-v0.3-bnb-4bit")
    return (
        COLAB_TEMPLATE
        .replace("MODEL_PLACEHOLDER", hf_model)
        .replace("DATASET_PLACEHOLDER", dataset_path or "your_dataset.jsonl")
    )
|
|
| |
# In-memory knowledge base: {entry name: {"text", "tags", "created"}},
# mirrored to KB_FILE (JSON) on every mutation.
kb_store = {}
KB_FILE = os.path.join(KB_DIR, "knowledge_base.json")
|
|
def load_kb():
    """Reload the knowledge base dict from disk, if the JSON file exists."""
    global kb_store
    if not os.path.exists(KB_FILE):
        return
    with open(KB_FILE, encoding="utf-8") as fh:
        kb_store = json.load(fh)
|
|
def save_kb_to_disk():
    """Persist the in-memory knowledge base as pretty-printed UTF-8 JSON."""
    with open(KB_FILE, "w", encoding="utf-8") as fh:
        fh.write(json.dumps(kb_store, ensure_ascii=False, indent=2))
|
|
def get_kb_choices():
    """Entry names currently in the knowledge base (for dropdown/checkbox choices)."""
    return [name for name in kb_store]
|
|
def add_kb_entry(name, text, tags_raw, files):
    """Create or overwrite a KB entry from free text plus optional DOCX/TXT uploads.

    Returns (status message, gr.update refreshing the entry-name choices).
    """
    if not name.strip():
        return "⚠️ Введите название.", gr.update(choices=get_kb_choices())
    body = text.strip()
    # Uploaded file contents are appended after the typed text.
    for fpath in files or []:
        suffix = Path(fpath).suffix.lower()
        if suffix == ".docx" and DOCX_AVAILABLE:
            body += "\n\n" + extract_text_from_docx(fpath)
        elif suffix == ".txt":
            with open(fpath, encoding="utf-8", errors="ignore") as fh:
                body += "\n\n" + fh.read()
    kb_store[name.strip()] = {
        "text": body,
        "tags": [t.strip() for t in tags_raw.split(",") if t.strip()],
        "created": datetime.now().isoformat(),
    }
    save_kb_to_disk()
    return f"✅ '{name}' добавлена в базу.", gr.update(choices=get_kb_choices())
|
|
def refresh_kb():
    """Re-read the KB from disk and push fresh entry names to the component."""
    load_kb()
    return gr.update(choices=list(kb_store.keys()))
|
|
def get_kb_entry_content(name):
    """Return (text, comma-joined tags, created timestamp) for an entry, or blanks."""
    if not name:
        return "", "", ""
    load_kb()
    entry = kb_store.get(name)
    if not entry:
        return "", "", ""
    tag_line = ", ".join(entry.get("tags", []))
    return entry.get("text", ""), tag_line, entry.get("created", "")
|
|
def delete_kb_entry(name):
    """Remove a KB entry by name and persist; always returns refreshed choices."""
    if not name:
        return "⚠️ Выберите запись.", gr.update(choices=get_kb_choices())
    load_kb()
    if name not in kb_store:
        return "⚠️ Не найдено.", gr.update(choices=get_kb_choices())
    kb_store.pop(name)
    save_kb_to_disk()
    return f"🗑️ '{name}' удалена.", gr.update(choices=get_kb_choices())
|
|
def search_kb(query, selected_entries):
    """Case-insensitive substring search over entry names, texts, and tags.

    Restricts the search to *selected_entries* when given, otherwise scans
    the whole base. Returns a markdown list of hits with 300-char snippets.
    """
    load_kb()
    needle = query.lower()
    names = selected_entries if selected_entries else list(kb_store.keys())
    hits = []
    for entry_name in names:
        record = kb_store.get(entry_name, {})
        body = record.get("text", "")
        tag_line = " ".join(record.get("tags", []))
        if any(needle in hay for hay in (body.lower(), entry_name.lower(), tag_line.lower())):
            snippet = body[:300].replace("\n", " ")
            hits.append(f"**{entry_name}** [{', '.join(record.get('tags', []))}]\n{snippet}...")
    if not hits:
        return "🔍 Ничего не найдено."
    return "\n\n---\n\n".join(hits)
|
|
def compose_context_for_writing(selected_entries, writing_task):
    """Build the style-imitation prompt from chosen KB entries (first 800 chars each).

    Returns a warning string (starting with "⚠️") when nothing usable was selected.
    """
    load_kb()
    excerpts = []
    for entry_name in (selected_entries or []):
        record = kb_store.get(entry_name, {})
        if record:
            excerpts.append(f"=== {entry_name} ===\n{record['text'][:800]}")
    context = "\n\n".join(excerpts)
    if not context:
        return "⚠️ Выберите записи из Knowledge Base."
    return f"""Ты пишешь текст в точном стиле автора, используя следующие знания:

{context}

Задание: {writing_task}

[Начало текста в стиле автора]:"""
|
|
| load_kb() |
|
|
|
|
| |
# macOS-flavored stylesheet injected into gr.Blocks via the `css=` argument.
# This is a runtime string consumed by the browser — keep selectors in sync
# with the elem_id/class names used in the UI below.
macos_css = """
/* === macOS System Font === */
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');

* { font-family: -apple-system, BlinkMacSystemFont, 'SF Pro Display', 'SF Pro Text', 'Inter', 'Helvetica Neue', Arial, sans-serif !important; }

/* === Base === */
body, .gradio-container {
    background: #f2f2f7 !important;
    color: #1c1c1e !important;
}

footer { display: none !important; }

/* === Window chrome === */
.gradio-container > .main {
    background: #f2f2f7 !important;
}

/* === Panels === */
.panel, .block, .form {
    background: rgba(255,255,255,0.85) !important;
    border: 1px solid rgba(0,0,0,0.08) !important;
    border-radius: 12px !important;
    box-shadow: 0 1px 3px rgba(0,0,0,0.08), 0 4px 16px rgba(0,0,0,0.04) !important;
    backdrop-filter: blur(20px) !important;
}

/* === Tab bar === */
.tabs > .tab-nav {
    background: rgba(255,255,255,0.7) !important;
    border-radius: 10px !important;
    padding: 4px !important;
    border: 1px solid rgba(0,0,0,0.08) !important;
    backdrop-filter: blur(20px) !important;
}

.tabs > .tab-nav button {
    border-radius: 7px !important;
    font-size: 13px !important;
    font-weight: 500 !important;
    color: #8e8e93 !important;
    padding: 6px 14px !important;
    transition: all 0.15s ease !important;
    background: transparent !important;
    border: none !important;
}

.tabs > .tab-nav button.selected {
    background: #ffffff !important;
    color: #1c1c1e !important;
    font-weight: 600 !important;
    box-shadow: 0 1px 4px rgba(0,0,0,0.15) !important;
}

/* === Buttons === */
button.primary, .btn-primary, button[variant="primary"] {
    background: #007aff !important;
    color: #ffffff !important;
    border: none !important;
    border-radius: 8px !important;
    font-weight: 600 !important;
    font-size: 13px !important;
    padding: 8px 18px !important;
    transition: all 0.15s ease !important;
    box-shadow: 0 1px 3px rgba(0,122,255,0.3) !important;
}

button.primary:hover, button[variant="primary"]:hover {
    background: #0071eb !important;
    transform: translateY(-0.5px) !important;
    box-shadow: 0 2px 8px rgba(0,122,255,0.4) !important;
}

button.stop, button[variant="stop"] {
    background: #ff3b30 !important;
    color: #fff !important;
    border: none !important;
    border-radius: 8px !important;
    font-weight: 600 !important;
    font-size: 13px !important;
}

button.secondary, button[variant="secondary"] {
    background: rgba(0,0,0,0.05) !important;
    color: #1c1c1e !important;
    border: 1px solid rgba(0,0,0,0.12) !important;
    border-radius: 8px !important;
    font-weight: 500 !important;
    font-size: 13px !important;
}

/* === Inputs === */
input[type="text"], textarea, .input-text, select {
    background: rgba(255,255,255,0.9) !important;
    border: 1px solid rgba(0,0,0,0.15) !important;
    border-radius: 8px !important;
    color: #1c1c1e !important;
    font-size: 13px !important;
    padding: 8px 10px !important;
    transition: border 0.15s ease, box-shadow 0.15s ease !important;
}

input[type="text"]:focus, textarea:focus {
    border-color: #007aff !important;
    box-shadow: 0 0 0 3px rgba(0,122,255,0.15) !important;
    outline: none !important;
}

/* === Labels === */
label, .label-wrap span, .block > label > span {
    font-size: 12px !important;
    font-weight: 600 !important;
    color: #6e6e73 !important;
    letter-spacing: 0.3px !important;
    text-transform: uppercase !important;
}

/* === Headings === */
h1 { font-size: 28px !important; font-weight: 700 !important; letter-spacing: -0.5px !important; color: #1c1c1e !important; }
h2 { font-size: 20px !important; font-weight: 600 !important; color: #1c1c1e !important; }
h3 { font-size: 15px !important; font-weight: 600 !important; color: #1c1c1e !important; }

/* === Sliders === */
input[type="range"] {
    accent-color: #007aff !important;
}

/* === Checkboxes === */
input[type="checkbox"] {
    accent-color: #34c759 !important;
    width: 16px !important;
    height: 16px !important;
}

/* === Radio === */
input[type="radio"] {
    accent-color: #007aff !important;
}

/* === File upload === */
.upload-container, [data-testid="file-upload"] {
    background: rgba(0,122,255,0.04) !important;
    border: 1.5px dashed rgba(0,122,255,0.3) !important;
    border-radius: 12px !important;
    transition: all 0.2s ease !important;
}

.upload-container:hover {
    background: rgba(0,122,255,0.08) !important;
    border-color: #007aff !important;
}

/* === Sidebar / columns === */
.gap {
    gap: 12px !important;
}

/* === macOS window title bar decoration === */
.app-header {
    display: flex;
    align-items: center;
    gap: 8px;
    padding: 0 0 16px 4px;
}

.traffic-lights {
    display: flex;
    gap: 6px;
    align-items: center;
}

.tl { width:12px; height:12px; border-radius:50%; display:inline-block; }
.tl-red { background:#ff5f57; border:1px solid #e0443e; }
.tl-yellow { background:#febc2e; border:1px solid #d4a000; }
.tl-green { background:#28c840; border:1px solid #1aab29; }

/* === Status boxes === */
.textbox textarea {
    font-family: 'SF Mono', 'Menlo', 'Monaco', monospace !important;
    font-size: 12px !important;
    line-height: 1.5 !important;
}

/* === Markdown output === */
.prose, .markdown-text {
    font-size: 14px !important;
    line-height: 1.6 !important;
    color: #1c1c1e !important;
}

/* === AI Chat bubbles === */
.message-wrap .message {
    border-radius: 16px !important;
    font-size: 14px !important;
}

/* === Scrollbar === */
::-webkit-scrollbar { width: 6px; height: 6px; }
::-webkit-scrollbar-track { background: transparent; }
::-webkit-scrollbar-thumb { background: rgba(0,0,0,0.2); border-radius: 3px; }
::-webkit-scrollbar-thumb:hover { background: rgba(0,0,0,0.35); }

/* === AI Chat area === */
.chatbot { border-radius: 12px !important; }

/* === Provider selector === */
.provider-pill {
    display: inline-flex;
    align-items: center;
    padding: 4px 10px;
    border-radius: 20px;
    font-size: 12px;
    font-weight: 600;
    margin: 2px;
}
"""
|
|
|
|
| |
|
|
# Model choices offered per provider; the keys double as the provider labels
# shown in the Radio component and matched by call_ai().
PROVIDER_MODELS = {
    "🤗 HuggingFace (бесплатно)": [
        "mistralai/Mistral-7B-Instruct-v0.3",
        "meta-llama/Meta-Llama-3-8B-Instruct",
        "HuggingFaceH4/zephyr-7b-beta",
        "google/gemma-7b-it",
        "tiiuae/falcon-7b-instruct",
    ],
    "⚡ Groq (бесплатно)": [
        "llama-3.1-8b-instant",
        "llama-3.3-70b-versatile",
        "mixtral-8x7b-32768",
        "gemma2-9b-it",
    ],
    "🏠 Ollama (локально)": [
        "llama3.2",
        "llama3.1",
        "mistral",
        "phi3",
        "gemma2",
        "qwen2.5",
    ],
    "🔵 OpenAI": [
        "gpt-4o-mini",
        "gpt-4o",
        "gpt-3.5-turbo",
    ],
}
|
|
def update_models(provider):
    """Swap the model dropdown's choices when the provider radio changes."""
    available = PROVIDER_MODELS.get(provider, [])
    default = available[0] if available else ""
    return gr.update(choices=available, value=default)
|
|
| |
| chat_history = [] |
|
|
def ai_chat(message, history, provider, model, api_key, ollama_host, system_prompt):
    """Handle one chat turn: build a context prompt, call the provider, append to history.

    Previously the system-prefixed prompt was built up front and then
    unconditionally discarded and rebuilt whenever history was non-empty;
    the branches are now mutually exclusive. Behavior is unchanged,
    including the differing "Пользователь:"/"User:" labels of the two paths.

    Returns (updated history as [user, assistant] pairs, "" to clear the input box).
    """
    if not message.strip():
        return history, ""
    if history:
        # Keep only the last three exchanges as rolling context.
        ctx = "\n".join(f"User: {u}\nAssistant: {a}" for u, a in history[-3:])
        full_prompt = ctx + "\n\nUser: " + message
        if system_prompt.strip():
            full_prompt = system_prompt + "\n\n" + full_prompt
    elif system_prompt.strip():
        full_prompt = f"{system_prompt}\n\nПользователь: {message}"
    else:
        full_prompt = message
    response = call_ai(full_prompt, provider, model, api_key, ollama_host)
    return history + [[message, response]], ""
|
|
def ai_generate_text(prompt, provider, model, api_key, ollama_host):
    """One-shot generation for the 'single request' box."""
    if prompt.strip():
        return call_ai(prompt, provider, model, api_key, ollama_host)
    return "⚠️ Введите запрос"
|
|
def ai_generate_with_kb(selected_entries, writing_task, provider, model, api_key, ollama_host):
    """Compose the KB-grounded prompt and, unless composing failed, send it to the AI."""
    prompt = compose_context_for_writing(selected_entries, writing_task)
    # A prompt starting with the warning marker means no KB entries were usable.
    if prompt.startswith("⚠️"):
        return prompt
    return call_ai(prompt, provider, model, api_key, ollama_host)
|
|
def clear_chat():
    """Reset both the chatbot history and the message input box."""
    empty_history = []
    return empty_history, ""
|
|
|
|
# Assemble the Gradio UI; the custom CSS plus the Default theme produce the
# macOS look. `demo` is launched at the bottom of the file.
with gr.Blocks(title="Writing Style AI", css=macos_css, theme=gr.themes.Default(
    font=gr.themes.GoogleFont("Inter"),
    primary_hue="blue",
    secondary_hue="gray",
    neutral_hue="gray",
)) as demo:

    # Decorative window header: fake macOS traffic lights plus the app title.
    gr.HTML("""
    <div class="app-header">
        <div class="traffic-lights">
            <span class="tl tl-red"></span>
            <span class="tl tl-yellow"></span>
            <span class="tl tl-green"></span>
        </div>
        <div style="display:flex;align-items:center;gap:10px;margin-left:12px;">
            <span style="font-size:22px;">✍️</span>
            <div>
                <div style="font-size:17px;font-weight:700;color:#1c1c1e;letter-spacing:-0.3px;">Writing Style AI</div>
                <div style="font-size:11px;color:#8e8e93;font-weight:500;">Dataset · Fine-tune · Knowledge Base · AI Chat</div>
            </div>
        </div>
    </div>
    """)
|
|
    with gr.Tabs():

        # ---- Tab: AI assistant (chat plus one-shot generation) ----
        with gr.Tab("🤖 AI Ассистент"):

            # Provider / model / credential controls.
            with gr.Row():
                with gr.Column(scale=1):
                    gr.HTML('<div style="font-size:11px;font-weight:700;color:#8e8e93;letter-spacing:0.5px;text-transform:uppercase;margin-bottom:6px;">ПРОВАЙДЕР</div>')
                    ai_provider = gr.Radio(
                        choices=list(PROVIDER_MODELS.keys()),
                        value="🤗 HuggingFace (бесплатно)",
                        label="",
                        elem_id="provider_radio",
                    )
                with gr.Column(scale=1):
                    ai_model = gr.Dropdown(
                        choices=PROVIDER_MODELS["🤗 HuggingFace (бесплатно)"],
                        value="mistralai/Mistral-7B-Instruct-v0.3",
                        label="Модель",
                    )
                    ai_api_key = gr.Textbox(
                        label="API ключ",
                        placeholder="sk-... или hf_... (необязательно для HF)",
                        type="password",
                    )
                    # Only made visible when the Ollama provider is selected
                    # (see the .change handler below).
                    ai_ollama_host = gr.Textbox(
                        label="Ollama Host",
                        value="http://localhost:11434",
                        visible=False,
                    )

            # Chat column (left) and quick-prompt / single-request column (right).
            with gr.Row():
                with gr.Column(scale=2):
                    ai_system_prompt = gr.Textbox(
                        label="System Prompt (необязательно)",
                        placeholder="Ты — профессиональный писатель в стиле Хемингуэя...",
                        lines=2,
                    )
                    # NOTE(review): gr.Chatbot avatar_images normally expects image
                    # paths/URLs; plain emoji strings may not render — confirm.
                    ai_chatbot = gr.Chatbot(
                        label="",
                        height=420,
                        show_copy_button=True,
                        avatar_images=["🧑", "🤖"],
                        elem_id="macos_chat",
                    )
                    with gr.Row():
                        ai_input = gr.Textbox(
                            label="",
                            placeholder="Напишите сообщение... (Enter для отправки)",
                            lines=2,
                            scale=4,
                        )
                        with gr.Column(scale=1, min_width=100):
                            ai_send_btn = gr.Button("Отправить ↑", variant="primary")
                            ai_clear_btn = gr.Button("Очистить", variant="secondary")

                with gr.Column(scale=1):
                    gr.HTML('<div style="font-size:11px;font-weight:700;color:#8e8e93;letter-spacing:0.5px;text-transform:uppercase;margin-bottom:8px;">БЫСТРЫЕ ЗАПРОСЫ</div>')
                    # Static quick-prompt cards; only the first has an onclick handler.
                    gr.HTML("""
                    <div style="display:flex;flex-direction:column;gap:6px;">
                        <div style="background:rgba(0,122,255,0.08);border:1px solid rgba(0,122,255,0.2);border-radius:8px;padding:10px;font-size:13px;cursor:pointer;" onclick="document.querySelector('#macos_chat input, textarea').value='Напиши статью про ИИ в стиле научпоп'">📄 Статья в стиле научпоп</div>
                        <div style="background:rgba(52,199,89,0.08);border:1px solid rgba(52,199,89,0.2);border-radius:8px;padding:10px;font-size:13px;" >💡 Генерация идей</div>
                        <div style="background:rgba(255,149,0,0.08);border:1px solid rgba(255,149,0,0.2);border-radius:8px;padding:10px;font-size:13px;" >✏️ Редактирование текста</div>
                        <div style="background:rgba(175,82,222,0.08);border:1px solid rgba(175,82,222,0.2);border-radius:8px;padding:10px;font-size:13px;" >🔄 Перевод и локализация</div>
                    </div>
                    """)

                    # Stateless one-shot request, separate from the chat history.
                    gr.HTML('<div style="font-size:11px;font-weight:700;color:#8e8e93;letter-spacing:0.5px;text-transform:uppercase;margin:16px 0 8px;">ОДИНОЧНЫЙ ЗАПРОС</div>')
                    single_prompt = gr.Textbox(label="Промпт", lines=4, placeholder="Введите промпт...")
                    single_generate_btn = gr.Button("⚡ Генерировать", variant="primary")
                    single_output = gr.Textbox(label="Результат", lines=8, show_copy_button=True)

            # Event wiring for this tab.
            ai_provider.change(update_models, inputs=[ai_provider], outputs=[ai_model])
            ai_provider.change(
                lambda p: gr.update(visible=p == "🏠 Ollama (локально)"),
                inputs=[ai_provider], outputs=[ai_ollama_host]
            )
            ai_send_btn.click(
                ai_chat,
                inputs=[ai_input, ai_chatbot, ai_provider, ai_model, ai_api_key, ai_ollama_host, ai_system_prompt],
                outputs=[ai_chatbot, ai_input],
            )
            ai_input.submit(
                ai_chat,
                inputs=[ai_input, ai_chatbot, ai_provider, ai_model, ai_api_key, ai_ollama_host, ai_system_prompt],
                outputs=[ai_chatbot, ai_input],
            )
            ai_clear_btn.click(clear_chat, outputs=[ai_chatbot, ai_input])
            single_generate_btn.click(
                ai_generate_text,
                inputs=[single_prompt, ai_provider, ai_model, ai_api_key, ai_ollama_host],
                outputs=[single_output],
            )
|
|
| |
        # ---- Tab: dataset builder (upload -> chunk -> save) ----
        with gr.Tab("📦 Dataset Builder"):
            gr.HTML('<h3 style="margin:0 0 12px;font-size:15px;font-weight:600;color:#1c1c1e;">Загрузите ваши тексты (DOCX / TXT) для создания датасета</h3>')
            with gr.Row():
                with gr.Column(scale=2):
                    file_input = gr.File(
                        label="DOCX или TXT файлы",
                        file_count="multiple",
                        file_types=[".docx", ".txt"],
                    )
                    with gr.Row():
                        # NOTE(review): chunk_size=128 with overlap=128 is allowed
                        # by these ranges; split_into_chunks must guard against it.
                        chunk_size = gr.Slider(128, 1024, value=512, step=64, label="Размер чанка (слов)")
                        overlap = gr.Slider(0, 128, value=64, step=16, label="Перекрытие (слов)")
                    add_qa = gr.Checkbox(value=True, label="Генерировать QA-пары")
                    format_choice = gr.Radio(["JSONL", "JSON", "CSV"], value="JSONL", label="Формат")
                    dataset_name = gr.Textbox(label="Название датасета", value="my_dataset")
                    with gr.Row():
                        process_btn = gr.Button("⚙️ Обработать файлы", variant="primary")
                        clear_btn = gr.Button("🗑️ Очистить буфер", variant="secondary")
                    # Disabled until process_files() has buffered some records.
                    save_btn = gr.Button("💾 Сохранить датасет", variant="primary", interactive=False)
                with gr.Column(scale=1):
                    status_box = gr.Textbox(label="Статус", lines=8)
                    preview_box = gr.Textbox(label="Превью записей", lines=8)
                    file_output = gr.File(label="Скачать датасет")
                    # A callable `value` is re-evaluated by Gradio on render.
                    datasets_list = gr.Textbox(label="Сохранённые датасеты", lines=4, value=list_datasets)

            # Event wiring for this tab.
            process_btn.click(
                process_files,
                inputs=[file_input, chunk_size, overlap, format_choice, add_qa],
                outputs=[status_box, save_btn, preview_box],
            )
            save_btn.click(
                save_dataset,
                inputs=[format_choice, dataset_name],
                outputs=[status_box, file_output],
            ).then(lambda: list_datasets(), outputs=datasets_list)
            clear_btn.click(clear_buffer, outputs=status_box)
|
|
| |
        # ---- Tab: fine-tuning guide (generates Colab instructions only) ----
        with gr.Tab("🧠 Fine-tuning (LoRA)"):
            gr.HTML('<h3 style="margin:0 0 12px;font-size:15px;font-weight:600;color:#1c1c1e;">Бесплатное обучение LoRA через Google Colab</h3>')
            with gr.Row():
                with gr.Column():
                    model_choice = gr.Radio(
                        ["Mistral 7B (рекомендуется)", "Llama 3.1 8B", "Gemma 2 9B", "Phi-3 Mini (быстрее)"],
                        value="Mistral 7B (рекомендуется)",
                        label="Базовая модель",
                    )
                    dataset_path_input = gr.Textbox(
                        label="Путь к датасету",
                        placeholder="datasets/my_dataset.jsonl",
                    )
                    guide_btn = gr.Button("📋 Получить инструкции", variant="primary")
                with gr.Column(scale=2):
                    guide_output = gr.Markdown()
            guide_btn.click(get_colab_guide, inputs=[model_choice, dataset_path_input], outputs=guide_output)
            gr.HTML('<div style="margin-top:12px;font-size:13px;color:#6e6e73;"><b>Ресурсы:</b> <a href="https://colab.research.google.com" target="_blank" style="color:#007aff;">Google Colab</a> · <a href="https://github.com/unslothai/unsloth" target="_blank" style="color:#007aff;">Unsloth</a> · <a href="https://huggingface.co" target="_blank" style="color:#007aff;">HuggingFace</a> · <a href="https://www.kaggle.com/code" target="_blank" style="color:#007aff;">Kaggle</a></div>')
|
|
| |
        # ---- Tab: knowledge base (CRUD, search, and KB-grounded generation) ----
        with gr.Tab("📚 Knowledge Base"):
            gr.HTML('<h3 style="margin:0 0 12px;font-size:15px;font-weight:600;color:#1c1c1e;">База знаний — контекст для генерации текстов</h3>')
            with gr.Tabs():
                with gr.Tab("➕ Добавить запись"):
                    with gr.Row():
                        with gr.Column():
                            kb_name = gr.Textbox(label="Название записи", placeholder="Мой стиль / Тема...")
                            kb_tags = gr.Textbox(label="Теги (через запятую)", placeholder="стиль, технический")
                            kb_text = gr.Textbox(label="Текст / Контекст", lines=8)
                            kb_files = gr.File(
                                label="Загрузить DOCX/TXT",
                                file_count="multiple",
                                file_types=[".docx", ".txt"],
                            )
                            kb_add_btn = gr.Button("✅ Добавить в базу", variant="primary")
                        with gr.Column():
                            kb_status = gr.Textbox(label="Статус", lines=3)
                            kb_list_add = gr.CheckboxGroup(label="Записи в базе", choices=get_kb_choices())

                with gr.Tab("🔍 Просмотр и поиск"):
                    with gr.Row():
                        kb_list_view = gr.CheckboxGroup(label="Записи (выберите для поиска)", choices=get_kb_choices())
                        refresh_btn = gr.Button("🔄 Обновить список", variant="secondary")
                    with gr.Row():
                        search_query = gr.Textbox(label="Поиск", placeholder="ключевое слово...")
                        search_btn = gr.Button("Найти", variant="primary")
                    search_results = gr.Markdown()
                    with gr.Row():
                        view_entry = gr.Dropdown(label="Просмотреть запись", choices=get_kb_choices())
                        view_btn = gr.Button("👁 Показать", variant="secondary")
                        del_btn = gr.Button("🗑️ Удалить", variant="stop")
                        kb_del_status = gr.Textbox(label="Статус", lines=2)
                    with gr.Row():
                        entry_text = gr.Textbox(label="Текст", lines=6, interactive=False)
                        entry_tags = gr.Textbox(label="Теги", interactive=False)
                        entry_created = gr.Textbox(label="Создано", interactive=False)

                with gr.Tab("✍️ Генерация с KB + AI"):
                    gr.HTML('<div style="font-size:13px;color:#6e6e73;margin-bottom:12px;">Выберите записи из базы знаний, задайте задание — AI напишет текст в нужном стиле</div>')
                    gen_entries = gr.CheckboxGroup(label="Выберите записи из KB", choices=get_kb_choices())
                    refresh_gen_btn = gr.Button("🔄 Обновить", variant="secondary")
                    writing_task = gr.Textbox(
                        label="Задание для генерации",
                        placeholder="Напиши статью про... / Создай пост о...",
                        lines=3,
                    )
                    with gr.Row():
                        compose_btn = gr.Button("📝 Сформировать промпт", variant="secondary")
                        kb_ai_generate_btn = gr.Button("🤖 Сгенерировать через AI", variant="primary")
                    composed_prompt = gr.Textbox(
                        label="Готовый промпт",
                        lines=8,
                        show_copy_button=True,
                    )
                    kb_ai_result = gr.Textbox(
                        label="Результат AI",
                        lines=12,
                        show_copy_button=True,
                    )

            # Event wiring for the Knowledge Base tab. Note the AI generation
            # reuses the provider/model/key components from the first tab.
            kb_add_btn.click(
                add_kb_entry,
                inputs=[kb_name, kb_text, kb_tags, kb_files],
                outputs=[kb_status, kb_list_add],
            )
            refresh_btn.click(refresh_kb, outputs=kb_list_view)
            search_btn.click(search_kb, inputs=[search_query, kb_list_view], outputs=search_results)
            view_btn.click(get_kb_entry_content, inputs=[view_entry], outputs=[entry_text, entry_tags, entry_created])
            del_btn.click(delete_kb_entry, inputs=[view_entry], outputs=[kb_del_status, kb_list_view])
            refresh_gen_btn.click(refresh_kb, outputs=gen_entries)
            compose_btn.click(compose_context_for_writing, inputs=[gen_entries, writing_task], outputs=composed_prompt)
            kb_ai_generate_btn.click(
                ai_generate_with_kb,
                inputs=[gen_entries, writing_task, ai_provider, ai_model, ai_api_key, ai_ollama_host],
                outputs=[kb_ai_result],
            )
|
|
    # Footer shown beneath every tab.
    gr.HTML("""
    <div style="text-align:center;padding:16px 0 4px;font-size:12px;color:#8e8e93;">
        ✍️ <b>Writing Style AI</b> — Dataset Builder · LoRA Fine-tuning · Knowledge Base · AI Chat
    </div>
    """)


# Launch only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()
|
|