| """ |
| import gradio as gr |
| from huggingface_hub import InferenceClient |
| |
| |
| def respond( |
| message, |
| history: list[dict[str, str]], |
| system_message, |
| max_tokens, |
| temperature, |
| top_p, |
| hf_token: gr.OAuthToken, |
| ): |
| """ |
| |
| """ |
| client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b") |
| |
| messages = [{"role": "system", "content": system_message}] |
| |
| messages.extend(history) |
| |
| messages.append({"role": "user", "content": message}) |
| |
| response = "" |
| |
| for message in client.chat_completion( |
| messages, |
| max_tokens=max_tokens, |
| stream=True, |
| temperature=temperature, |
| top_p=top_p, |
| ): |
| choices = message.choices |
| token = "" |
| if len(choices) and choices[0].delta.content: |
| token = choices[0].delta.content |
| |
| response += token |
| yield response |
| |
| """ |
|
|
| |
| """ |
| chatbot = gr.ChatInterface( |
| respond, |
| additional_inputs=[ |
| gr.Textbox(value="You are a friendly Chatbot.", label="System message"), |
| gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"), |
| gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"), |
| gr.Slider( |
| minimum=0.1, |
| maximum=1.0, |
| value=0.95, |
| step=0.05, |
| label="Top-p (nucleus sampling)", |
| ), |
| ], |
| ) |
| |
| with gr.Blocks() as demo: |
| with gr.Sidebar(): |
| gr.LoginButton() |
| chatbot.render() |
| |
| |
| if __name__ == "__main__": |
| demo.launch() |
| """ |
|
|
| """ |
| import gradio as gr |
| import torch |
| from threading import Thread |
| from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer |
| import os |
| |
| token = os.getenv("HF_TOKEN") |
| |
| |
| # load model |
| model_id = "meta-llama/Llama-3.2-3B-Instruct" |
| #model_id = "Qwen/Qwen2.5-3B-Instruct" |
| tokenizer = AutoTokenizer.from_pretrained(model_id, token=token) |
| if tokenizer.pad_token_id is None: |
| tokenizer.pad_token = tokenizer.eos_token |
| |
| model = AutoModelForCausalLM.from_pretrained( |
| model_id, |
| device_map="cpu", |
| token=token, |
| torch_dtype=torch.float32, |
| ) |
| model.eval() |
| |
| session_log = [] |
| |
| |
| # input normaliser |
| def extract_user_text(message): |
| def collect(x): |
| if x is None: |
| return [] |
| if isinstance(x, str): |
| return [x] |
| if isinstance(x, dict): |
| results = [] |
| for key in ("text", "content"): |
| if key in x: |
| results.extend(collect(x[key])) |
| return results |
| if isinstance(x, list): |
| results = [] |
| for item in x: |
| results.extend(collect(item)) |
| return results |
| return [] |
| |
| texts = collect(message) |
| cleaned = " ".join(t.strip() for t in texts if isinstance(t, str) and t.strip()) |
| return cleaned if cleaned else "" |
| |
| |
| # for storing the chat |
| def build_messages(history, message): |
| messages = [] |
| |
| if history is not None: |
| for h in history: |
| # old Gradio format: (user, assistant) |
| if isinstance(h, (list, tuple)) and len(h) == 2: |
| user_text = extract_user_text(h[0]) # <-- FIX (no str()) |
| assistant_text = extract_user_text(h[1]) # <-- FIX (no str()) |
| if user_text: |
| messages.append({"role": "user", "content": user_text}) |
| if assistant_text: |
| messages.append({"role": "assistant", "content": assistant_text}) |
| continue |
| |
| # new Gradio format: {"role":..., "content":...} |
| if isinstance(h, dict): |
| if "role" in h and "content" in h: |
| role = h.get("role") |
| content = extract_user_text(h.get("content")) # <-- FIX (normalize) |
| if role in ("user", "assistant", "system") and content: |
| messages.append({"role": role, "content": content}) |
| continue |
| |
| # fallback for other dict shapes |
| text = extract_user_text(h) |
| if text: |
| messages.append({"role": "user", "content": text}) |
| |
| messages.append({"role": "user", "content": message}) |
| return messages |
| |
| |
| # stream chat to gradio |
| def stream_chat(message, history): |
| # STEP 1: NORMALISE INPUT |
| message = extract_user_text(message) |
| |
| # STEP 2: BUILD MESSAGE HISTORY |
| messages = build_messages(history, message) |
| |
| # STEP 3: TOKENIZE (use text prompt to avoid odd tensor/dict behavior) |
| prompt = tokenizer.apply_chat_template( |
| messages, |
| tokenize=False, |
| add_generation_prompt=True, |
| ) |
| input_ids = tokenizer(prompt, return_tensors="pt").input_ids |
| |
| # STEP 4: STREAMER |
| streamer = TextIteratorStreamer( |
| tokenizer, |
| skip_prompt=True, |
| skip_special_tokens=True, |
| ) |
| |
| # STEP 5: GENERATION THREAD |
| def _gen(): |
| with torch.inference_mode(): |
| model.generate( |
| input_ids=input_ids, |
| streamer=streamer, |
| max_new_tokens=120, # CPU SAFE LIMIT |
| temperature=0.7, |
| do_sample=True, |
| eos_token_id=tokenizer.eos_token_id, |
| pad_token_id=tokenizer.pad_token_id, |
| ) |
| |
| thread = Thread(target=_gen) |
| thread.start() |
| |
| output = "" |
| for text in streamer: |
| if text: |
| output += text |
| yield output |
| |
| session_log.append({"user": message, "assistant": output}) |
| |
| |
| # launch gradio ui |
| demo = gr.ChatInterface( |
| fn=stream_chat, |
| title="🦙 LLaMA 3", |
| description="Hi", |
| ) |
| |
| demo.launch() |
| """ |
|
|
| import gradio as gr |
| import torch |
| import os |
| import numpy as np |
| from threading import Thread |
| from empath import Empath |
| from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer |
| import torch.nn as nn |
|
|
| |
| |
| |
# Hugging Face access token (taken from the environment) and the checkpoint id.
token = os.getenv("HF_TOKEN")
model_id = "meta-llama/Llama-3.2-3B-Instruct"

# Device name; the model below is likewise loaded with device_map="cpu".
device = "cpu"

tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)

# Reuse the EOS token for padding when the tokenizer defines no pad token.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="cpu",
    torch_dtype=torch.float32,
    token=token,
)
model.eval()

# Freeze every base-model weight so no gradients are accumulated for them.
for p in model.parameters():
    p.requires_grad_(False)
|
|
| |
| |
| |
class EmpathVectorizer:
    """Map text to a fixed-length, L2-normalised vector of Empath category scores."""

    def __init__(self):
        self.lexicon = Empath()

        # Category names read from the Empath result, in output order.
        # Names absent from the result contribute 0.0 (see ``transform``).
        self.dimensions = [
            "positive_emotion",
            "negative_emotion",
            "social",
            "communication",
            "technology",
            "cognitive_process",
            "work",
            "family",
        ]

    def transform(self, text):
        """Return a float32 vector with one entry per name in ``self.dimensions``.

        Args:
            text: The text to analyse.

        Returns:
            A 1-D ``np.float32`` array of length ``len(self.dimensions)``,
            scaled to (approximately) unit L2 norm. An all-zero vector is
            returned when no category matched or the text had no tokens.
        """
        raw = self.lexicon.analyze(text, normalize=True)

        # With normalize=True, Empath returns None (not a dict) when the text
        # yields no tokens; treat that as "no signal" instead of crashing on
        # ``None.get``.
        if raw is None:
            return np.zeros(len(self.dimensions), dtype=np.float32)

        vec = np.array(
            [raw.get(d, 0.0) for d in self.dimensions],
            dtype=np.float32,
        )

        # The epsilon keeps an all-zero vector from dividing by zero.
        return vec / (np.linalg.norm(vec) + 1e-8)
|
|
|
|
| |
| |
| |
class PrefixPEFT(nn.Module):
    """Project a style vector to ``prefix_len`` prefix embeddings of width ``d_model``."""

    def __init__(self, style_dim, d_model, prefix_len=8):
        """
        Args:
            style_dim: Size of the incoming style vector.
            d_model: Width of each generated prefix embedding.
            prefix_len: Number of prefix positions produced per sample.
        """
        super().__init__()
        self.prefix_len = prefix_len
        self.d_model = d_model

        # One linear layer emits all prefix positions at once; tanh bounds
        # the values to (-1, 1).
        self.mlp = nn.Sequential(
            nn.Linear(style_dim, d_model * prefix_len),
            nn.Tanh()
        )

    def forward(self, style_vec):
        """Return prefix embeddings of shape ``(batch, prefix_len, d_model)``.

        Args:
            style_vec: Tensor of shape ``(batch, style_dim)`` or ``(style_dim,)``;
                an unbatched vector is treated as a batch of one.
        """
        x = self.mlp(style_vec)
        # Reshape per sample: the previous view(1, prefix_len, -1) folded any
        # batch larger than one into the feature axis.
        return x.view(-1, self.prefix_len, self.d_model)
|
|
|
|
| |
| |
| |
# Module-level singletons: the Empath feature extractor, the style-to-prefix
# network sized from the loaded model's hidden width, and an Adam optimiser
# over the prefix network's parameters only.
vectorizer = EmpathVectorizer()

prefix_model = PrefixPEFT(
    len(vectorizer.dimensions),
    model.config.hidden_size,
    prefix_len=8,
)

optimizer = torch.optim.Adam(params=prefix_model.parameters(), lr=1e-3)
|
|
| |
| |
| |
def extract_user_text(x):
    """Normalise a chat payload to plain text.

    Args:
        x: ``None``, a string, a dict carrying ``"text"`` or ``"content"``,
            a list/tuple of such payloads, or any other object.

    Returns:
        ``""`` for ``None``; the string itself for ``str``; for a dict, the
        first non-empty text found under ``"text"`` then ``"content"`` (or
        ``""``); for a list/tuple, the non-empty texts of its items joined by
        single spaces; otherwise ``str(x)``.
    """
    # None must not turn into the literal text "None".
    if x is None:
        return ""
    if isinstance(x, str):
        return x
    if isinstance(x, dict):
        # Values may themselves be nested (e.g. content given as a list of
        # parts), so recurse instead of returning the raw value.
        for key in ("text", "content"):
            found = extract_user_text(x.get(key))
            if found:
                return found
        return ""
    if isinstance(x, (list, tuple)):
        parts = (extract_user_text(item) for item in x)
        return " ".join(part for part in parts if part)
    return str(x)
|
|
|
|
| |
| |
| |
def build_user_style(history):
    """
    Empath is computed ONLY on USER messages.

    Collects the user's side of the conversation into one string.

    Args:
        history: ``None`` or an iterable of turns. Two shapes are read:
            ``(user, assistant)`` pairs (list or tuple of length 2) and
            ``{"role": ..., "content": ...}`` dicts, of which only those with
            role ``"user"`` are used. Anything else is ignored.

    Returns:
        The user texts joined by single spaces, truncated to the last
        3000 characters. ``""`` when there is nothing to collect.
    """
    texts = []

    for h in history or []:
        raw = None
        if isinstance(h, dict):
            # Role/content entries: previously skipped entirely, which left
            # the style text empty for dict-format histories.
            if h.get("role") == "user":
                raw = h.get("content")
        elif isinstance(h, (list, tuple)) and len(h) == 2:
            raw = h[0]

        # A missing user slot must not be stringified into "None".
        if raw is None:
            continue

        user = extract_user_text(raw)
        if user:
            texts.append(user)

    # Keep only the most recent 3000 characters.
    return " ".join(texts)[-3000:]
|
|
|
|
| |
| |
| |
def build_messages(history, message):
    """Build the role/content message list for the chat template.

    Args:
        history: ``None`` or an iterable of earlier turns, either
            ``(user, assistant)`` pairs (list or tuple of length 2) or
            ``{"role": ..., "content": ...}`` dicts with role ``"user"``,
            ``"assistant"`` or ``"system"``. Other entries are ignored.
        message: Text of the new user message; appended last, unchanged.

    Returns:
        A list of ``{"role": ..., "content": ...}`` dicts in conversation
        order, ending with the new user message. Empty turns are dropped.
    """
    messages = []

    def add(role, raw):
        # Skip missing slots outright so None never becomes the text "None".
        if raw is None:
            return
        text = extract_user_text(raw)
        if text:
            messages.append({"role": role, "content": text})

    for h in history or []:
        if isinstance(h, dict):
            # Role/content entries: previously ignored, which dropped all
            # prior context for dict-format histories.
            role = h.get("role")
            if role in ("user", "assistant", "system"):
                add(role, h.get("content"))
        elif isinstance(h, (list, tuple)) and len(h) == 2:
            add("user", h[0])
            add("assistant", h[1])

    messages.append({"role": "user", "content": message})
    return messages
|
|
|
|
| |
| |
| |
def train_step(user_history, prompt, target):
    """Run one optimiser step on the prefix network for a (prompt, target) pair.

    The style vector derived from ``user_history`` is turned into prefix
    embeddings, which are prepended to the embedded prompt and target. The
    loss is the next-token cross-entropy over the target tokens only.

    Args:
        user_history: Conversation history passed to ``build_user_style``.
        prompt: Prompt text; tokenised with the tokenizer's default settings.
        target: Text the model should produce after the prompt.

    Returns:
        The loss of this step as a Python float.

    Raises:
        ValueError: If ``target`` tokenises to zero tokens.
    """
    optimizer.zero_grad()

    # Style vector -> prefix embeddings; gradients flow into prefix_model.
    style_text = build_user_style(user_history)
    style_vec = vectorizer.transform(style_text)
    style_vec = torch.tensor(style_vec, dtype=torch.float32).unsqueeze(0)
    prefix = prefix_model(style_vec)

    prompt_ids = tokenizer(prompt, return_tensors="pt").input_ids
    # The target continues the prompt, so it must not start with its own
    # special tokens (e.g. a second BOS in the middle of the sequence).
    target_ids = tokenizer(
        target, return_tensors="pt", add_special_tokens=False
    ).input_ids

    n_target = target_ids.shape[1]
    if n_target == 0:
        raise ValueError("target must contain at least one token")

    prompt_embeds = model.model.embed_tokens(prompt_ids)
    target_embeds = model.model.embed_tokens(target_ids)

    # Teacher forcing: the model sees prefix + prompt + target.
    inputs_embeds = torch.cat([prefix, prompt_embeds, target_embeds], dim=1)
    attention_mask = torch.ones(inputs_embeds.shape[:2], dtype=torch.long)

    outputs = model(
        inputs_embeds=inputs_embeds,
        attention_mask=attention_mask
    )

    # The logit at position i predicts token i + 1, so the predictions for
    # the n_target target tokens sit one step before each of them. The
    # previous slice covered the prompt positions, whose count generally
    # differs from the number of target tokens.
    logits = outputs.logits[:, -n_target - 1:-1, :]

    loss = torch.nn.functional.cross_entropy(
        logits.reshape(-1, logits.size(-1)),
        target_ids.reshape(-1)
    )

    loss.backward()
    optimizer.step()

    return loss.item()
|
|
|
|
| |
| |
| |
def stream_chat(message, history):
    """Generate a reply and yield it incrementally.

    Args:
        message: The new user message; normalised with ``extract_user_text``.
        history: Earlier turns of the conversation, or ``None``.

    Yields:
        The reply accumulated so far, once per chunk of streamed text.
    """
    message = extract_user_text(message)
    # Tolerate a missing history and avoid mutating the caller's list.
    history = list(history or [])

    messages = build_messages(history, message)

    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )

    # The rendered template already contains the special tokens; adding them
    # again during tokenisation would duplicate the BOS token.
    input_ids = tokenizer(
        prompt, return_tensors="pt", add_special_tokens=False
    )["input_ids"]

    # The style text covers the earlier user turns plus the current message.
    style_text = build_user_style(history + [(message, "")])

    # Inference only: build the prefix and embeddings without autograd state.
    with torch.inference_mode():
        style_vec = vectorizer.transform(style_text)
        style_vec = torch.tensor(style_vec, dtype=torch.float32).unsqueeze(0)
        prefix = prefix_model(style_vec)

        token_embeds = model.model.embed_tokens(input_ids)
        inputs_embeds = torch.cat([prefix, token_embeds], dim=1)

    attention_mask = torch.ones(inputs_embeds.shape[:2], dtype=torch.long)

    streamer = TextIteratorStreamer(
        tokenizer,
        skip_prompt=True,
        skip_special_tokens=True,
    )

    def _gen():
        with torch.inference_mode():
            model.generate(
                inputs_embeds=inputs_embeds,
                attention_mask=attention_mask,
                streamer=streamer,
                max_new_tokens=120,
                temperature=0.7,
                do_sample=True,
                eos_token_id=tokenizer.eos_token_id,
                pad_token_id=tokenizer.pad_token_id,
            )

    # Generation runs in a background thread while this generator drains
    # the streamer.
    Thread(target=_gen).start()

    out = ""
    for t in streamer:
        out += t
        yield out
|
|
|
|
| |
| |
| |
# Chat UI wired to the streaming handler defined in this file.
demo = gr.ChatInterface(
    stream_chat,
    title="🧠 Empath + Prefix-Tuned LLaMA (User-Conditioned Style)",
    description="Style is learned ONLY from user history via Empath → prefix tuning.",
)

# Start the Gradio server.
demo.launch()