study3 / app.py
eleminu's picture
update
8337720
Raw
History Blame Contribute Delete
12.6 kB
"""
import gradio as gr
from huggingface_hub import InferenceClient
def respond(
message,
history: list[dict[str, str]],
system_message,
max_tokens,
temperature,
top_p,
hf_token: gr.OAuthToken,
):
"""
# For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
messages = [{"role": "system", "content": system_message}]
messages.extend(history)
messages.append({"role": "user", "content": message})
response = ""
for message in client.chat_completion(
messages,
max_tokens=max_tokens,
stream=True,
temperature=temperature,
top_p=top_p,
):
choices = message.choices
token = ""
if len(choices) and choices[0].delta.content:
token = choices[0].delta.content
response += token
yield response
"""
#For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
chatbot = gr.ChatInterface(
respond,
additional_inputs=[
gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
gr.Slider(
minimum=0.1,
maximum=1.0,
value=0.95,
step=0.05,
label="Top-p (nucleus sampling)",
),
],
)
with gr.Blocks() as demo:
with gr.Sidebar():
gr.LoginButton()
chatbot.render()
if __name__ == "__main__":
demo.launch()
"""
"""
import gradio as gr
import torch
from threading import Thread
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
import os
token = os.getenv("HF_TOKEN")
# load model
model_id = "meta-llama/Llama-3.2-3B-Instruct"
#model_id = "Qwen/Qwen2.5-3B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
if tokenizer.pad_token_id is None:
tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(
model_id,
device_map="cpu",
token=token,
torch_dtype=torch.float32,
)
model.eval()
session_log = []
# input normaliser
def extract_user_text(message):
def collect(x):
if x is None:
return []
if isinstance(x, str):
return [x]
if isinstance(x, dict):
results = []
for key in ("text", "content"):
if key in x:
results.extend(collect(x[key]))
return results
if isinstance(x, list):
results = []
for item in x:
results.extend(collect(item))
return results
return []
texts = collect(message)
cleaned = " ".join(t.strip() for t in texts if isinstance(t, str) and t.strip())
return cleaned if cleaned else ""
# for storing the chat
def build_messages(history, message):
messages = []
if history is not None:
for h in history:
# old Gradio format: (user, assistant)
if isinstance(h, (list, tuple)) and len(h) == 2:
user_text = extract_user_text(h[0]) # <-- FIX (no str())
assistant_text = extract_user_text(h[1]) # <-- FIX (no str())
if user_text:
messages.append({"role": "user", "content": user_text})
if assistant_text:
messages.append({"role": "assistant", "content": assistant_text})
continue
# new Gradio format: {"role":..., "content":...}
if isinstance(h, dict):
if "role" in h and "content" in h:
role = h.get("role")
content = extract_user_text(h.get("content")) # <-- FIX (normalize)
if role in ("user", "assistant", "system") and content:
messages.append({"role": role, "content": content})
continue
# fallback for other dict shapes
text = extract_user_text(h)
if text:
messages.append({"role": "user", "content": text})
messages.append({"role": "user", "content": message})
return messages
# stream chat to gradio
def stream_chat(message, history):
# STEP 1: NORMALISE INPUT
message = extract_user_text(message)
# STEP 2: BUILD MESSAGE HISTORY
messages = build_messages(history, message)
# STEP 3: TOKENIZE (use text prompt to avoid odd tensor/dict behavior)
prompt = tokenizer.apply_chat_template(
messages,
tokenize=False,
add_generation_prompt=True,
)
input_ids = tokenizer(prompt, return_tensors="pt").input_ids
# STEP 4: STREAMER
streamer = TextIteratorStreamer(
tokenizer,
skip_prompt=True,
skip_special_tokens=True,
)
# STEP 5: GENERATION THREAD
def _gen():
with torch.inference_mode():
model.generate(
input_ids=input_ids,
streamer=streamer,
max_new_tokens=120, # CPU SAFE LIMIT
temperature=0.7,
do_sample=True,
eos_token_id=tokenizer.eos_token_id,
pad_token_id=tokenizer.pad_token_id,
)
thread = Thread(target=_gen)
thread.start()
output = ""
for text in streamer:
if text:
output += text
yield output
session_log.append({"user": message, "assistant": output})
# launch gradio ui
demo = gr.ChatInterface(
fn=stream_chat,
title="🦙 LLaMA 3",
description="Hi",
)
demo.launch()
"""
import gradio as gr
import torch
import os
import numpy as np
from threading import Thread
from empath import Empath
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
import torch.nn as nn
# -------------------------
# CONFIG
# -------------------------
# Hub access token (may be None when HF_TOKEN is not set in the environment).
token = os.environ.get("HF_TOKEN")
model_id = "meta-llama/Llama-3.2-3B-Instruct"
device = "cpu"

# -------------------------
# LOAD MODEL (FROZEN LLM)
# -------------------------
tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
# generate() is later given pad_token_id, so make sure one exists.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map=device,
    torch_dtype=torch.float32,
    token=token,
)

# IMPORTANT: freeze LLM -- only the prefix network is meant to learn.
for p in model.eval().parameters():
    p.requires_grad = False
# -------------------------
# EMPATH STYLE ENCODER
# -------------------------
class EmpathVectorizer:
    """Turn free text into a fixed-length, L2-normalised vector of Empath scores.

    The vector has one entry per name in ``self.dimensions``, in that order.
    """

    def __init__(self):
        self.lexicon = Empath()
        # fixed style space: the list order defines the vector layout
        self.dimensions = [
            "positive_emotion",
            "negative_emotion",
            "social",
            "communication",
            "technology",
            "cognitive_process",
            "work",
            "family",
        ]

    def transform(self, text):
        """Return the style vector for ``text``.

        Args:
            text: The text to analyse; may be empty.

        Returns:
            A float32 NumPy array of length ``len(self.dimensions)``. It is
            scaled to unit L2 norm, or all zeros when no dimension scored.
        """
        # NOTE(review): Empath appears to return None rather than a dict when
        # normalize=True and the text has no tokens -- confirm against the
        # installed version. Falling back to {} yields a zero vector instead
        # of an AttributeError on empty input.
        raw = self.lexicon.analyze(text, normalize=True) or {}
        # Names missing from the analysis result simply contribute 0.0.
        vec = np.array(
            [raw.get(d, 0.0) for d in self.dimensions],
            dtype=np.float32,
        )
        # The epsilon keeps the all-zero vector from dividing by zero.
        return vec / (np.linalg.norm(vec) + 1e-8)
# -------------------------
# PREFIX TUNING MODULE (PEFT)
# -------------------------
class PrefixPEFT(nn.Module):
    """Project a style vector to a block of ``prefix_len`` embedding-sized vectors.

    A single Linear + Tanh layer maps ``style_dim`` features to
    ``prefix_len * d_model`` values, which are reshaped so they can be
    concatenated in front of token embeddings along the sequence axis.

    Args:
        style_dim: Number of features in the incoming style vector.
        d_model: Width of each produced prefix vector.
        prefix_len: Number of prefix vectors produced per style vector.
    """

    def __init__(self, style_dim, d_model, prefix_len=8):
        super().__init__()
        self.prefix_len = prefix_len
        self.d_model = d_model
        self.mlp = nn.Sequential(
            nn.Linear(style_dim, d_model * prefix_len),
            nn.Tanh(),
        )

    def forward(self, style_vec):
        """Return the prefix for ``style_vec``.

        Args:
            style_vec: Tensor of shape ``(style_dim,)`` or ``(batch, style_dim)``.

        Returns:
            Tensor of shape ``(batch, prefix_len, d_model)``; an unbatched
            input is treated as a batch of one.
        """
        x = self.mlp(style_vec)
        # Let the batch axis be inferred instead of pinning it to 1, which
        # would silently fold a real batch into the feature axis.
        return x.view(-1, self.prefix_len, self.d_model)
# -------------------------
# INIT SYSTEM
# -------------------------
vectorizer = EmpathVectorizer()

# The prefix network maps one score per Empath dimension to 8 vectors of the
# LLM's hidden size.
prefix_model = PrefixPEFT(
    len(vectorizer.dimensions),
    model.config.hidden_size,
    prefix_len=8,
)

# Only the prefix network's parameters are handed to the optimizer.
optimizer = torch.optim.Adam(params=prefix_model.parameters(), lr=1e-3)
# -------------------------
# TEXT UTILS
# -------------------------
def extract_user_text(x):
    """Reduce a chat payload to plain text.

    Args:
        x: A string, ``None``, a dict carrying ``"text"`` or ``"content"``,
            a list/tuple of such values, or any other object.

    Returns:
        The extracted string. ``None`` and dicts without usable text give
        ``""``; sequences give their non-empty parts joined by single spaces;
        any other object gives ``str(x)``.
    """
    if x is None:
        # str(None) would inject the literal word "None" into the chat.
        return ""
    if isinstance(x, str):
        return x
    if isinstance(x, dict):
        # "text" wins over "content"; recurse because either may itself be a
        # nested list/dict rather than a string.
        return extract_user_text(x.get("text") or x.get("content"))
    if isinstance(x, (list, tuple)):
        parts = (extract_user_text(item) for item in x)
        return " ".join(part for part in parts if part)
    return str(x)
# -------------------------
# STYLE FROM USER HISTORY (CORRECT DESIGN)
# -------------------------
def build_user_style(history):
    """Collect the user's own words from the chat history for style analysis.

    Empath is computed ONLY on USER messages, so assistant and system turns
    are skipped.

    Args:
        history: ``None``, or a list whose entries are either
            ``(user, assistant)`` pairs or ``{"role": ..., "content": ...}``
            dicts. Entries of any other shape are ignored.

    Returns:
        The user texts joined by spaces, truncated to the last 3000 characters.
    """
    texts = []
    for turn in history or []:
        if isinstance(turn, dict):
            # role/content format: keep user turns only
            if turn.get("role") != "user":
                continue
            raw = turn.get("content")
        elif isinstance(turn, (list, tuple)) and len(turn) == 2:
            # pair format: the first slot is the user side
            raw = turn[0]
        else:
            continue
        if raw is None:
            # An empty slot must not be stringified into the word "None".
            continue
        user = extract_user_text(raw)
        if user:
            texts.append(user)
    return " ".join(texts)[-3000:]  # cap
# -------------------------
# CHAT BUILDING
# -------------------------
def build_messages(history, message):
    """Convert chat history plus the new user message into role/content dicts.

    Args:
        history: ``None``, or a list whose entries are either
            ``(user, assistant)`` pairs or ``{"role": ..., "content": ...}``
            dicts. Entries of any other shape are ignored.
        message: Text of the new user turn; appended last, unmodified.

    Returns:
        A list of ``{"role": ..., "content": ...}`` dicts in conversation
        order, ending with the new user message. Empty turns are dropped.
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            # role/content format
            role = turn.get("role")
            raw = turn.get("content")
            if role not in ("user", "assistant", "system") or raw is None:
                continue
            text = extract_user_text(raw)
            if text:
                messages.append({"role": role, "content": text})
        elif isinstance(turn, (list, tuple)) and len(turn) == 2:
            # pair format: (user, assistant)
            for role, raw in zip(("user", "assistant"), turn):
                if raw is None:
                    # An empty slot must not become the word "None".
                    continue
                text = extract_user_text(raw)
                if text:
                    messages.append({"role": role, "content": text})
    messages.append({"role": "user", "content": message})
    return messages
# -------------------------
# TRAIN STEP (CORE LEARNING LOGIC)
# -------------------------
def train_step(user_history, prompt, target):
    """Run one teacher-forced optimisation step of the prefix network.

    The sequence fed to the LLM is ``[prefix | prompt | target]`` in
    embedding space. The loss is the next-token cross-entropy over the target
    tokens only. The optimizer only holds ``prefix_model``'s parameters, so
    only the prefix network is updated.

    Args:
        user_history: Chat history passed to ``build_user_style`` to derive
            the style text.
        prompt: Text the model is conditioned on.
        target: Text the model should produce after the prompt.

    Returns:
        The scalar loss of this step as a Python float.

    Raises:
        ValueError: If ``target`` tokenises to zero tokens.
    """
    # TOKENIZE first so an unusable target is rejected before any update.
    prompt_ids = tokenizer(prompt, return_tensors="pt").input_ids
    # No special tokens on the target: it sits mid-sequence, and an added
    # BOS would otherwise become a label the model is trained to emit.
    target_ids = tokenizer(
        target, return_tensors="pt", add_special_tokens=False
    ).input_ids
    if target_ids.numel() == 0:
        raise ValueError("train_step requires a non-empty target")
    target_ids = target_ids.long()

    optimizer.zero_grad()

    # STYLE VECTOR (Empath on USER HISTORY ONLY)
    style_text = build_user_style(user_history)
    style_vec = vectorizer.transform(style_text)
    style_vec = torch.tensor(style_vec, dtype=torch.float32).unsqueeze(0)

    # PREFIX
    prefix = prefix_model(style_vec)

    # EMBEDDINGS: prefix + prompt + target along the sequence axis
    embed = model.model.embed_tokens
    inputs_embeds = torch.cat(
        [prefix, embed(prompt_ids), embed(target_ids)], dim=1
    )
    attention_mask = torch.ones(inputs_embeds.shape[:2], dtype=torch.long)

    # FORWARD
    outputs = model(
        inputs_embeds=inputs_embeds,
        attention_mask=attention_mask,
    )

    # The logit at position i scores the token at position i + 1, so the
    # target tokens are predicted by the positions starting one step before
    # the target begins and ending one step before the sequence ends.
    context_len = prefix.shape[1] + prompt_ids.shape[1]
    logits = outputs.logits[:, context_len - 1:-1, :]

    # LOSS
    loss = torch.nn.functional.cross_entropy(
        logits.reshape(-1, logits.size(-1)),
        target_ids.reshape(-1),
    )
    loss.backward()
    optimizer.step()
    return loss.item()
# -------------------------
# STREAM CHAT (INFERENCE)
# -------------------------
def stream_chat(message, history):
    """Stream a style-conditioned reply for the chat UI.

    Args:
        message: The new user message (string or dict payload).
        history: Previous turns, or ``None``.

    Yields:
        The reply accumulated so far, growing with each generated chunk.

    Raises:
        Exception: Whatever ``model.generate`` raised in the worker thread,
            re-raised here once the stream has been drained.
    """
    message = extract_user_text(message)
    # Concatenating onto None below would raise TypeError.
    history = list(history or [])
    messages = build_messages(history, message)

    prompt = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    # The rendered template already contains its special tokens as text;
    # letting the tokenizer add them again would duplicate them (the
    # Transformers chat-templating guide recommends add_special_tokens=False).
    input_ids = tokenizer(
        prompt, return_tensors="pt", add_special_tokens=False
    )["input_ids"]

    # STYLE (USER HISTORY ONLY), including the message being answered
    style_text = build_user_style(history + [(message, "")])
    style_vec = vectorizer.transform(style_text)
    style_vec = torch.tensor(style_vec, dtype=torch.float32).unsqueeze(0)

    # Inference only: do not record an autograd graph for the prefix network.
    with torch.no_grad():
        prefix = prefix_model(style_vec)
        token_embeds = model.model.embed_tokens(input_ids)
        inputs_embeds = torch.cat([prefix, token_embeds], dim=1)
    attention_mask = torch.ones(inputs_embeds.shape[:2], dtype=torch.long)

    streamer = TextIteratorStreamer(
        tokenizer,
        skip_prompt=True,
        skip_special_tokens=True,
    )
    failure = []

    def _gen():
        try:
            with torch.inference_mode():
                model.generate(
                    inputs_embeds=inputs_embeds,
                    attention_mask=attention_mask,
                    streamer=streamer,
                    max_new_tokens=120,
                    temperature=0.7,
                    do_sample=True,
                    eos_token_id=tokenizer.eos_token_id,
                    pad_token_id=tokenizer.pad_token_id,
                )
        except Exception as exc:
            # Without an end-of-stream signal the consumer loop below would
            # block forever; record the error and release it.
            failure.append(exc)
            streamer.end()

    worker = Thread(target=_gen)
    worker.start()

    out = ""
    for t in streamer:
        out += t
        yield out

    worker.join()
    if failure:
        raise failure[0]
# -------------------------
# GRADIO UI
# -------------------------
# Build the chat UI around the streaming handler and start serving it.
demo = gr.ChatInterface(
    stream_chat,
    description="Style is learned ONLY from user history via Empath → prefix tuning.",
    title="🧠 Empath + Prefix-Tuned LLaMA (User-Conditioned Style)",
)
demo.launch()