# Lumen — Hugging Face Space app: Gradio chat UI serving the alxstuff/Lumen-7b-v2
# LoRA adapter on top of Qwen/Qwen2.5-Coder-7B-Instruct.
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from threading import Thread
# Base checkpoint and the LoRA adapter that specializes it into "Lumen".
BASE_MODEL = "Qwen/Qwen2.5-Coder-7B-Instruct"
LORA_REPO = "alxstuff/Lumen-7b-v2"
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
print("Loading base model...")
# fp16 weights; device_map="auto" lets accelerate place/shard the model across
# available devices, and low_cpu_mem_usage avoids materializing a full extra
# copy of the weights in host RAM while loading.
model = AutoModelForCausalLM.from_pretrained(
BASE_MODEL,
torch_dtype=torch.float16,
device_map="auto",
trust_remote_code=True,
low_cpu_mem_usage=True,
)
print("Loading LoRA adapter...")
# Attach the fine-tuned LoRA weights on top of the frozen base
# (presumably requires `peft` to be installed — verify in requirements).
model.load_adapter(LORA_REPO)
model.eval()  # inference mode: disables dropout etc.
print("✅ Lumen ready!")
def chat(message, history):
    """Stream an assistant reply for a Gradio ChatInterface.

    Args:
        message: The latest user message.
        history: Prior turns as (user, assistant) pairs (Gradio tuple format).

    Yields:
        The progressively accumulated assistant response text.
    """
    # Rebuild the conversation as role/content messages and let the tokenizer
    # render its own chat template instead of hand-assembling ChatML — this
    # cannot drift from the model's expected prompt format.
    messages = [{
        "role": "system",
        "content": "You are Lumen, an expert AI coding assistant built by TheAlxLabs. You write clean, efficient code and explain it clearly.",
    }]
    for user, assistant in history:
        messages.append({"role": "user", "content": user})
        messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": message})
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    # generate() blocks, so it runs in a worker thread while we consume the
    # streamer incrementally here.
    thread = Thread(target=model.generate, kwargs={
        **inputs,
        "streamer": streamer,
        "max_new_tokens": 1024,
        "temperature": 0.2,
        "do_sample": True,
        # Explicit pad token silences the per-call "pad_token_id not set" warning.
        "pad_token_id": tokenizer.eos_token_id,
    })
    thread.start()
    response = ""
    for token in streamer:
        response += token
        yield response
    # Join so any exception raised inside model.generate isn't silently lost
    # with a dangling thread.
    thread.join()
# Wire the streaming `chat` generator into a standard Gradio chat front-end
# and start the server.
demo = gr.ChatInterface(
    fn=chat,
    title="⚡ Lumen — AI Coding Assistant",
    description="Local-first AI coding assistant by TheAlxLabs.",
    examples=[
        "Write a Python function to reverse a linked list",
        "Explain what this does: `[x for x in range(10) if x % 2 == 0]`",
        "Fix this bug: TypeError: 'NoneType' object is not subscriptable",
    ],
)
demo.launch()