|
|
import gradio as gr |
|
|
from transformers import AutoTokenizer, AutoModelForCausalLM |
|
|
import torch |
|
|
|
|
|
# ---------------------------------------------------------------------------
# One-time model setup (runs at import time).
# ---------------------------------------------------------------------------
print("Downloading the model ...")

# Hugging Face Hub repo id of the fine-tuned model.
model_name = "samzito12/lora_model3"

# Tokenizer setup: the model ships without a dedicated pad token, so reuse
# EOS for padding; left-padding keeps generation anchored to the prompt end.
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"

# Load the weights on CPU in full float32 precision, keeping peak memory
# low during loading.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="cpu",
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True,
)

print("✅ Downloaded model with CPU optimisations")

# Inference only — disable dropout etc.
model.eval()

# System prompt prepended to every conversation.
SYSTEM_PROMPT = """You are a helpful AI coding assistant based on Meta's Llama-3.2-3B model.


Your task is to assist users with programming-related questions: write code snippets, debug code, explain concepts clearly, and provide best practices.


Always respond in a concise, clear, and friendly manner, and adapt your explanations to the user's level."""
|
|
|
|
|
|
|
|
def _history_to_text(history):
    """Render prior chat turns as 'User: ...' / 'Assistant: ...' lines.

    Accepts either the legacy Gradio tuple format ``[(user, assistant), ...]``
    or the "messages" format ``[{"role": ..., "content": ...}, ...]``.
    Returns a string ending with a newline (or "" for empty history).
    """
    if not history:
        return ""
    lines = []
    if isinstance(history[0], dict):
        # Gradio "messages" format: one dict per message.
        for msg in history:
            role = "User" if msg.get("role") == "user" else "Assistant"
            lines.append(f"{role}: {msg.get('content', '')}")
    else:
        # Legacy format: one (user, assistant) pair per turn.
        for user_msg, assistant_msg in history:
            lines.append(f"User: {user_msg}\nAssistant: {assistant_msg}")
    return "\n".join(lines) + "\n"


def chat(message, history, temperature=1.5, max_tokens=128):
    """Generate an assistant reply for the Gradio ChatInterface.

    Args:
        message: Latest user message.
        history: Prior turns — (user, assistant) tuples or role/content dicts.
        temperature: Sampling temperature from the UI slider; values <= 0
            fall back to greedy decoding. (NOTE(review): the signature default
            1.5 differs from the slider default 0.7 — kept for compatibility.)
        max_tokens: Maximum number of new tokens to generate (UI slider).

    Returns:
        The assistant's response text.
    """
    # Build a plain-text prompt: system prompt, prior turns, new message.
    conversation = f"System: {SYSTEM_PROMPT}\n\n"
    conversation += _history_to_text(history)
    conversation += f"User: {message}\nAssistant:"

    inputs = tokenizer(
        conversation,
        return_tensors="pt",
        truncation=True,
        max_length=1024,
        padding=True,
    )

    # BUG FIX: the `temperature` and `max_tokens` parameters (wired to the UI
    # sliders) were previously ignored — generation always used
    # temperature=0.7 and max_new_tokens=256. Honor them here.
    gen_kwargs = {
        "max_new_tokens": int(max_tokens),
        "use_cache": True,
        "pad_token_id": tokenizer.eos_token_id,
        "eos_token_id": tokenizer.eos_token_id,
    }
    if temperature and temperature > 0:
        gen_kwargs["do_sample"] = True
        gen_kwargs["temperature"] = float(temperature)
    else:
        # temperature == 0 is invalid for sampling — decode greedily instead.
        gen_kwargs["do_sample"] = False

    with torch.no_grad():
        outputs = model.generate(**inputs, **gen_kwargs)

    full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # The decoded text contains the whole prompt; keep only the text after
    # the final "Assistant:" marker (fall back to slicing off the prompt).
    if "Assistant:" in full_response:
        response = full_response.split("Assistant:")[-1].strip()
    else:
        response = full_response[len(conversation):].strip()

    return response
|
|
|
|
|
# ---------------------------------------------------------------------------
# Gradio front-end: chat UI with sampling controls exposed as sliders.
# ---------------------------------------------------------------------------
DESCRIPTION = """


**Model:** This chatbot was fine-tuned to provide a free coding service, designed to assist users in writing, debugging, and optimizing code across various programming languages.


"""

demo = gr.ChatInterface(
    fn=chat,
    theme="soft",
    title="Your Coding Assistant",
    description=DESCRIPTION,
    # Each example row is [message, temperature, max_tokens], matching the
    # order of `additional_inputs` below.
    examples=[
        ["What model are you?", 0.7, 128],
        ["Explain machine learning in simple terms", 0.7, 128],
        ["Write a Python function to reverse a string", 0.7, 128],
    ],
    additional_inputs=[
        gr.Slider(minimum=0, maximum=2, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=32, maximum=512, value=128, step=16, label="Max Tokens"),
    ],
)

# Start the web server only when executed as a script (not on import).
if __name__ == "__main__":
    demo.launch()