Spaces:

SALEETAI
/

Coder

Sleeping

App Files Files Community

Coder / app.py

SALEETAI

Update app.py

e9b33cc verified 28 days ago

raw

history blame contribute delete

2.58 kB

	import gradio as gr
	from llama_cpp import Llama
	from huggingface_hub import hf_hub_download

	# ==========================================
	# 1. MODEL CONFIGURATION
	# ==========================================
	# This downloads your specific model from your repo automatically
	REPO_ID = "SALEETAI/Qwen-Coding-Model-GGUF"
	FILENAME = "qwen2.5-coder-7b-instruct.Q4_K_M.gguf"

	print(f"📦 Fetching model from {REPO_ID}...")
	model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)

	# ==========================================
	# 2. INITIALIZE LLM (Optimized for CPU)
	# ==========================================
	llm = Llama(
	model_path=model_path,
	n_ctx=2048, # Context window (Adjustable)
	n_threads=4, # Matches HF Free Tier CPU cores
	verbose=False
	)

	# ==========================================
	# 3. PROFESSIONAL INFERENCE LOGIC
	# ==========================================
	def chat_engine(message, history):
	# Professional Qwen Chat Template Construction
	prompt = "<\|im_start\|>system\nYou are an expert software architect specializing in Rust and C++.<\|im_end\|>\n"

	for user_msg, assistant_msg in history:
	prompt += f"<\|im_start\|>user\n{user_msg}<\|im_end\|>\n<\|im_start\|>assistant\n{assistant_msg}<\|im_end\|>\n"

	prompt += f"<\|im_start\|>user\n{message}<\|im_end\|>\n<\|im_start\|>assistant\n"

	# Streaming implementation for "Boss Fight" code generation
	stream = llm(
	prompt,
	max_tokens=1024,
	stop=["<\|im_end\|>", "<\|endoftext\|>"],
	stream=True,
	temperature=0.4, # Your tuned temperature
	repeat_penalty=1.2, # Your tuned penalty
	)

	response = ""
	for output in stream:
	token = output["choices"][0]["text"]
	response += token
	yield response

	# ==========================================
	# 4. GRADIO PRODUCTION UI
	# ==========================================
	# Apply the soft theme at the global Blocks level to comply with Gradio 5.x architecture
	with gr.Blocks(theme=gr.themes.Soft()) as demo:
	gr.Markdown("# 🚀 SALEETAI Coding Agent (Qwen-7B)")
	gr.Markdown("### Professional-grade code logic for Rust, C++, and complex architectural patterns.")

	gr.ChatInterface(
	fn=chat_engine,
	examples=[
	"Implement a thread-safe Lock-Free Stack in C++.",
	"Write a Doubly Linked List in safe Rust.",
	"Optimize a Python script for high-density data processing."
	],
	cache_examples=False,
	)

	if __name__ == "__main__":
	demo.launch()