Spaces:

pradeeparul2
/

qwen-coder

Build error

qwen-coder / app.py

Update app.py

c4f569f verified about 2 months ago

677 Bytes

	import os
	import gradio as gr
	from llama_cpp import Llama

	# Llama(model_path=...) expects a local .gguf file and never downloads anything,
	# so a Hub repo id has to be resolved through Llama.from_pretrained instead
	# (which requires the huggingface-hub package to be installed).
	model_path = "Qwen/Qwen2.5-Coder-14B-Instruct-GGUF"  # Hub repo id, not a filesystem path
	# NOTE(review): glob is assumed to match exactly one single-file Q4_K_M quant
	# (~9GB) in the repo — confirm against the repo's file listing.
	model_file = "*q4_k_m.gguf"
	llm = Llama.from_pretrained(
	    repo_id=model_path,
	    filename=model_file,
	    n_ctx=4096,  # Adjust for coding tasks
	    n_gpu_layers=99,  # Offload to T4 GPU
	    verbose=False,
	)

	def chat(message, history):
	    """Generate the assistant's reply for a gr.ChatInterface turn.

	    Args:
	        message: The user's newest message text.
	        history: Earlier turns as supplied by Gradio. Both the pair format
	            (``[user, assistant]`` per turn) and the messages format
	            (``{"role": ..., "content": ...}`` dicts) are accepted.

	    Returns:
	        The assistant's reply as a plain string. ChatInterface maintains the
	        history itself, so the callback must return only the reply rather
	        than a ``(history, "")`` tuple, and must not mutate ``history``.
	    """
	    messages = []
	    for turn in history or []:
	        if isinstance(turn, dict):
	            # Messages format: forward only plain-text entries.
	            pairs = [(turn.get("role"), turn.get("content"))]
	        else:
	            # Pair format: (user_text, assistant_text).
	            user_text, bot_text = turn
	            pairs = [("user", user_text), ("assistant", bot_text)]
	        for role, content in pairs:
	            # Skip empty or non-text content (e.g. file attachments).
	            if role and isinstance(content, str) and content:
	                messages.append({"role": role, "content": content})
	    messages.append({"role": "user", "content": message})

	    # NOTE(review): a very long history may exceed the model's context window;
	    # no truncation is applied here.
	    response = llm.create_chat_completion(
	        messages=messages,
	        max_tokens=512,
	        temperature=0.7,
	    )
	    return response["choices"][0]["message"]["content"]

	# Wrap the chat callback in Gradio's ready-made chat UI.
	demo = gr.ChatInterface(fn=chat)

	# Start the web server only when executed as a script, not on import.
	if __name__ == "__main__":
	    demo.launch()