# abap-coder-gguf / app.py
# Hugging Face Space by w1r4 (commit e57ebc3): ABAP code generation
# served through Gradio with a local CPU GGUF model.
import os
import subprocess
import sys
# --- 1. Force Install the Correct CPU Version (Runtime Install) ---
def install_llama():
    """Ensure llama-cpp-python is importable, installing a CPU wheel if not.

    Pulls from the prebuilt CPU wheel index so nothing has to be
    compiled on the (free-tier) Space at startup.
    """
    try:
        import llama_cpp  # noqa: F401
    except ImportError:
        pass
    else:
        print("llama-cpp-python is already installed.")
        return

    print("Installing llama-cpp-python for CPU...")
    # We use the specific Index URL for CPU wheels to avoid compiling
    cmd = [
        sys.executable, "-m", "pip", "install",
        "llama-cpp-python",
        "--extra-index-url", "https://abetlen.github.io/llama-cpp-python/whl/cpu",
    ]
    subprocess.check_call(cmd)
    print("Installation complete!")

install_llama()
import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
# 1. Configuration
# Quantized GGUF build of a Qwen coder model fine-tuned for ABAP.
REPO_ID = "mradermacher/qwen-coder-abap-v6-GGUF"
FILENAME = "qwen-coder-abap-v6.Q4_K_M.gguf" # Best balance of speed/quality

# 2. Download the Model (Cached automatically by HF)
# hf_hub_download returns the local cache path of the fetched file.
print(f"Downloading {FILENAME} from {REPO_ID}...")
model_path: str = hf_hub_download(
    repo_id=REPO_ID,
    filename=FILENAME
)

# 3. Load the Model
# n_ctx=8192 allows for long ABAP code files
# n_threads=2 is optimal for the free HF Spaces tier
print("Loading model into memory...")
llm = Llama(
    model_path=model_path,
    n_ctx=8192,
    n_threads=2,
    verbose=False  # suppress llama.cpp's per-token logging
)
# 4. The Generation Function
# 4. The Generation Function
def generate_abap(message, history):
    """Stream an ABAP-focused completion from the local GGUF model.

    Args:
        message: The current user prompt.
        history: Prior chat turns. Gradio may hand this over either as the
            legacy list of (user_msg, bot_msg) tuples or as the newer
            "messages" format (list of {"role": ..., "content": ...} dicts);
            both are accepted so the app survives a Gradio upgrade.

    Yields:
        The progressively growing assistant reply (Gradio streams each yield).
    """
    # System prompt to enforce ABAP context
    system_prompt = "You are an expert ABAP developer. Write modern ABAP 7.4+ code where possible."
    # Construct the prompt using Qwen's ChatML format:
    # <|im_start|>system...<|im_end|><|im_start|>user...<|im_end|><|im_start|>assistant
    parts = [f"<|im_start|>system\n{system_prompt}<|im_end|>\n"]
    # Replay history so the model keeps conversational context.
    for turn in history:
        if isinstance(turn, dict):
            # Gradio "messages" format: one dict per role.
            role = turn.get("role", "user")
            content = turn.get("content", "")
            parts.append(f"<|im_start|>{role}\n{content}<|im_end|>\n")
        else:
            # Legacy tuple format: (user_msg, bot_msg) pairs.
            user_msg, bot_msg = turn
            parts.append(
                f"<|im_start|>user\n{user_msg}<|im_end|>\n"
                f"<|im_start|>assistant\n{bot_msg}<|im_end|>\n"
            )
    # Add current message and open the assistant turn for completion.
    parts.append(f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n")
    prompt = "".join(parts)  # join once instead of quadratic +=

    # Streaming generation (characters appear as they are generated)
    output_stream = llm(
        prompt,
        max_tokens=1024,      # Max length of answer
        stop=["<|im_end|>"],  # Stop when finished
        stream=True,          # Enable streaming
        temperature=0.1,      # Precise code
        top_p=0.9
    )
    partial_message = ""
    for chunk in output_stream:
        partial_message += chunk['choices'][0]['text']
        yield partial_message
# 5. The Gradio Interface
# 5. The Gradio Interface
# ChatInterface consumes generate_abap's generator, streaming each yield
# into the chat window.
demo = gr.ChatInterface(
    fn=generate_abap,
    title="ABAP Coder (Qwen 2.5 GGUF)",
    description="Ask for ABAP Reports, CDS Views, or Classes. Running on CPU.",
    examples=[
        "Write a report to select data from MARA using inline declarations.",
        "Create a CDS View for sales orders joining VBAK and VBAP.",
        "Explain how to use FIELD-SYMBOLS in a LOOP."
    ],
)
# 6. Launch
# Start the Gradio server only when run as a script (not on import).
if __name__ == "__main__":
    demo.launch()