# abap-coder-gguf / app.py
# Hugging Face Space by w1r4 (commit e57ebc3): ABAP code generation
# served through Gradio with a local CPU GGUF model.
import os
import subprocess
import sys
# --- 1. Force Install the Correct CPU Version (Runtime Install) ---
def install_llama():
    """Ensure llama-cpp-python is importable, installing a CPU wheel if not.

    Pulls from the prebuilt CPU wheel index so nothing has to be
    compiled on the (free-tier) Space at startup.
    """
    try:
        import llama_cpp  # noqa: F401
    except ImportError:
        pass
    else:
        print("llama-cpp-python is already installed.")
        return

    print("Installing llama-cpp-python for CPU...")
    # We use the specific Index URL for CPU wheels to avoid compiling
    cmd = [
        sys.executable, "-m", "pip", "install",
        "llama-cpp-python",
        "--extra-index-url", "https://abetlen.github.io/llama-cpp-python/whl/cpu",
    ]
    subprocess.check_call(cmd)
    print("Installation complete!")

install_llama()
import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
# 1. Configuration
# Quantized GGUF build of a Qwen coder model fine-tuned for ABAP.
REPO_ID = "mradermacher/qwen-coder-abap-v6-GGUF"
FILENAME = "qwen-coder-abap-v6.Q4_K_M.gguf" # Best balance of speed/quality

# 2. Download the Model (Cached automatically by HF)
# hf_hub_download returns the local cache path of the fetched file.
print(f"Downloading {FILENAME} from {REPO_ID}...")
model_path: str = hf_hub_download(
    repo_id=REPO_ID,
    filename=FILENAME
)

# 3. Load the Model
# n_ctx=8192 allows for long ABAP code files
# n_threads=2 is optimal for the free HF Spaces tier
print("Loading model into memory...")
llm = Llama(
    model_path=model_path,
    n_ctx=8192,
    n_threads=2,
    verbose=False  # suppress llama.cpp's per-token logging
)
# 4. The Generation Function
# 4. The Generation Function
def generate_abap(message, history):
    """Stream an ABAP-focused completion from the local GGUF model.

    Args:
        message: The current user prompt.
        history: Prior chat turns. Gradio may hand this over either as the
            legacy list of (user_msg, bot_msg) tuples or as the newer
            "messages" format (list of {"role": ..., "content": ...} dicts);
            both are accepted so the app survives a Gradio upgrade.

    Yields:
        The progressively growing assistant reply (Gradio streams each yield).
    """
    # System prompt to enforce ABAP context
    system_prompt = "You are an expert ABAP developer. Write modern ABAP 7.4+ code where possible."
    # Construct the prompt using Qwen's ChatML format:
    # <|im_start|>system...<|im_end|><|im_start|>user...<|im_end|><|im_start|>assistant
    parts = [f"<|im_start|>system\n{system_prompt}<|im_end|>\n"]
    # Replay history so the model keeps conversational context.
    for turn in history:
        if isinstance(turn, dict):
            # Gradio "messages" format: one dict per role.
            role = turn.get("role", "user")
            content = turn.get("content", "")
            parts.append(f"<|im_start|>{role}\n{content}<|im_end|>\n")
        else:
            # Legacy tuple format: (user_msg, bot_msg) pairs.
            user_msg, bot_msg = turn
            parts.append(
                f"<|im_start|>user\n{user_msg}<|im_end|>\n"
                f"<|im_start|>assistant\n{bot_msg}<|im_end|>\n"
            )
    # Add current message and open the assistant turn for completion.
    parts.append(f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n")
    prompt = "".join(parts)  # join once instead of quadratic +=

    # Streaming generation (characters appear as they are generated)
    output_stream = llm(
        prompt,
        max_tokens=1024,      # Max length of answer
        stop=["<|im_end|>"],  # Stop when finished
        stream=True,          # Enable streaming
        temperature=0.1,      # Precise code
        top_p=0.9
    )
    partial_message = ""
    for chunk in output_stream:
        partial_message += chunk['choices'][0]['text']
        yield partial_message
# 5. The Gradio Interface
# 5. The Gradio Interface
# ChatInterface consumes generate_abap's generator, streaming each yield
# into the chat window.
demo = gr.ChatInterface(
    fn=generate_abap,
    title="ABAP Coder (Qwen 2.5 GGUF)",
    description="Ask for ABAP Reports, CDS Views, or Classes. Running on CPU.",
    examples=[
        "Write a report to select data from MARA using inline declarations.",
        "Create a CDS View for sales orders joining VBAK and VBAP.",
        "Explain how to use FIELD-SYMBOLS in a LOOP."
    ],
)
# 6. Launch
# Start the Gradio server only when run as a script (not on import).
if __name__ == "__main__":
    demo.launch()