# Hugging Face Spaces page header captured along with the code
# ("Spaces: Sleeping") — not part of the application itself.
import os
import subprocess
import sys
# --- 1. Force Install the Correct CPU Version (Runtime Install) ---
def install_llama():
    """Ensure llama-cpp-python is importable, installing the CPU wheel if missing."""
    try:
        import llama_cpp  # noqa: F401 -- probe only; real import happens later
    except ImportError:
        print("Installing llama-cpp-python for CPU...")
        # The dedicated CPU wheel index avoids a slow from-source compile.
        cmd = [
            sys.executable, "-m", "pip", "install",
            "llama-cpp-python",
            "--extra-index-url", "https://abetlen.github.io/llama-cpp-python/whl/cpu",
        ]
        subprocess.check_call(cmd)
        print("Installation complete!")
    else:
        print("llama-cpp-python is already installed.")

install_llama()
# These imports must run AFTER install_llama(): llama_cpp may have just been installed.
import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# 1. Configuration
REPO_ID = "mradermacher/qwen-coder-abap-v6-GGUF"
FILENAME = "qwen-coder-abap-v6.Q4_K_M.gguf"  # Best balance of speed/quality

# 2. Download the model (huggingface_hub caches it across restarts)
print(f"Downloading {FILENAME} from {REPO_ID}...")
model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)

# 3. Load the model
print("Loading model into memory...")
llm = Llama(
    model_path=model_path,
    n_ctx=8192,    # long context window for sizeable ABAP source files
    n_threads=2,   # sized for the free HF Spaces CPU tier
    verbose=False,
)
| # 4. The Generation Function | |
| def generate_abap(message, history): | |
| # System prompt to enforce ABAP context | |
| system_prompt = "You are an expert ABAP developer. Write modern ABAP 7.4+ code where possible." | |
| # Construct the prompt using Qwen's ChatML format | |
| # <|im_start|>system...<|im_end|><|im_start|>user...<|im_end|><|im_start|>assistant | |
| prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n" | |
| # Add history to keep context (optional, but good for chat) | |
| for user_msg, bot_msg in history: | |
| prompt += f"<|im_start|>user\n{user_msg}<|im_end|>\n<|im_start|>assistant\n{bot_msg}<|im_end|>\n" | |
| # Add current message | |
| prompt += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n" | |
| # Streaming generation (Characters appear as they are generated) | |
| output_stream = llm( | |
| prompt, | |
| max_tokens=1024, # Max length of answer | |
| stop=["<|im_end|>"], # Stop when finished | |
| stream=True, # Enable streaming | |
| temperature=0.1, # Precise code | |
| top_p=0.9 | |
| ) | |
| partial_message = "" | |
| for chunk in output_stream: | |
| delta = chunk['choices'][0]['text'] | |
| partial_message += delta | |
| yield partial_message | |
# 5. The Gradio Interface
_EXAMPLES = [
    "Write a report to select data from MARA using inline declarations.",
    "Create a CDS View for sales orders joining VBAK and VBAP.",
    "Explain how to use FIELD-SYMBOLS in a LOOP.",
]

demo = gr.ChatInterface(
    fn=generate_abap,
    title="ABAP Coder (Qwen 2.5 GGUF)",
    description="Ask for ABAP Reports, CDS Views, or Classes. Running on CPU.",
    examples=_EXAMPLES,
)

# 6. Launch only when executed as a script (not on import)
if __name__ == "__main__":
    demo.launch()