SALEETAI committed on
Commit
3424ab1
·
verified ·
1 Parent(s): 90dee38

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -0
app.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from llama_cpp import Llama
3
+ from huggingface_hub import hf_hub_download
4
+
5
# 1. Download the GGUF file from the model repo
_REPO_ID = "SALEETAI/coding-agent-qwen-sft-v2-GGUF"
_GGUF_FILENAME = "coding-agent-qwen-sft-v2-GGUF.q4_k_m.gguf"

model_path = hf_hub_download(repo_id=_REPO_ID, filename=_GGUF_FILENAME)

# 2. Initialize the model
# The context window (n_ctx) is set to 2048 for good performance on free CPU.
llm = Llama(
    model_path=model_path,
    n_ctx=2048,
    verbose=False,
)
14
+
15
def generate_code(prompt):
    """Generate a completion for a single-turn coding prompt.

    The prompt is wrapped in ``<|im_start|>`` / ``<|im_end|>`` markers as one
    user turn followed by an open assistant turn, and the module-level
    ``llm`` is called to complete it.

    Args:
        prompt: Text entered by the user. ``None`` and whitespace-only
            values are treated as empty.

    Returns:
        The ``text`` field of the first completion choice, or an empty
        string when the prompt is blank (the model is not invoked then).
    """
    # A blank prompt has nothing to answer; skip the expensive inference call
    # instead of generating up to 1024 tokens for an empty user turn.
    if prompt is None or not prompt.strip():
        return ""

    chat_prompt = (
        f"<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
    )
    output = llm(
        chat_prompt,
        max_tokens=1024,
        # Stop as soon as the assistant turn's closing marker is produced.
        stop=["<|im_end|>"],
        echo=False,
        repeat_penalty=1.2,
        temperature=0.4,
    )
    return output["choices"][0]["text"]
26
+
27
# 3. Create the Gradio UI
# Build the input and output components first, then wire them to the handler.
_prompt_box = gr.Textbox(lines=5, placeholder="Enter a coding prompt...")
_code_view = gr.Code(language="python")

demo = gr.Interface(
    fn=generate_code,
    inputs=_prompt_box,
    outputs=_code_view,
    title="🚀 Coding Agent Qwen-SFT v2 (GGUF Demo)",
    description="Running on Free CPU using llama-cpp-python.",
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)