SALEETAI committed on
Commit
0b79f85
·
verified ·
1 Parent(s): 003c00d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -23
app.py CHANGED
@@ -1,37 +1,24 @@
1
  import gradio as gr
2
  from llama_cpp import Llama
3
- from huggingface_hub import hf_hub_download
4
 
5
# Step 1: fetch the quantized GGUF weights from the Hugging Face Hub repo.
_gguf_repo = "SALEETAI/coding-agent-qwen-sft-v3-GGUF"
_gguf_file = "coding-agent-qwen-sft-v3-GGUF.q4_k_m.gguf"
model_path = hf_hub_download(repo_id=_gguf_repo, filename=_gguf_file)

# Step 2: load the model. The context window is kept at 2048 tokens, which
# the author chose for good performance on the free CPU tier.
llm = Llama(model_path=model_path, n_ctx=2048, verbose=False)
14
 
15
def generate_code(prompt):
    """Return the model's completion text for a single user prompt.

    The prompt is wrapped as one user turn in the ``<|im_start|>`` /
    ``<|im_end|>`` chat markup, followed by an open assistant turn, and
    generation stops at the first ``<|im_end|>``.
    """
    chat_prompt = f"<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
    sampling = {
        "max_tokens": 1024,
        "stop": ["<|im_end|>"],
        "echo": False,
        "repeat_penalty": 1.2,
        "temperature": 0.4,
    }
    completion = llm(chat_prompt, **sampling)
    first_choice = completion["choices"][0]
    return first_choice["text"]
26
 
27
# Step 3: assemble the Gradio front end around generate_code.
_prompt_box = gr.Textbox(lines=5, placeholder="Enter a coding prompt...")
_code_view = gr.Code(language="python")
demo = gr.Interface(
    fn=generate_code,
    inputs=_prompt_box,
    outputs=_code_view,
    title="🚀 Coding Agent Qwen-SFT v2 (GGUF Demo)",
    description="Running on Free CPU using llama-cpp-python.",
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)
 
1
  import gradio as gr
2
  from llama_cpp import Llama
3
+ import os
4
 
5
# The GGUF file is expected at a fixed local path because it is downloaded
# during the Docker image build rather than at application start-up.
model_path = "/app/coding-agent-qwen-sft-v3-GGUF.q4_k_m.gguf"

print(f"Checking for model at {model_path}...")
if not os.path.exists(model_path):
    print("MODEL NOT FOUND!")
    # Fail fast: continuing would only defer the failure to the Llama()
    # constructor below, with a less obvious error in the container logs.
    raise FileNotFoundError(f"GGUF model file not found: {model_path}")

# Load the model once at import time with a 2048-token context window.
llm = Llama(model_path=model_path, n_ctx=2048)
13
+
14
def generate(prompt, history=None):
    """Return the model's reply to a single user message.

    Args:
        prompt: The latest user message.
        history: Earlier conversation turns. ``gr.ChatInterface`` passes this
            as the second positional argument on every call, so the function
            must accept it; it is currently ignored and each message is
            answered as an independent single-turn prompt.

    Returns:
        The generated text of the first completion choice.
    """
    output = llm(
        # Single user turn in <|im_start|>/<|im_end|> chat markup, followed
        # by an open assistant turn for the model to complete.
        f"<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n",
        max_tokens=1024,
        stop=["<|im_end|>"],
        repeat_penalty=1.2,
        temperature=0.4,
    )
    return output['choices'][0]['text']
23
 
24
# Wrap generate() in a chat UI and serve it on all interfaces, port 7860.
demo = gr.ChatInterface(generate)
demo.launch(server_name="0.0.0.0", server_port=7860)