SALEETAI committed on
Commit
d35d842
·
verified ·
1 Parent(s): bad7af7

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -0
app.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from llama_cpp import Llama
3
+ from huggingface_hub import hf_hub_download
4
+
5
+ # ==========================================
6
+ # 1. MODEL CONFIGURATION
7
+ # ==========================================
8
+ # This downloads your specific model from your repo automatically
9
# Hugging Face repository and GGUF file name of the quantized model to serve.
REPO_ID = "SALEETAI/Qwen-Coding-Model-GGUF"
FILENAME = "qwen2.5-coder-7b-instruct.Q4_K_M.gguf"

print(f"📦 Fetching model from {REPO_ID}...")
# The value returned by hf_hub_download is handed to Llama as the model path.
model_path = hf_hub_download(filename=FILENAME, repo_id=REPO_ID)

# ==========================================
# 2. INITIALIZE LLM (CPU settings)
# ==========================================
# n_ctx is the context window size in tokens and may be adjusted.
# NOTE(review): the original comment says n_threads=4 matches the HF free-tier
# CPU core count -- confirm against the hardware this actually runs on.
llm = Llama(
    model_path,
    verbose=False,
    n_threads=4,
    n_ctx=2048,
)
24
+
25
+ # ==========================================
26
+ # 3. PROFESSIONAL INFERENCE LOGIC
27
+ # ==========================================
28
_SYSTEM_PROMPT = "You are an expert software architect specializing in Rust and C++."
_MAX_NEW_TOKENS = 1024


def _content_to_text(content):
    """Flatten one history entry's content into plain text.

    Strings pass through unchanged and ``None`` becomes an empty string.
    Lists/tuples are flattened part by part; dicts contribute their ``"text"``
    value if present (assumes text content blocks carry a "text" key --
    TODO confirm against the Gradio version in use). Anything else is
    converted with ``str()``.
    """
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, dict):
        return str(content.get("text", ""))
    if isinstance(content, (list, tuple)):
        return "\n".join(_content_to_text(part) for part in content)
    return str(content)


def _history_to_turns(history):
    """Normalize chat history into a flat list of ``(role, text)`` pairs.

    Accepts both pair-style history (``[user, assistant]`` per exchange) and
    role/content dict history. Dict entries whose role is neither "user" nor
    "assistant" are skipped because the system prompt is fixed here.
    """
    turns = []
    for entry in history or []:
        if isinstance(entry, dict):
            role = entry.get("role")
            if role in ("user", "assistant"):
                turns.append((role, _content_to_text(entry.get("content"))))
        else:
            user_msg, assistant_msg = entry
            turns.append(("user", _content_to_text(user_msg)))
            turns.append(("assistant", _content_to_text(assistant_msg)))
    return turns


def _render_prompt(turns, message):
    """Render the system prompt, prior turns and new message as ChatML text."""
    parts = [f"<|im_start|>system\n{_SYSTEM_PROMPT}<|im_end|>\n"]
    parts.extend(
        f"<|im_start|>{role}\n{text}<|im_end|>\n" for role, text in turns
    )
    # Leave the assistant turn open so the model continues from here.
    parts.append(f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n")
    return "".join(parts)


def _fit_prompt_to_context(turns, message):
    """Return a prompt that leaves room for the reply, dropping oldest turns.

    Turns are discarded from the front (always resuming on a user turn) until
    the tokenized prompt plus ``_MAX_NEW_TOKENS`` fits in the model's context
    window, or no history is left. The system prompt and the new message are
    never dropped.
    """
    budget = max(llm.n_ctx() - _MAX_NEW_TOKENS, 0)
    start = 0
    prompt = _render_prompt(turns, message)
    while start < len(turns) and len(llm.tokenize(prompt.encode("utf-8"))) > budget:
        start += 1
        # Do not let the retained history begin with an orphaned assistant reply.
        while start < len(turns) and turns[start][0] != "user":
            start += 1
        prompt = _render_prompt(turns[start:], message)
    return prompt


def chat_engine(message, history):
    """Stream a model reply for one chat turn.

    Builds a ChatML prompt from the fixed system prompt, the prior
    conversation and the new user message, then yields the accumulated
    response after every streamed chunk.

    Args:
        message: The new user message.
        history: Prior conversation. May be a list of ``(user, assistant)``
            pairs or a list of ``{"role": ..., "content": ...}`` dicts;
            ``None`` or empty means no prior turns.

    Yields:
        The response text generated so far (cumulative, not a delta).
    """
    prompt = _fit_prompt_to_context(_history_to_turns(history), message)

    stream = llm(
        prompt,
        max_tokens=_MAX_NEW_TOKENS,
        stop=["<|im_end|>", "<|endoftext|>"],
        stream=True,
        temperature=0.4,     # sampling values kept from the original configuration
        repeat_penalty=1.2,
    )

    response = ""
    for output in stream:
        response += output["choices"][0]["text"]
        yield response
52
+
53
+ # ==========================================
54
+ # 4. GRADIO PRODUCTION UI
55
+ # ==========================================
56
# Starter prompts offered in the chat UI.
_EXAMPLE_PROMPTS = [
    "Implement a thread-safe Lock-Free Stack in C++.",
    "Write a Doubly Linked List in safe Rust.",
    "Optimize a Python script for high-density data processing.",
]

# Chat UI wired to the streaming generator defined above.
demo = gr.ChatInterface(
    chat_engine,
    examples=_EXAMPLE_PROMPTS,
    cache_examples=False,
    theme=gr.themes.Soft(),
    title="🚀 SALEETAI Coding Agent (Qwen-7B)",
    description="Professional-grade code logic for Rust, C++, and complex architectural patterns.",
)

# Launch only when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch(share=True)