AIencoder committed on
Commit
46b5803
·
verified ·
1 Parent(s): 4667b6b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -54
app.py CHANGED
@@ -1,28 +1,22 @@
1
  import gradio as gr
2
- from huggingface_hub import hf_hub_download
3
- from llama_cpp import Llama
4
- import os
5
 
6
- # Download model on startup
7
- print("Downloading model...")
8
- model_path = hf_hub_download(
9
- repo_id="Qwen/Qwen2.5-Coder-3B-Instruct-GGUF",
10
- filename="qwen2.5-coder-3b-instruct-q4_k_m.gguf",
11
- cache_dir="/home/user/.cache"
12
- )
13
- print(f"Model downloaded: {model_path}")
14
 
15
- # Load model
16
- print("Loading model...")
17
- llm = Llama(
18
- model_path=model_path,
19
- n_ctx=4096,
20
- n_threads=4,
21
- verbose=False
22
- )
23
- print("Model ready!")
24
 
25
  def chat_stream(message, history, temperature):
 
 
 
 
26
  messages = [{"role": "system", "content": "You are an expert coding assistant. Always use markdown code blocks."}]
27
 
28
  for user_msg, assistant_msg in history:
@@ -33,79 +27,90 @@ def chat_stream(message, history, temperature):
33
  messages.append({"role": "user", "content": message})
34
 
35
  try:
36
- response = llm.create_chat_completion(
37
- messages=messages,
38
- temperature=temperature,
39
- max_tokens=2048,
40
- stream=True
41
  )
42
 
43
  full = ""
44
- for chunk in response:
45
- delta = chunk["choices"][0]["delta"]
46
- if "content" in delta:
47
- full += delta["content"]
48
- yield full
 
 
 
 
49
  except Exception as e:
50
  yield f"Error: {e}"
51
 
52
  def generate_code(prompt, language):
53
  if not prompt.strip():
54
  return "Please describe what you want."
 
 
55
 
56
  full_prompt = f"Write {language} code for: {prompt}\n\nOutput ONLY code in a markdown block."
57
 
58
  try:
59
- response = llm.create_chat_completion(
60
- messages=[{"role": "user", "content": full_prompt}],
61
- temperature=0.3,
62
- max_tokens=2048
63
  )
64
- result = response["choices"][0]["message"]["content"]
65
- if "```" in result:
66
- parts = result.split("```")
67
- if len(parts) >= 2:
68
- code = parts[1]
69
- if "\n" in code:
70
- code = code.split("\n", 1)[-1]
71
- return code.strip()
72
- return result
 
 
73
  except Exception as e:
74
  return f"Error: {e}"
75
 
76
  def explain_code(code):
77
  if not code.strip():
78
  return "Paste code to explain."
 
 
79
 
80
  try:
81
- response = llm.create_chat_completion(
82
- messages=[{"role": "user", "content": f"Explain this code:\n```\n{code}\n```"}],
83
- temperature=0.5,
84
- max_tokens=2048
85
  )
86
- return response["choices"][0]["message"]["content"]
87
  except Exception as e:
88
  return f"Error: {e}"
89
 
90
  def fix_code(code, error):
91
  if not code.strip():
92
  return "Paste code to fix."
 
 
93
 
94
  prompt = f"Fix this code:\n```\n{code}\n```\nError: {error or 'Not working'}"
95
 
96
  try:
97
- response = llm.create_chat_completion(
98
- messages=[{"role": "user", "content": prompt}],
99
- temperature=0.3,
100
- max_tokens=2048
101
  )
102
- return response["choices"][0]["message"]["content"]
103
  except Exception as e:
104
  return f"Error: {e}"
105
 
106
  with gr.Blocks(title="GOD Coding Machine", theme=gr.themes.Soft(primary_hue="purple")) as demo:
107
 
108
- gr.Markdown("# 🔥 GOD Coding Machine\n**Qwen2.5-Coder-3B** Running locally • No rate limits!")
109
 
110
  temperature = gr.Slider(0, 1, value=0.7, step=0.1, label="Temperature")
111
 
 
1
  import gradio as gr
2
+ import requests
3
+ import json
 
4
 
5
+ OLLAMA_URL = "http://localhost:11434"
6
+ MODEL = "qwen2.5-coder:3b"
 
 
 
 
 
 
7
 
8
def check_ollama():
    """Return True if the local Ollama server is reachable.

    Probes the lightweight /api/tags endpoint with a short timeout so UI
    callers can show a "still starting" message instead of hanging.
    """
    try:
        r = requests.get(f"{OLLAMA_URL}/api/tags", timeout=5)
    except requests.RequestException:
        # Narrowed from a bare `except:` — a bare except also swallows
        # KeyboardInterrupt/SystemExit. Connection refused / timeout means
        # the server is not up yet.
        return False
    return r.status_code == 200
 
 
 
14
 
15
  def chat_stream(message, history, temperature):
16
+ if not check_ollama():
17
+ yield "⏳ Ollama starting... wait 30 seconds and try again."
18
+ return
19
+
20
  messages = [{"role": "system", "content": "You are an expert coding assistant. Always use markdown code blocks."}]
21
 
22
  for user_msg, assistant_msg in history:
 
27
  messages.append({"role": "user", "content": message})
28
 
29
  try:
30
+ response = requests.post(
31
+ f"{OLLAMA_URL}/api/chat",
32
+ json={"model": MODEL, "messages": messages, "stream": True, "options": {"temperature": temperature}},
33
+ stream=True, timeout=300
 
34
  )
35
 
36
  full = ""
37
+ for line in response.iter_lines():
38
+ if line:
39
+ try:
40
+ data = json.loads(line)
41
+ if "message" in data:
42
+ full += data["message"].get("content", "")
43
+ yield full
44
+ except:
45
+ continue
46
  except Exception as e:
47
  yield f"Error: {e}"
48
 
49
def _extract_code(result):
    """Return the contents of the first markdown code fence in *result*.

    Drops the language tag on the fence's opening line when the block spans
    more than one line. Returns *result* unchanged when no fence is found.
    """
    if "```" not in result:
        return result
    parts = result.split("```")
    if len(parts) < 2:
        return result
    code = parts[1]
    # The first line inside a fence is usually the language tag
    # (e.g. "python"); skip it when there is more content after it.
    if "\n" in code:
        code = code.split("\n", 1)[-1]
    return code.strip()


def generate_code(prompt, language):
    """Generate *language* code for *prompt* via Ollama's /api/generate.

    Returns the extracted code on success, or a human-readable status/error
    string — this function never raises, so the Gradio UI can display the
    return value directly.
    """
    if not prompt.strip():
        return "Please describe what you want."
    if not check_ollama():
        return "⏳ Ollama starting..."

    full_prompt = f"Write {language} code for: {prompt}\n\nOutput ONLY code in a markdown block."

    try:
        r = requests.post(
            f"{OLLAMA_URL}/api/generate",
            json={"model": MODEL, "prompt": full_prompt, "stream": False, "options": {"temperature": 0.3}},
            timeout=300,
        )
        if r.status_code == 200:
            return _extract_code(r.json().get("response", ""))
        return f"Error: {r.text}"
    except Exception as e:
        # UI boundary: surface any failure (network error, non-JSON body)
        # as display text rather than crashing the handler.
        return f"Error: {e}"
76
 
77
def explain_code(code):
    """Ask the local model for an explanation of *code*.

    Returns the model's explanation text, or a status/error string suitable
    for direct display in the UI.
    """
    if not code.strip():
        return "Paste code to explain."
    if not check_ollama():
        return "⏳ Ollama starting..."

    payload = {
        "model": MODEL,
        "prompt": f"Explain this code:\n```\n{code}\n```",
        "stream": False,
    }
    try:
        resp = requests.post(f"{OLLAMA_URL}/api/generate", json=payload, timeout=300)
        if resp.status_code == 200:
            return resp.json().get("response", "")
        return f"Error: {resp.text}"
    except Exception as e:
        return f"Error: {e}"
92
 
93
def fix_code(code, error):
    """Ask the local model to repair *code*, optionally guided by *error*.

    When *error* is falsy, a generic "Not working" hint is sent instead.
    Returns the model's answer, or a status/error string for the UI.
    """
    if not code.strip():
        return "Paste code to fix."
    if not check_ollama():
        return "⏳ Ollama starting..."

    prompt = f"Fix this code:\n```\n{code}\n```\nError: {error or 'Not working'}"

    payload = {
        "model": MODEL,
        "prompt": prompt,
        "stream": False,
        "options": {"temperature": 0.3},
    }
    try:
        resp = requests.post(f"{OLLAMA_URL}/api/generate", json=payload, timeout=300)
        if resp.status_code == 200:
            return resp.json().get("response", "")
        return f"Error: {resp.text}"
    except Exception as e:
        return f"Error: {e}"
110
 
111
  with gr.Blocks(title="GOD Coding Machine", theme=gr.themes.Soft(primary_hue="purple")) as demo:
112
 
113
+ gr.Markdown("# 🔥 GOD Coding Machine\n**Docker Edition** • Qwen2.5-Coder running locally • No rate limits!")
114
 
115
  temperature = gr.Slider(0, 1, value=0.7, step=0.1, label="Temperature")
116