AIencoder committed
Commit a361531 · verified · 1 Parent(s): 1ecd1cc

Update app.py

Files changed (1):
  1. app.py +54 -59
app.py CHANGED
@@ -1,22 +1,28 @@
  import gradio as gr
- import requests
- import json
+ from huggingface_hub import hf_hub_download
+ from llama_cpp import Llama
+ import os

- OLLAMA_URL = "http://localhost:11434"
- MODEL = "qwen2.5-coder:3b"
+ # Download model on startup
+ print("Downloading model...")
+ model_path = hf_hub_download(
+     repo_id="Qwen/Qwen2.5-Coder-3B-Instruct-GGUF",
+     filename="qwen2.5-coder-3b-instruct-q4_k_m.gguf",
+     cache_dir="/home/user/.cache"
+ )
+ print(f"Model downloaded: {model_path}")

- def check_ollama():
-     try:
-         r = requests.get(f"{OLLAMA_URL}/api/tags", timeout=5)
-         return r.status_code == 200
-     except:
-         return False
+ # Load model
+ print("Loading model...")
+ llm = Llama(
+     model_path=model_path,
+     n_ctx=4096,
+     n_threads=4,
+     verbose=False
+ )
+ print("Model ready!")

  def chat_stream(message, history, temperature):
-     if not check_ollama():
-         yield "⏳ Ollama starting... wait 30 seconds and try again."
-         return
-
      messages = [{"role": "system", "content": "You are an expert coding assistant. Always use markdown code blocks."}]

      for user_msg, assistant_msg in history:
@@ -27,90 +33,79 @@ def chat_stream(message, history, temperature):
      messages.append({"role": "user", "content": message})

      try:
-         response = requests.post(
-             f"{OLLAMA_URL}/api/chat",
-             json={"model": MODEL, "messages": messages, "stream": True, "options": {"temperature": temperature}},
-             stream=True, timeout=300
+         response = llm.create_chat_completion(
+             messages=messages,
+             temperature=temperature,
+             max_tokens=2048,
+             stream=True
          )

          full = ""
-         for line in response.iter_lines():
-             if line:
-                 try:
-                     data = json.loads(line)
-                     if "message" in data:
-                         full += data["message"].get("content", "")
-                         yield full
-                 except:
-                     continue
+         for chunk in response:
+             delta = chunk["choices"][0]["delta"]
+             if "content" in delta:
+                 full += delta["content"]
+                 yield full
      except Exception as e:
          yield f"Error: {e}"

  def generate_code(prompt, language):
      if not prompt.strip():
          return "Please describe what you want."
-     if not check_ollama():
-         return "⏳ Ollama starting..."

      full_prompt = f"Write {language} code for: {prompt}\n\nOutput ONLY code in a markdown block."

      try:
-         r = requests.post(
-             f"{OLLAMA_URL}/api/generate",
-             json={"model": MODEL, "prompt": full_prompt, "stream": False, "options": {"temperature": 0.3}},
-             timeout=300
+         response = llm.create_chat_completion(
+             messages=[{"role": "user", "content": full_prompt}],
+             temperature=0.3,
+             max_tokens=2048
          )
-         if r.status_code == 200:
-             result = r.json().get("response", "")
-             if "```" in result:
-                 parts = result.split("```")
-                 if len(parts) >= 2:
-                     code = parts[1]
-                     if "\n" in code:
-                         code = code.split("\n", 1)[-1]
-                     return code.strip()
-             return result
-         return f"Error: {r.text}"
+         result = response["choices"][0]["message"]["content"]
+         if "```" in result:
+             parts = result.split("```")
+             if len(parts) >= 2:
+                 code = parts[1]
+                 if "\n" in code:
+                     code = code.split("\n", 1)[-1]
+                 return code.strip()
+         return result
      except Exception as e:
          return f"Error: {e}"

  def explain_code(code):
      if not code.strip():
          return "Paste code to explain."
-     if not check_ollama():
-         return "⏳ Ollama starting..."

      try:
-         r = requests.post(
-             f"{OLLAMA_URL}/api/generate",
-             json={"model": MODEL, "prompt": f"Explain this code:\n```\n{code}\n```", "stream": False},
-             timeout=300
+         response = llm.create_chat_completion(
+             messages=[{"role": "user", "content": f"Explain this code:\n```\n{code}\n```"}],
+             temperature=0.5,
+             max_tokens=2048
          )
-         return r.json().get("response", "") if r.status_code == 200 else f"Error: {r.text}"
+         return response["choices"][0]["message"]["content"]
      except Exception as e:
          return f"Error: {e}"

  def fix_code(code, error):
      if not code.strip():
          return "Paste code to fix."
-     if not check_ollama():
-         return "⏳ Ollama starting..."

      prompt = f"Fix this code:\n```\n{code}\n```\nError: {error or 'Not working'}"

      try:
-         r = requests.post(
-             f"{OLLAMA_URL}/api/generate",
-             json={"model": MODEL, "prompt": prompt, "stream": False, "options": {"temperature": 0.3}},
-             timeout=300
+         response = llm.create_chat_completion(
+             messages=[{"role": "user", "content": prompt}],
+             temperature=0.3,
+             max_tokens=2048
          )
-         return r.json().get("response", "") if r.status_code == 200 else f"Error: {r.text}"
+         return response["choices"][0]["message"]["content"]
      except Exception as e:
          return f"Error: {e}"

  with gr.Blocks(title="GOD Coding Machine", theme=gr.themes.Soft(primary_hue="purple")) as demo:

-     gr.Markdown("# 🔥 GOD Coding Machine\n**Docker Edition** • Qwen2.5-Coder running locally • No rate limits!")
+     gr.Markdown("# 🔥 GOD Coding Machine\n**Qwen2.5-Coder-3B** Running locally • No rate limits!")

      temperature = gr.Slider(0, 1, value=0.7, step=0.1, label="Temperature")
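
A note on the new streaming loop: llama-cpp-python's create_chat_completion(..., stream=True) returns an iterator of OpenAI-style chunks, and the first chunk's delta typically carries only the assistant role with no "content" key, which is why chat_stream guards with if "content" in delta. Since the Space no longer talks to an Ollama server over HTTP, requirements.txt (not shown in this commit) would need huggingface_hub and llama-cpp-python in place of requests. A minimal standalone sketch of the same download-load-stream path, runnable outside Gradio:

    from huggingface_hub import hf_hub_download
    from llama_cpp import Llama

    # Same quantized GGUF the Space fetches at startup; default cache dir here
    model_path = hf_hub_download(
        repo_id="Qwen/Qwen2.5-Coder-3B-Instruct-GGUF",
        filename="qwen2.5-coder-3b-instruct-q4_k_m.gguf",
    )
    llm = Llama(model_path=model_path, n_ctx=4096, n_threads=4, verbose=False)

    # stream=True yields dict chunks shaped like OpenAI chat deltas
    stream = llm.create_chat_completion(
        messages=[{"role": "user", "content": "Reverse a string in Python."}],
        temperature=0.3,
        max_tokens=256,
        stream=True,
    )
    for chunk in stream:
        delta = chunk["choices"][0]["delta"]
        if "content" in delta:  # first chunk is usually {"role": "assistant"} only
            print(delta["content"], end="", flush=True)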
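
And a quick worked example of the fence-stripping logic that generate_code carries over unchanged: splitting the reply on ``` puts the first fenced block at index 1, and split("\n", 1)[-1] drops the language tag on its opening line (the sample string is illustrative):

    result = "Here you go:\n```python\nprint('hi')\n```\nDone."
    parts = result.split("```")     # ["Here you go:\n", "python\nprint('hi')\n", "\nDone."]
    code = parts[1]                 # "python\nprint('hi')\n"
    code = code.split("\n", 1)[-1]  # drop the "python" tag line -> "print('hi')\n"
    print(code.strip())             # print('hi')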