sayalimetkar committed on
Commit ec41420 · verified · 1 Parent(s): 8cff1cd

Upload 2 files

Files changed (2)
  1. app.py +254 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,254 @@
+ from ctransformers import AutoModelForCausalLM
+ from llama_cpp import Llama
+ import gradio as gr
+ import re
+ import threading
+
+ # ==============================
+ # LOAD MODELS – OPTIMAL SPEED
+ # ==============================
+ print("Loading Mistral from HuggingFace Hub...")
+ mistral_model = AutoModelForCausalLM.from_pretrained(
+     "TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
+     model_file="mistral-7b-instruct-v0.1.Q4_K_M.gguf",
+     model_type="mistral",
+     threads=8,
+     batch_size=512,
+     context_length=8192,
+     gpu_layers=0,          # CPU only; raise to offload layers to a GPU
+     temperature=0.7,
+     top_p=0.9,
+     top_k=30,
+     repetition_penalty=1.1,
+     max_new_tokens=1024
+ )
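+ # ctransformers keeps the quantized GGUF on the CPU here (gpu_layers=0) and
+ # treats the sampling kwargs above as generation defaults; kwargs passed on a
+ # per-call basis override them.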
+
+ print("Loading Qwen2.5-Coder from HuggingFace Hub...")
+ # Llama() expects a local file path, so fetch the GGUF from the Hub instead
+ qwen_model = Llama.from_pretrained(
+     repo_id="Qwen/Qwen2.5-Coder-7B-Instruct-GGUF",
+     filename="qwen2.5-coder-7b-instruct-q4_k_m.gguf",
+     n_ctx=8192,
+     n_threads=4,       # CPU worker threads
+     n_batch=512,       # prompt-processing batch size
+     n_gpu_layers=0,    # change to 35–99 to offload layers to a GPU
+     use_mlock=True,
+     verbose=False
+ )
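+ # NOTE: each Q4_K_M file is roughly 4 GB; both are downloaded on first run and
+ # cached locally by huggingface_hub.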
+
+ stop_event = threading.Event()
+
+ # ==============================
+ # DETECTION - MATH + CODE = ALWAYS QWEN
+ # ==============================
+ def is_coding_or_math(text: str) -> bool:
+     text = text.lower()
+
+     # Math & number-series triggers
+     math_triggers = [
+         # General math
+         "next number", "series", "sequence", "pattern", "find the next",
+         "solve", "calculate", "equation", "math", "mathematics", "integral",
+         "derivative", "limit", "factorial", "prime", "composite",
+         "geometry", "algebra", "probability", "statistics", "number",
+         "compute", "simplify", "evaluate", "expression", "fraction",
+         "decimal", "percentage", "ratio", "proportion", "root", "square root",
+         "logarithm", "log", "ln", "exponent", "power", "base",
+         "matrix", "determinant", "vector", "dot product", "cross product",
+         "trigonometry", "sine", "cosine", "tan", "cot", "sec", "cosec",
+         "triangle", "circle", "radius", "diameter", "area", "perimeter",
+         "volume", "surface area", "integrate", "differentiate",
+         "quadratic", "polynomial", "cubic", "linear equation",
+         "graph", "intercept", "slope", "intersection", "domain", "range",
+         "modulus", "absolute", "complex number", "imaginary", "real number",
+         "mean", "median", "mode", "variance", "standard deviation",
+         "correlation", "regression", "distribution", "normal distribution",
+         "binomial", "poisson", "combinatorics", "permutation", "combination",
+         "set theory", "subset", "union", "intersection", "probability of",
+     ]
+
+     # Coding triggers
+     code_triggers = [
+         # General programming
+         "code", "program", "coding", "script", "implement", "build",
+         "function", "method", "class", "object", "module", "package",
+         "syntax", "runtime", "variable", "parameter", "argument",
+         "return", "loop", "for loop", "while loop", "if statement",
+         "condition", "boolean", "string", "array", "list", "dictionary",
+         "hashmap", "tuple", "stack", "queue", "tree", "graph", "linked list",
+         "pointer", "reference", "memory", "heap", "stack memory",
+         # Languages
+         "python", "java", "javascript", "typescript", "c++", "c#", "c language",
+         "go", "rust", "php", "sql", "html", "css", "react", "nodejs",
+         "json", "xml", "yaml", "bash", "shell script",
+         # Data science / ML
+         "pandas", "numpy", "sklearn", "tensorflow", "pytorch",
+         "dataframe", "dataset", "model training", "machine learning",
+         "neural network", "deep learning",
+         # Debugging & errors
+         "debug", "traceback", "error", "bug", "fix this code",
+         "segmentation fault", "stack overflow", "undefined variable",
+         # Algorithms
+         "algorithm", "time complexity", "space complexity",
+         "big o notation", "sort", "merge sort", "quick sort",
+         "binary search", "dynamic programming", "recursion",
+         "graph traversal", "dfs", "bfs", "greedy algorithm",
+         # DevOps / tools
+         "docker", "kubernetes", "api", "rest api", "jwt",
+         "server", "client", "database", "mongodb", "mysql",
+         "postgres", "orm", "deploy", "deployment", "kafka",
+         # Competitive coding
+         "leetcode", "hackerrank", "codechef", "geeksforgeeks"
+     ]
+
+     # Any math or code keyword found -> Qwen
+     # (text is lowercased above, so triggers must be lowercase too)
+     if any(trigger in text for trigger in math_triggers + code_triggers):
+         return True
+
+     # Digits plus math symbols -> Qwen
+     if re.search(r'\d', text) and any(op in text for op in "+-*/=^()[]{}"):
+         return True
+
+     # Comma-separated numbers (like 2, 6, 12, 20) -> Qwen
+     if re.search(r'\d+\s*,\s*\d+', text):
+         return True
+
+     return False
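+ # Illustrative spot-checks of the router (assumed inputs, not executed by the app):
+ #   is_coding_or_math("write a python function to sort a list")  -> True  (keywords)
+ #   is_coding_or_math("what is the next number: 2, 6, 12, 20")   -> True  (number list)
+ #   is_coding_or_math("hi, how are you today?")                  -> False (chat -> Mistral)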
+
+ # ==============================
+ # FIXED STREAMING (NO ECHOING!)
+ # ==============================
+ def stream_mistral(prompt):
+     stop_event.clear()
+
+     system_prompt = (
+         "You are a helpful, concise assistant. "
+         "Do NOT repeat the user's question. "
+         "Answer directly and clearly."
+     )
+
+     # Mistral-Instruct has no separate system slot (<<SYS>> is Llama-2's format),
+     # so fold the instructions into the [INST] block
+     formatted_prompt = f"<s>[INST] {system_prompt}\n\n{prompt} [/INST]"
+
+     yield [{"role": "assistant", "content": "**[Mistral]**\n\n"}]
+
+     output = ""
+     for token in mistral_model(
+         formatted_prompt,
+         stream=True,
+         max_new_tokens=800,
+         stop=["</s>"]
+     ):
+         if stop_event.is_set():
+             break
+
+         output += token
+         clean = output.strip()
+
+         yield [{"role": "assistant", "content": f"**[Mistral]**\n\n{clean}"}]
+
+ def stream_qwen(prompt):
+     stop_event.clear()
+     resp = ""
+
+     # Start output
+     yield [{"role": "assistant", "content": "**[Qwen2.5-Coder]**\n\n"}]
+
+     # ChatML prompt format used by Qwen2.5-Instruct models
+     formatted = (
+         "<|im_start|>system\n"
+         "You are a world-class math and coding assistant. "
+         "ALWAYS respond with clean LaTeX. Use $...$ for inline and $$...$$ for display. "
+         "Use \\boxed{} for final answers.\n"
+         "<|im_end|>\n"
+         "<|im_start|>user\n" + prompt + "\n<|im_end|>\n"
+         "<|im_start|>assistant\n"
+     )
+
+     for chunk in qwen_model(
+         formatted,
+         stream=True,
+         max_tokens=800,
+         temperature=0.1,
+         top_p=0.9,
+         top_k=20,
+         repeat_penalty=1.05
+     ):
+         if stop_event.is_set():
+             break
+
+         # Safe extraction - completion chunks carry "text", chat chunks carry "delta"
+         choice = chunk["choices"][0]
+         token = (
+             choice.get("text") or
+             choice.get("delta", {}).get("content", "") or
+             ""
+         )
+
+         resp += token
+
+         yield [{"role": "assistant", "content": f"**[Qwen2.5-Coder]**\n\n{resp}"}]
+
+ # ==============================
+ # MAIN CHAT - WORKS WITH MESSAGES FORMAT
+ # ==============================
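+ # With type="messages", Gradio passes history as a list of role/content dicts,
+ # e.g. [{"role": "user", "content": "hi"}, {"role": "assistant", "content": "hello"}]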
+ def chat(message, history):
+     stop_event.clear()
+
+     # Handle history as a list of dicts (Gradio's type="messages")
+     messages = []
+     for msg in history:
+         if isinstance(msg, dict) and "role" in msg:
+             messages.append(msg)
+         elif isinstance(msg, (list, tuple)) and len(msg) == 2:
+             # Fallback for the legacy (user, assistant) tuple format
+             u, a = msg
+             if u: messages.append({"role": "user", "content": u})
+             if a: messages.append({"role": "assistant", "content": a})
+     messages.append({"role": "user", "content": message})
+
+     streamer = stream_qwen(message) if is_coding_or_math(message) else stream_mistral(message)
+
+     partial = messages.copy()
+     first = True
+     for chunk in streamer:
+         if stop_event.is_set(): break
+         if first:
+             partial.append(chunk[0])
+             first = False
+         else:
+             partial[-1] = chunk[0]
+         yield partial
+
+ def stop():
+     stop_event.set()
+
+ # ==============================
+ # UI
+ # ==============================
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
+     gr.Markdown("# Dual Local AI - Clean Responses (No Echoing!)\n**Code/Math → Qwen2.5-Coder** | **Chat → Mistral**")
+     chatbot = gr.Chatbot(height=720, type="messages", show_copy_button=True)
+     with gr.Row():
+         txt = gr.Textbox(placeholder="Ask anything…", label="Message", lines=4, scale=8)
+         send = gr.Button("Send", variant="primary")
+         stop_btn = gr.Button("Stop", variant="stop")
+
+     send.click(chat, [txt, chatbot], chatbot).then(lambda: gr.update(value=""), outputs=txt)
+     txt.submit(chat, [txt, chatbot], chatbot).then(lambda: gr.update(value=""), outputs=txt)
+     stop_btn.click(stop)
+
+ print("Launching FINAL version (no echoing, no crashes)...")
+ demo.launch(server_port=7860, inbrowser=True)
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ ctransformers==0.2.27
+ llama-cpp-python==0.2.79
+ gradio>=4.44,<5    # Chatbot(type="messages") requires Gradio 4.44+
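
To try the Space locally (a minimal sketch; it assumes a recent Python, the two
files above in the working directory, and enough RAM for two 7B Q4 models):

    pip install -r requirements.txt
    python app.py   # first run downloads both GGUF files, then the UI opens on port 7860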