Spaces:
Runtime error
Runtime error
import re
import threading

import gradio as gr
from ctransformers import AutoModelForCausalLM
from llama_cpp import Llama
| # ============================== | |
| # LOAD MODELS — OPTIMAL SPEED | |
| # ============================== | |
# Both models are fetched from the HuggingFace Hub at import time and kept
# as module-level singletons that the streaming functions below call.

print("Loading Mistral from HuggingFace Hub...")
mistral_model = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
    model_file="mistral-7b-instruct-v0.1.Q4_K_M.gguf",
    model_type="mistral",
    threads=8,
    batch_size=512,
    context_length=8192,
    gpu_layers=0,
    temperature=0.7,
    top_p=0.9,
    top_k=30,
    repetition_penalty=1.1,
    max_new_tokens=1024,
)

print("Loading Qwen2.5-Coder from HuggingFace Hub...")
# `Llama(model_path=...)` expects a path to a local .gguf file; a Hub repo id
# has to go through `Llama.from_pretrained`, which downloads `filename` from
# `repo_id` and forwards the remaining keyword arguments to the constructor.
qwen_model = Llama.from_pretrained(
    repo_id="Qwen/Qwen2.5-Coder-7B-Instruct-GGUF",
    filename="qwen2.5-coder-7b-instruct-q4_k_m.gguf",
    n_ctx=8192,
    n_threads=4,
    n_batch=512,
    n_gpu_layers=0,  # CPU only; raise (e.g. 35-99) to offload layers to a GPU
    use_mlock=True,
    verbose=False,
)

# Set by the Stop button; polled by the streaming loops between tokens.
stop_event = threading.Event()
| # ============================== | |
| # SMART DETECTION | |
| # ============================== | |
| # ============================== | |
| # BULLETPROOF CODE DETECTION (Qwen will catch EVERYTHING now) | |
| # ============================== | |
| # ============================== | |
| # BULLETPROOF DETECTION — MATH + CODE = ALWAYS QWEN | |
| # ============================== | |
# Keywords that route a message to the math/code model. All entries are
# matched case-insensitively against the lowercased message.
_MATH_TRIGGERS = (
    # General math
    "next number", "series", "sequence", "pattern", "find the next",
    "solve", "calculate", "equation", "math", "mathematics", "integral",
    "derivative", "limit", "factorial", "prime", "composite",
    "geometry", "algebra", "probability", "statistics", "number",
    "compute", "simplify", "evaluate", "expression", "fraction",
    "decimal", "percentage", "ratio", "proportion", "root", "square root",
    "logarithm", "log", "ln", "exponent", "power", "base",
    "matrix", "determinant", "vector", "dot product", "cross product",
    "trigonometry", "sine", "cosine", "tan", "cot", "sec", "cosec",
    # Spelled out because the short forms above only match as whole words.
    "tangent", "cotangent", "secant",
    "triangle", "circle", "radius", "diameter", "area", "perimeter",
    "volume", "surface area", "integrate", "differentiate",
    "quadratic", "polynomial", "cubic", "linear equation",
    "graph", "intercept", "slope", "intersection", "domain", "range",
    "modulus", "absolute", "complex number", "imaginary", "real number",
    "mean", "median", "mode", "variance", "standard deviation",
    "correlation", "regression", "distribution", "normal distribution",
    "binomial", "poisson", "combinatorics", "permutation", "combination",
    "set theory", "subset", "union", "probability of",
)

_CODE_TRIGGERS = (
    # General programming
    "code", "program", "coding", "script", "implement", "build",
    "function", "method", "class", "object", "module", "package",
    "syntax", "runtime", "variable", "parameter", "argument",
    "return", "loop", "for loop", "while loop", "if statement",
    "condition", "boolean", "string", "array", "list", "dictionary",
    "hashmap", "tuple", "stack", "queue", "tree", "graph", "linked list",
    "pointer", "reference", "memory", "heap", "stack memory",
    # Languages
    "python", "java", "javascript", "typescript", "c++", "c#", "c language",
    "go", "rust", "php", "sql", "html", "css", "react", "nodejs",
    "json", "xml", "yaml", "bash", "shell script",
    # Data science / ML
    "pandas", "numpy", "sklearn", "tensorflow", "pytorch",
    "dataframe", "dataset", "model training", "machine learning",
    "neural network", "deep learning",
    # Debugging & errors
    "debug", "traceback", "error", "bug", "fix this code",
    "segmentation fault", "stack overflow", "undefined variable",
    # Algorithms
    "algorithm", "time complexity", "space complexity",
    "big o notation", "sort", "merge sort", "quick sort",
    "binary search", "dynamic programming", "recursion",
    "graph traversal", "dfs", "bfs", "greedy algorithm",
    # DevOps / tools
    "docker", "kubernetes", "api", "rest api", "jwt",
    "server", "client", "database", "mongodb", "mysql",
    "postgres", "orm", "deploy", "deployment", "kafka",
    # Competitive coding
    "leetcode", "hackerrank", "codechef", "geeksforgeeks",
)

# Triggers this short are too ambiguous as prefixes ("go" -> "good",
# "log" -> "logic"), so they must appear as a whole word (optionally plural).
_SHORT_TRIGGER_MAX_LEN = 3

# Characters that, next to a digit, suggest an arithmetic expression.
_MATH_SYMBOLS = "+-*/=^()[]{}"

# Two numbers separated by a comma, e.g. "2, 6, 12, 20".
_NUMBER_LIST_RE = re.compile(r"\d+\s*,\s*\d+")


def _compile_trigger_pattern(triggers):
    """Compile *triggers* into one regex that only matches at word starts."""
    unique = sorted({trigger.lower() for trigger in triggers})
    short = [re.escape(t) for t in unique if len(t) <= _SHORT_TRIGGER_MAX_LEN]
    longer = [re.escape(t) for t in unique if len(t) > _SHORT_TRIGGER_MAX_LEN]

    alternatives = []
    if longer:
        # Longer triggers may be followed by a suffix ("program" -> "programming").
        alternatives.append("(?:" + "|".join(longer) + ")")
    if short:
        alternatives.append("(?:" + "|".join(short) + ")s?(?![a-z0-9])")

    # Lookarounds instead of \b because triggers such as "c++" and "c#" end in
    # non-word characters, where \b does not behave as a word edge.
    return re.compile("(?<![a-z0-9])(?:" + "|".join(alternatives) + ")")


_TRIGGER_RE = _compile_trigger_pattern(_MATH_TRIGGERS + _CODE_TRIGGERS)


def is_coding_or_math(text: str) -> bool:
    """Return True when *text* looks like a math or programming request.

    A message qualifies when any of the following holds:

    * it contains a math/code trigger keyword starting at a word boundary
      (triggers of three characters or fewer must be a whole word);
    * it contains a digit together with an arithmetic/bracket symbol;
    * it contains comma-separated numbers such as ``2, 6, 12``.

    Matching is case-insensitive.
    """
    lowered = text.lower()

    if _TRIGGER_RE.search(lowered):
        return True

    # A digit plus an operator or bracket is treated as an expression.
    if re.search(r"\d", lowered) and any(sym in lowered for sym in _MATH_SYMBOLS):
        return True

    # Number series like "2, 6, 12, 20".
    return _NUMBER_LIST_RE.search(lowered) is not None
| # ============================== | |
| # FIXED STREAMING (NO ECHOING!) | |
| # ============================== | |
def stream_mistral(prompt):
    """Stream Mistral's answer to *prompt* as Gradio chat updates.

    Every yielded value is a one-element list holding an assistant message
    dict. Its content is the ``**[Mistral]**`` banner followed by all text
    generated so far, with surrounding whitespace stripped. The loop stops
    early once ``stop_event`` is set.
    """
    banner = "**[Mistral]**\n\n"

    def as_update(body):
        # Single assistant message carrying the banner plus the text so far.
        return [{"role": "assistant", "content": banner + body}]

    stop_event.clear()

    instructions = (
        "You are a helpful, concise assistant. Do NOT repeat the user's question. "
        "Answer directly and clearly."
    )
    wrapped = f"<s>[INST] <<SYS>>{instructions}<</SYS>> {prompt} [/INST]"

    # Emit the bare banner first so the UI shows which model is answering
    # before the first token arrives.
    yield as_update("")

    generated = ""
    token_stream = mistral_model(
        wrapped,
        stream=True,
        max_new_tokens=800,
        stop=["</s>"],
    )
    for piece in token_stream:
        if stop_event.is_set():
            break
        generated += piece
        yield as_update(generated.strip())
def stream_qwen(prompt):
    """Stream Qwen2.5-Coder's answer to *prompt* as Gradio chat updates.

    Every yielded value is a one-element list holding an assistant message
    dict. Its content is the ``**[Qwen2.5-Coder]**`` banner followed by all
    text generated so far. The loop stops early once ``stop_event`` is set.
    """
    banner = "**[Qwen2.5-Coder]**\n\n"

    def as_update(body):
        # Single assistant message carrying the banner plus the text so far.
        return [{"role": "assistant", "content": banner + body}]

    stop_event.clear()
    answer = ""

    # Show the banner immediately, before the model produces anything.
    yield as_update("")

    # ChatML-style prompt: system turn, user turn, then an open assistant turn.
    system_turn = (
        "<|im_start|>system\n"
        "You are a world-class math and coding assistant. "
        "ALWAYS respond with clean LaTeX. Use $...$ for inline and $$...$$ for display. "
        "Use \\boxed{} for final answers.\n"
        "<|im_end|>\n"
    )
    user_turn = "<|im_start|>user\n" + prompt + "\n<|im_end|>\n"
    chatml_prompt = system_turn + user_turn + "<|im_start|>assistant\n"

    sampling = {
        "stream": True,
        "max_tokens": 800,
        "temperature": 0.1,
        "top_p": 0.9,
        "top_k": 20,
        "repeat_penalty": 1.05,
    }
    for chunk in qwen_model(chatml_prompt, **sampling):
        if stop_event.is_set():
            break

        # Accept either a completion-style "text" field or a chat-style
        # "delta.content" field; anything missing or empty becomes "".
        first_choice = chunk["choices"][0]
        piece = first_choice.get("text")
        if not piece:
            piece = first_choice.get("delta", {}).get("content", "") or ""

        answer += piece
        yield as_update(answer)
| # ============================== | |
| # MAIN CHAT β WORKS WITH MESSAGES FORMAT | |
| # ============================== | |
def _history_to_messages(history):
    """Normalize Gradio chat history into a list of role/content dicts."""
    messages = []
    for entry in history or []:
        if isinstance(entry, dict) and "role" in entry:
            # Already in the `type="messages"` format.
            messages.append(entry)
        elif isinstance(entry, (list, tuple)) and len(entry) == 2:
            # Legacy format: each entry is ONE (user, assistant) pair, so it
            # must be unpacked directly rather than iterated over.
            user_text, assistant_text = entry
            if user_text:
                messages.append({"role": "user", "content": user_text})
            if assistant_text:
                messages.append({"role": "assistant", "content": assistant_text})
    return messages


def chat(message, history):
    """Route *message* to the right model and stream the growing conversation.

    Args:
        message: The user's new message text.
        history: Prior conversation, either as role/content dicts or as
            legacy ``(user, assistant)`` pairs. ``None`` is treated as empty.

    Yields:
        The full message list (history, the new user message, and the
        assistant reply so far) after each streamed update. The same list
        object is yielded every time and updated in place.
    """
    stop_event.clear()

    conversation = _history_to_messages(history)
    conversation.append({"role": "user", "content": message})

    if is_coding_or_math(message):
        streamer = stream_qwen(message)
    else:
        streamer = stream_mistral(message)

    reply_started = False
    for update in streamer:
        if stop_event.is_set():
            break
        if reply_started:
            # Replace the in-progress assistant message with the longer one.
            conversation[-1] = update[0]
        else:
            conversation.append(update[0])
            reply_started = True
        yield conversation
def stop():
    """Ask the active streaming loop to halt at its next token check."""
    stop_event.set()
| # ============================== | |
| # UI | |
| # ============================== | |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # NOTE(review): the dash, arrows and ellipsis below were mis-encoded in the
    # source; restored to the most likely intended characters — confirm.
    gr.Markdown(
        "# Dual Local AI — Clean Responses (No Echoing!)\n"
        "**Code/Math → Qwen2.5-Coder** | **Chat → Mistral**"
    )
    chatbot = gr.Chatbot(height=720, type="messages", show_copy_button=True)
    with gr.Row():
        txt = gr.Textbox(placeholder="Ask anything…", label="Message", lines=4, scale=8)
        send = gr.Button("Send", variant="primary")
        stop_btn = gr.Button("Stop", variant="stop")

    # Clicking Send and pressing Enter in the textbox behave identically:
    # stream the reply into the chatbot, then clear the input box.
    for register in (send.click, txt.submit):
        register(chat, [txt, chatbot], chatbot).then(
            lambda: gr.update(value=""), outputs=txt
        )

    # Stop only raises the flag; the streaming loops poll it between tokens.
    stop_btn.click(stop)

print("Launching FINAL version (no echoing, no crashes)...")
demo.launch(server_port=7860, inbrowser=True)