sayalimetkar committed on
Commit ec41420 · verified · 1 Parent(s): 8cff1cd

Upload 2 files

Files changed (2)
  1. app.py +254 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,254 @@
+ from ctransformers import AutoModelForCausalLM
+ from llama_cpp import Llama
+ import gradio as gr
+ import re
+ import threading
+
+ # ==============================
+ # LOAD MODELS – OPTIMAL SPEED
+ # ==============================
+ print("Loading Mistral from HuggingFace Hub...")
+ mistral_model = AutoModelForCausalLM.from_pretrained(
+     "TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
+     model_file="mistral-7b-instruct-v0.1.Q4_K_M.gguf",
+     model_type="mistral",
+     threads=8,
+     batch_size=512,
+     context_length=8192,
+     gpu_layers=0,          # CPU only; raise to offload layers to a GPU
+     temperature=0.7,
+     top_p=0.9,
+     top_k=30,
+     repetition_penalty=1.1,
+     max_new_tokens=1024
+ )
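+ # ctransformers keeps the quantized GGUF on the CPU here (gpu_layers=0) and
+ # treats the sampling kwargs above as generation defaults; kwargs passed on a
+ # per-call basis override them.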
+
+ print("Loading Qwen2.5-Coder from HuggingFace Hub...")
+ # Llama() expects a local file path, so fetch the GGUF from the Hub instead
+ qwen_model = Llama.from_pretrained(
+     repo_id="Qwen/Qwen2.5-Coder-7B-Instruct-GGUF",
+     filename="qwen2.5-coder-7b-instruct-q4_k_m.gguf",
+     n_ctx=8192,
+     n_threads=4,       # CPU worker threads
+     n_batch=512,       # prompt-processing batch size
+     n_gpu_layers=0,    # change to 35–99 to offload layers to a GPU
+     use_mlock=True,
+     verbose=False
+ )
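+ # NOTE: each Q4_K_M file is roughly 4 GB; both are downloaded on first run and
+ # cached locally by huggingface_hub.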
+
+ stop_event = threading.Event()
+
+ # ==============================
+ # DETECTION - MATH + CODE = ALWAYS QWEN
+ # ==============================
+ def is_coding_or_math(text: str) -> bool:
+     text = text.lower()
+
+     # Math & number-series triggers
+     math_triggers = [
+         # General math
+         "next number", "series", "sequence", "pattern", "find the next",
+         "solve", "calculate", "equation", "math", "mathematics", "integral",
+         "derivative", "limit", "factorial", "prime", "composite",
+         "geometry", "algebra", "probability", "statistics", "number",
+         "compute", "simplify", "evaluate", "expression", "fraction",
+         "decimal", "percentage", "ratio", "proportion", "root", "square root",
+         "logarithm", "log", "ln", "exponent", "power", "base",
+         "matrix", "determinant", "vector", "dot product", "cross product",
+         "trigonometry", "sine", "cosine", "tan", "cot", "sec", "cosec",
+         "triangle", "circle", "radius", "diameter", "area", "perimeter",
+         "volume", "surface area", "integrate", "differentiate",
+         "quadratic", "polynomial", "cubic", "linear equation",
+         "graph", "intercept", "slope", "intersection", "domain", "range",
+         "modulus", "absolute", "complex number", "imaginary", "real number",
+         "mean", "median", "mode", "variance", "standard deviation",
+         "correlation", "regression", "distribution", "normal distribution",
+         "binomial", "poisson", "combinatorics", "permutation", "combination",
+         "set theory", "subset", "union", "intersection", "probability of",
+     ]
+
+     # Coding triggers
+     code_triggers = [
+         # General programming
+         "code", "program", "coding", "script", "implement", "build",
+         "function", "method", "class", "object", "module", "package",
+         "syntax", "runtime", "variable", "parameter", "argument",
+         "return", "loop", "for loop", "while loop", "if statement",
+         "condition", "boolean", "string", "array", "list", "dictionary",
+         "hashmap", "tuple", "stack", "queue", "tree", "graph", "linked list",
+         "pointer", "reference", "memory", "heap", "stack memory",
+         # Languages
+         "python", "java", "javascript", "typescript", "c++", "c#", "c language",
+         "go", "rust", "php", "sql", "html", "css", "react", "nodejs",
+         "json", "xml", "yaml", "bash", "shell script",
+         # Data science / ML
+         "pandas", "numpy", "sklearn", "tensorflow", "pytorch",
+         "dataframe", "dataset", "model training", "machine learning",
+         "neural network", "deep learning",
+         # Debugging & errors
+         "debug", "traceback", "error", "bug", "fix this code",
+         "segmentation fault", "stack overflow", "undefined variable",
+         # Algorithms
+         "algorithm", "time complexity", "space complexity",
+         "big o notation", "sort", "merge sort", "quick sort",
+         "binary search", "dynamic programming", "recursion",
+         "graph traversal", "dfs", "bfs", "greedy algorithm",
+         # DevOps / tools
+         "docker", "kubernetes", "api", "rest api", "jwt",
+         "server", "client", "database", "mongodb", "mysql",
+         "postgres", "orm", "deploy", "deployment", "kafka",
+         # Competitive coding
+         "leetcode", "hackerrank", "codechef", "geeksforgeeks"
+     ]
+
+     # Any math or code keyword found -> Qwen
+     # (text is lowercased above, so triggers must be lowercase too)
+     if any(trigger in text for trigger in math_triggers + code_triggers):
+         return True
+
+     # Digits plus math symbols -> Qwen
+     if re.search(r'\d', text) and any(op in text for op in "+-*/=^()[]{}"):
+         return True
+
+     # Comma-separated numbers (like 2, 6, 12, 20) -> Qwen
+     if re.search(r'\d+\s*,\s*\d+', text):
+         return True
+
+     return False
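+ # Illustrative spot-checks of the router (assumed inputs, not executed by the app):
+ #   is_coding_or_math("write a python function to sort a list")  -> True  (keywords)
+ #   is_coding_or_math("what is the next number: 2, 6, 12, 20")   -> True  (number list)
+ #   is_coding_or_math("hi, how are you today?")                  -> False (chat -> Mistral)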
+
+ # ==============================
+ # FIXED STREAMING (NO ECHOING!)
+ # ==============================
+ def stream_mistral(prompt):
+     stop_event.clear()
+
+     system_prompt = (
+         "You are a helpful, concise assistant. "
+         "Do NOT repeat the user's question. "
+         "Answer directly and clearly."
+     )
+
+     # Mistral-Instruct has no separate system slot (<<SYS>> is Llama-2's format),
+     # so fold the instructions into the [INST] block
+     formatted_prompt = f"<s>[INST] {system_prompt}\n\n{prompt} [/INST]"
+
+     yield [{"role": "assistant", "content": "**[Mistral]**\n\n"}]
+
+     output = ""
+     for token in mistral_model(
+         formatted_prompt,
+         stream=True,
+         max_new_tokens=800,
+         stop=["</s>"]
+     ):
+         if stop_event.is_set():
+             break
+
+         output += token
+         clean = output.strip()
+
+         yield [{"role": "assistant", "content": f"**[Mistral]**\n\n{clean}"}]
+
+ def stream_qwen(prompt):
+     stop_event.clear()
+     resp = ""
+
+     # Start output
+     yield [{"role": "assistant", "content": "**[Qwen2.5-Coder]**\n\n"}]
+
+     # ChatML prompt format used by Qwen2.5-Instruct models
+     formatted = (
+         "<|im_start|>system\n"
+         "You are a world-class math and coding assistant. "
+         "ALWAYS respond with clean LaTeX. Use $...$ for inline and $$...$$ for display. "
+         "Use \\boxed{} for final answers.\n"
+         "<|im_end|>\n"
+         "<|im_start|>user\n" + prompt + "\n<|im_end|>\n"
+         "<|im_start|>assistant\n"
+     )
+
+     for chunk in qwen_model(
+         formatted,
+         stream=True,
+         max_tokens=800,
+         temperature=0.1,
+         top_p=0.9,
+         top_k=20,
+         repeat_penalty=1.05
+     ):
+         if stop_event.is_set():
+             break
+
+         # Safe extraction - completion chunks carry "text", chat chunks carry "delta"
+         choice = chunk["choices"][0]
+         token = (
+             choice.get("text") or
+             choice.get("delta", {}).get("content", "") or
+             ""
+         )
+
+         resp += token
+
+         yield [{"role": "assistant", "content": f"**[Qwen2.5-Coder]**\n\n{resp}"}]
+
+ # ==============================
+ # MAIN CHAT - WORKS WITH MESSAGES FORMAT
+ # ==============================
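+ # With type="messages", Gradio passes history as a list of role/content dicts,
+ # e.g. [{"role": "user", "content": "hi"}, {"role": "assistant", "content": "hello"}]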
+ def chat(message, history):
+     stop_event.clear()
+
+     # Handle history as a list of dicts (Gradio's type="messages")
+     messages = []
+     for msg in history:
+         if isinstance(msg, dict) and "role" in msg:
+             messages.append(msg)
+         elif isinstance(msg, (list, tuple)) and len(msg) == 2:
+             # Fallback for the legacy (user, assistant) tuple format
+             u, a = msg
+             if u: messages.append({"role": "user", "content": u})
+             if a: messages.append({"role": "assistant", "content": a})
+     messages.append({"role": "user", "content": message})
+
+     streamer = stream_qwen(message) if is_coding_or_math(message) else stream_mistral(message)
+
+     partial = messages.copy()
+     first = True
+     for chunk in streamer:
+         if stop_event.is_set(): break
+         if first:
+             partial.append(chunk[0])
+             first = False
+         else:
+             partial[-1] = chunk[0]
+         yield partial
+
+ def stop():
+     stop_event.set()
+
+ # ==============================
+ # UI
+ # ==============================
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
+     gr.Markdown("# Dual Local AI - Clean Responses (No Echoing!)\n**Code/Math → Qwen2.5-Coder** | **Chat → Mistral**")
+     chatbot = gr.Chatbot(height=720, type="messages", show_copy_button=True)
+     with gr.Row():
+         txt = gr.Textbox(placeholder="Ask anything…", label="Message", lines=4, scale=8)
+         send = gr.Button("Send", variant="primary")
+         stop_btn = gr.Button("Stop", variant="stop")
+
+     send.click(chat, [txt, chatbot], chatbot).then(lambda: gr.update(value=""), outputs=txt)
+     txt.submit(chat, [txt, chatbot], chatbot).then(lambda: gr.update(value=""), outputs=txt)
+     stop_btn.click(stop)
+
+ print("Launching FINAL version (no echoing, no crashes)...")
+ demo.launch(server_port=7860, inbrowser=True)
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ ctransformers==0.2.27
+ llama-cpp-python==0.2.79
+ gradio>=4.44,<5    # Chatbot(type="messages") requires Gradio 4.44+
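
To try the Space locally (a minimal sketch; it assumes a recent Python, the two
files above in the working directory, and enough RAM for two 7B Q4 models):

    pip install -r requirements.txt
    python app.py   # first run downloads both GGUF files, then the UI opens on port 7860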