Spaces:

sayalimetkar
/

optimized_model

Runtime error

App Files Files Community

optimized_model / app.py

sayalimetkar

Update app.py

16107a7 verified 20 days ago

raw

history blame contribute delete

8.7 kB

	import re
	import threading

	from ctransformers import AutoModelForCausalLM
	import gradio as gr
	from llama_cpp import Llama

	# ==============================
	# LOAD MODELS – OPTIMAL SPEED
	# ==============================
	print("Loading Mistral from HuggingFace Hub...")
	# General-chat model: a 4-bit GGUF build of Mistral-7B-Instruct loaded through
	# ctransformers. The keyword arguments below become the model's default
	# runtime/generation configuration; individual calls may override them.
	mistral_model = AutoModelForCausalLM.from_pretrained(
	    "TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
	    model_file="mistral-7b-instruct-v0.1.Q4_K_M.gguf",
	    model_type="mistral",
	    # --- runtime ---
	    context_length=8192,
	    batch_size=512,
	    threads=8,
	    gpu_layers=0,  # no layers offloaded to a GPU
	    # --- sampling defaults ---
	    max_new_tokens=1024,
	    temperature=0.7,
	    top_k=30,
	    top_p=0.9,
	    repetition_penalty=1.1,
	)

	print("Loading Qwen2.5-Coder from HuggingFace Hub...")
	# Math/code model, loaded through llama-cpp-python.
	# `Llama(model_path=...)` expects a path to a local .gguf file, so passing a
	# Hub repo id there cannot work. `Llama.from_pretrained` downloads (and
	# caches) `filename` from `repo_id` and forwards the remaining keyword
	# arguments to the `Llama` constructor.
	qwen_model = Llama.from_pretrained(
	    repo_id="Qwen/Qwen2.5-Coder-7B-Instruct-GGUF",
	    filename="qwen2.5-coder-7b-instruct-q4_k_m.gguf",
	    n_ctx=8192,
	    n_threads=4,
	    n_batch=512,
	    n_gpu_layers=0,  # CPU only; the original note suggests 35–99 when a GPU is available
	    use_mlock=True,
	    verbose=False,
	)

	# Module-level stop flag: stop() sets it; chat(), stream_mistral() and
	# stream_qwen() clear it when they start and poll it between streamed tokens.
	# NOTE(review): the flag is global to the process, so a Stop click presumably
	# interrupts every generation in flight, not just the caller's — confirm.
	stop_event = threading.Event()

	# ==============================
	# PROMPT ROUTING — MATH + CODE → QWEN, EVERYTHING ELSE → MISTRAL
	# ==============================
	# Keywords that mark a prompt as math-related. All entries are lower-case
	# because prompts are lower-cased before matching.
	_MATH_TRIGGERS = (
	    # General math
	    "next number", "series", "sequence", "pattern", "find the next",
	    "solve", "calculate", "equation", "math", "mathematics", "integral",
	    "derivative", "limit", "factorial", "prime", "composite",
	    "geometry", "algebra", "probability", "statistics", "number",
	    "compute", "simplify", "evaluate", "expression", "fraction",
	    "decimal", "percentage", "ratio", "proportion", "root", "square root",
	    "logarithm", "log", "ln", "exponent", "power", "base",
	    "matrix", "determinant", "vector", "dot product", "cross product",
	    "trigonometry", "sine", "cosine", "tan", "cot", "sec", "cosec",
	    "triangle", "circle", "radius", "diameter", "area", "perimeter",
	    "volume", "surface area", "integrate", "differentiate",
	    "quadratic", "polynomial", "cubic", "linear equation",
	    "graph", "intercept", "slope", "intersection", "domain", "range",
	    "modulus", "absolute", "complex number", "imaginary", "real number",
	    "mean", "median", "mode", "variance", "standard deviation",
	    "correlation", "regression", "distribution", "normal distribution",
	    "binomial", "poisson", "combinatorics", "permutation", "combination",
	    "set theory", "subset", "union", "probability of",
	)

	# Keywords that mark a prompt as programming-related.
	_CODE_TRIGGERS = (
	    # General programming
	    "code", "program", "coding", "script", "implement", "build",
	    "function", "method", "class", "object", "module", "package",
	    "syntax", "runtime", "variable", "parameter", "argument",
	    "return", "loop", "for loop", "while loop", "if statement",
	    "condition", "boolean", "string", "array", "list", "dictionary",
	    "hashmap", "tuple", "stack", "queue", "tree", "graph", "linked list",
	    "pointer", "reference", "memory", "heap", "stack memory",
	    # Languages
	    "python", "java", "javascript", "typescript", "c++", "c#", "c language",
	    "go", "rust", "php", "sql", "html", "css", "react", "nodejs",
	    "json", "xml", "yaml", "bash", "shell script",
	    # Data science / ML
	    "pandas", "numpy", "sklearn", "tensorflow", "pytorch",
	    "dataframe", "dataset", "model training", "machine learning",
	    "neural network", "deep learning",
	    # Debugging & errors
	    "debug", "traceback", "error", "bug", "fix this code",
	    "segmentation fault", "stack overflow", "undefined variable",
	    # Algorithms
	    "algorithm", "time complexity", "space complexity",
	    "big o notation", "sort", "merge sort", "quick sort",
	    "binary search", "dynamic programming", "recursion",
	    "graph traversal", "dfs", "bfs", "greedy algorithm",
	    # DevOps / tools
	    "docker", "kubernetes", "api", "rest api", "jwt",
	    "server", "client", "database", "mongodb", "mysql",
	    "postgres", "orm", "deploy", "deployment", "kafka",
	    # Competitive coding
	    "leetcode", "hackerrank", "codechef", "geeksforgeeks",
	)

	# Triggers this short ("go", "ln", "tan", "sec", "api", ...) occur inside many
	# everyday words ("good", "important", "second", "capital"), so they must match
	# as whole words. Longer triggers only need to start a word, which keeps
	# inflected forms such as "solved", "sorting" or "functions" matching.
	_WHOLE_WORD_MAX_LEN = 3


	def _build_trigger_pattern(triggers):
	    """Compile a single regex that finds any trigger anchored at a word start."""
	    unique = sorted({t.lower() for t in triggers}, key=lambda t: (-len(t), t))
	    whole_word = [re.escape(t) for t in unique if len(t) <= _WHOLE_WORD_MAX_LEN]
	    word_start = [re.escape(t) for t in unique if len(t) > _WHOLE_WORD_MAX_LEN]

	    branches = []
	    if word_start:
	        branches.append("(?:" + "|".join(word_start) + ")")
	    if whole_word:
	        # Optional plural "s", then the word must end.
	        branches.append("(?:" + "|".join(whole_word) + ")s?(?![a-z0-9])")
	    if not branches:
	        return re.compile(r"(?!)")  # never matches
	    return re.compile("(?<![a-z0-9])(?:" + "|".join(branches) + ")")


	# Compiled once at import time instead of rebuilding the lists on every call.
	_TRIGGER_RE = _build_trigger_pattern(_MATH_TRIGGERS + _CODE_TRIGGERS)
	_DIGIT_RE = re.compile(r"\d")
	_OPERATOR_RE = re.compile(r"[-+*/=^()\[\]{}]")
	_NUMBER_LIST_RE = re.compile(r"\d+\s*,\s*\d+")


	def is_coding_or_math(text: str) -> bool:
	    """Return True when *text* looks like a math or programming prompt.

	    A prompt qualifies when any of the following holds (case-insensitive):

	    1. It contains a math or code trigger keyword. Keywords are anchored at a
	       word start; keywords of three characters or fewer must be whole words
	       so that e.g. "capital" does not match "api".
	    2. It contains at least one digit and one of ``+-*/=^()[]{}``.
	    3. It contains comma-separated numbers such as ``2, 6, 12, 20``.

	    Args:
	        text: The raw user prompt.

	    Returns:
	        True if the prompt should be treated as math/code, otherwise False.
	    """
	    text = text.lower()

	    if _TRIGGER_RE.search(text):
	        return True

	    if _DIGIT_RE.search(text) and _OPERATOR_RE.search(text):
	        return True

	    # Whitespace around the comma is optional: "2,6", "2, 6" and "2 , 6" all count.
	    return bool(_NUMBER_LIST_RE.search(text))

	# ==============================
	# FIXED STREAMING (NO ECHOING!)
	# ==============================
	def stream_mistral(prompt):
	    """Stream a Mistral answer to *prompt* as growing chat-message snapshots.

	    Each yielded value is a one-element list holding an assistant message
	    dict (``{"role": "assistant", "content": ...}``) whose content is the
	    ``[Mistral]`` header followed by everything generated so far. Generation
	    ends early once ``stop_event`` is set.

	    Args:
	        prompt: The user's message text.

	    Yields:
	        list[dict]: The current assistant message, wrapped in a list.
	    """
	    stop_event.clear()

	    system_prompt = (
	        "You are a helpful, concise assistant. "
	        "Do NOT repeat the user's question. "
	        "Answer directly and clearly."
	    )

	    # Mistral-Instruct only understands [INST] ... [/INST]; the <<SYS>> tags
	    # belong to the Llama-2 chat format and would be read as literal text.
	    # The system instructions are therefore placed at the start of the turn.
	    formatted_prompt = f"<s>[INST] {system_prompt}\n\n{prompt} [/INST]"

	    # Emit the header immediately so the UI shows which model is answering.
	    yield [{"role": "assistant", "content": "[Mistral]\n\n"}]

	    output = ""
	    for token in mistral_model(
	        formatted_prompt,
	        stream=True,
	        max_new_tokens=800,
	        stop=["</s>"],
	    ):
	        if stop_event.is_set():
	            break

	        output += token
	        # Strip so leading whitespace from the model never shows in the UI.
	        clean = output.strip()

	        yield [{"role": "assistant", "content": f"[Mistral]\n\n{clean}"}]

	def stream_qwen(prompt):
	    """Stream a Qwen2.5-Coder answer to *prompt* as growing chat-message snapshots.

	    Each yielded value is a one-element list holding an assistant message
	    dict whose content is the ``[Qwen2.5-Coder]`` header followed by
	    everything generated so far. Generation ends early once ``stop_event``
	    is set.

	    Args:
	        prompt: The user's message text.

	    Yields:
	        list[dict]: The current assistant message, wrapped in a list.
	    """
	    stop_event.clear()
	    resp = ""

	    # Emit the header immediately so the UI shows which model is answering.
	    yield [{"role": "assistant", "content": "[Qwen2.5-Coder]\n\n"}]

	    system_prompt = (
	        "You are a world-class math and coding assistant. "
	        "ALWAYS respond with clean LaTeX. Use $...$ for inline and $$...$$ for display. "
	        "Use \\boxed{} for final answers."
	    )

	    # ChatML markers must be exactly <|im_start|> / <|im_end|>. The previous
	    # "<\|im_start\|>" spelling is an invalid escape that leaves a literal
	    # backslash in the prompt, so the model never saw the real markers.
	    formatted = (
	        f"<|im_start|>system\n{system_prompt}<|im_end|>\n"
	        f"<|im_start|>user\n{prompt}<|im_end|>\n"
	        "<|im_start|>assistant\n"
	    )

	    for chunk in qwen_model(
	        formatted,
	        stream=True,
	        max_tokens=800,
	        temperature=0.1,
	        top_p=0.9,
	        top_k=20,
	        repeat_penalty=1.05,
	        stop=["<|im_end|>"],  # end the turn even if the marker is emitted as plain text
	    ):
	        if stop_event.is_set():
	            break

	        # Accept both completion-style ("text") and chat-style ("delta") chunks.
	        choice = chunk["choices"][0]
	        token = (
	            choice.get("text")
	            or choice.get("delta", {}).get("content", "")
	            or ""
	        )

	        resp += token

	        yield [{"role": "assistant", "content": f"[Qwen2.5-Coder]\n\n{resp}"}]

	# ==============================
	# MAIN CHAT — WORKS WITH MESSAGES FORMAT
	# ==============================
	def _history_to_messages(history):
	    """Normalize Gradio chat history into a list of role/content dicts."""
	    messages = []
	    for item in history or []:
	        if isinstance(item, dict) and "role" in item:
	            # Already in the type="messages" format.
	            messages.append(item)
	        elif isinstance(item, (list, tuple)) and len(item) == 2:
	            # Legacy format: one (user_text, assistant_text) pair per turn.
	            user_text, bot_text = item
	            if user_text:
	                messages.append({"role": "user", "content": user_text})
	            if bot_text:
	                messages.append({"role": "assistant", "content": bot_text})
	    return messages


	def chat(message, history):
	    """Route *message* to Qwen or Mistral and stream the updated conversation.

	    Math/code prompts (per ``is_coding_or_math``) go to ``stream_qwen``;
	    everything else goes to ``stream_mistral``. Only the new message is sent
	    to the model; the history is used to rebuild the chat display.

	    Args:
	        message: The text the user just submitted.
	        history: Prior conversation, either a list of role/content dicts or
	            a list of ``(user, assistant)`` pairs; ``None`` is treated as empty.

	    Yields:
	        list[dict]: The full conversation including the partial assistant
	        reply. A blank *message* yields the unchanged history once and stops
	        without calling a model.
	    """
	    messages = _history_to_messages(history)

	    # Nothing to answer: leave the chat as it is instead of prompting a model
	    # with an empty string.
	    if not message or not message.strip():
	        yield messages
	        return

	    stop_event.clear()
	    messages.append({"role": "user", "content": message})

	    streamer = stream_qwen(message) if is_coding_or_math(message) else stream_mistral(message)

	    partial = messages.copy()
	    for index, chunk in enumerate(streamer):
	        if stop_event.is_set():
	            break
	        if index == 0:
	            # First snapshot: add the assistant message to the conversation.
	            partial.append(chunk[0])
	        else:
	            # Later snapshots replace it with the longer text.
	            partial[-1] = chunk[0]
	        yield partial

	def stop() -> None:
	    """Set the shared stop flag so the streaming loops break at their next check."""
	    stop_event.set()

	# ==============================
	# UI
	# ==============================
	with gr.Blocks(theme=gr.themes.Soft()) as demo:
	    # A plain "|" is used here: "\|" is an invalid Python escape sequence
	    # (a SyntaxWarning on recent interpreters) and renders the same in Markdown.
	    gr.Markdown("# Dual Local AI — Clean Responses (No Echoing!)\nCode/Math → Qwen2.5-Coder | Chat → Mistral")
	    chatbot = gr.Chatbot(height=720, type="messages", show_copy_button=True)
	    with gr.Row():
	        txt = gr.Textbox(placeholder="Ask anything…", label="Message", lines=4, scale=8)
	        send = gr.Button("Send", variant="primary")
	        stop_btn = gr.Button("Stop", variant="stop")

	    # Send button and Enter key behave identically: stream the reply into the
	    # chatbot, then clear the input box.
	    for register in (send.click, txt.submit):
	        register(chat, [txt, chatbot], chatbot).then(lambda: gr.update(value=""), outputs=txt)
	    stop_btn.click(stop)

	print("Launching FINAL version (no echoing, no crashes)...")
	demo.launch(server_port=7860, inbrowser=True)