|
|
from flask import Flask, render_template, request, jsonify
from flask_cors import CORS
from werkzeug.utils import secure_filename

import io
import json
import os
import pickle
import re
import string
import traceback

import numpy as np
import pandas as pd
import joblib
import matplotlib.pyplot as plt
import plotly.graph_objs as go
import requests
import tldextract
from PIL import Image
from textblob import TextBlob
from dotenv import load_dotenv
from urllib.parse import urlparse

from sklearn import svm
from sklearn.svm import SVR, SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.cluster import KMeans, DBSCAN
from sklearn.datasets import make_classification
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
load_dotenv()

import nltk

# Point NLTK at the corpus data bundled alongside the app (nltk_data/).
nltk.data.path.append(os.path.join(os.path.dirname(__file__), "nltk_data"))

from nltk.corpus import words

# Sanity check: "engineering" is in this set; random gibberish is not.
valid_words = set(words.words())

import wordninja
from spellchecker import SpellChecker

import google.generativeai as genai
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from huggingface_hub import hf_hub_download
import torch
|
|
|
|
|
REPO_ID = "deedrop1140/Neroml" |
|
|
|
|
|
def load_file(filename): |
|
|
"""Download a file from Hugging Face Hub and load it with the right library.""" |
|
|
file_path = hf_hub_download(repo_id=REPO_ID, filename=filename) |
|
|
|
|
|
if filename.endswith(".pkl") or filename.endswith(".joblib"): |
|
|
return joblib.load(file_path) |
|
|
elif filename.endswith(".npy"): |
|
|
return np.load(file_path, allow_pickle=True) |
|
|
elif filename.endswith(".pt") or filename.endswith(".pth"): |
|
|
return torch.load(file_path) |
|
|
else: |
|
|
return file_path |
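
# Illustrative usage (comments only, nothing executed at import time);
# "some_embeddings.npy" and "weights.pt" are hypothetical filenames:
#
#     clf = load_file("Models/supervised_model.pkl")  # -> unpickled estimator
#     arr = load_file("some_embeddings.npy")          # -> NumPy array
#     net = load_file("weights.pt")                   # -> torch object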
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
MODEL_DIR = "Models" |
|
|
DATA_DIR = "housedata" |
|
|
UPLOAD_FOLDER = 'static/uploads' |
|
|
|
|
|
app = Flask(__name__) |
|
|
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER |
|
|
CORS(app) |
|
|
|
|
|
|
|
|
|
|
|
genai.configure(api_key=os.getenv("GEMINI_API_KEY")) |
|
|
|
|
|
def ask_gemini(statement): |
|
|
model = genai.GenerativeModel("gemini-2.0-flash-001") |
|
|
response = model.generate_content(f"Verify this statement for truth: {statement}") |
|
|
return response.text |
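
# Example (illustrative; requires GEMINI_API_KEY in the environment):
#
#     verdict = ask_gemini("The Great Wall of China is visible from the Moon.")
#     print(verdict)  # free-text assessment returned by the model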
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Small synthetic dataset and SVM fitted once at import time; the interactive
# routes below train their own models, so this mainly serves as a smoke test.
X, y = make_classification(n_samples=100, n_features=2, n_redundant=0,
                           n_clusters_per_class=1, n_classes=2, random_state=42)
|
|
scaler = StandardScaler() |
|
|
X = scaler.fit_transform(X) |
|
|
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) |
|
|
|
|
|
|
|
|
svm_model = SVC(kernel="linear") |
|
|
svm_model.fit(X_train, y_train) |
|
|
|
|
|
|
|
|
|
|
|
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY") |
|
|
GEMINI_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent" |
|
|
|
|
|
|
|
|
|
|
|
os.makedirs(MODEL_DIR, exist_ok=True) |
|
|
os.makedirs(DATA_DIR, exist_ok=True) |
|
|
os.makedirs(UPLOAD_FOLDER, exist_ok=True) |
|
|
|
|
|
def clean_text(text): |
|
|
if pd.isnull(text): |
|
|
return "" |
|
|
text = text.lower() |
|
|
text = re.sub(r"http\S+|www\S+|https\S+", '', text) |
|
|
text = text.translate(str.maketrans('', '', string.punctuation)) |
|
|
text = re.sub(r'\d+', '', text) |
|
|
text = re.sub(r'\s+', ' ', text).strip() |
|
|
return text |
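
# Example (illustrative):
#     clean_text("Visit https://x.co NOW!!! 50% off")  ->  "visit now off"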
|
|
|
|
|
|
|
|
def generate_linear_data(n_samples=100, noise=0.5): |
|
|
X = np.sort(np.random.rand(n_samples) * 10).reshape(-1, 1) |
|
|
y = 2 * X.squeeze() + 5 + noise * np.random.randn(n_samples) |
|
|
return X, y |
|
|
|
|
|
def generate_non_linear_data(n_samples=100, noise=0.5): |
|
|
X = np.sort(np.random.rand(n_samples) * 10).reshape(-1, 1) |
|
|
y = np.sin(X.squeeze()) * 10 + noise * np.random.randn(n_samples) |
|
|
return X, y |
|
|
|
|
|
def generate_noisy_data(n_samples=100, noise_factor=3.0): |
|
|
X = np.sort(np.random.rand(n_samples) * 10).reshape(-1, 1) |
|
|
y = 2 * X.squeeze() + 5 + noise_factor * np.random.randn(n_samples) |
|
|
return X, y |
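
# Each generator returns X of shape (n_samples, 1) and y of shape (n_samples,):
#     X, y = generate_linear_data(n_samples=50)
#     X.shape, y.shape  # -> ((50, 1), (50,))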
|
|
|
|
|
|
|
|
def get_house_data(): |
|
|
try: |
|
|
df = pd.read_csv(os.path.join(DATA_DIR, 'train.csv')) |
|
|
|
|
|
features = ['GrLivArea', 'OverallQual', 'GarageCars', 'TotalBsmtSF', 'YearBuilt'] |
|
|
|
|
|
if not all(col in df.columns for col in features + ['SalePrice']): |
|
|
print("Warning: Missing one or more required columns in train.csv for house data.") |
|
|
return None, None |
|
|
X = df[features] |
|
|
y = df['SalePrice'] |
|
|
return X, y |
|
|
except FileNotFoundError: |
|
|
print(f"Error: train.csv not found in {DATA_DIR}. Please ensure your data is there.") |
|
|
return None, None |
|
|
except Exception as e: |
|
|
print(f"Error loading house data: {e}") |
|
|
return None, None |
|
|
|
|
|
|
|
|
loaded_models = {} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
VT_API_KEY = os.getenv("VT_API_KEY") |
|
|
nb_model = load_file("Models/nb_url_model.pkl") |
|
|
vectorizer = load_file("Models/nb_url_vectorizer.pkl") |
|
|
|
|
|
if nb_model is not None and vectorizer is not None: |
|
|
print("✅ Loaded model and vectorizer.") |
|
|
else: |
|
|
print("❌ Model or vectorizer not found.") |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def load_all_models(): |
|
|
""" |
|
|
Loads all necessary models into the loaded_models dictionary when the app starts. |
|
|
""" |
|
|
global loaded_models |
|
|
|
|
|
|
|
|
|
|
|
try: |
|
|
supervised_model_path = load_file("Models/supervised_model.pkl") |
|
|
|
|
|
|
|
|
print("DEBUG -> supervised_model_path type:", type(supervised_model_path)) |
|
|
|
|
|
|
|
|
if isinstance(supervised_model_path, str): |
|
|
loaded_models['supervised'] = joblib.load(supervised_model_path) |
|
|
else: |
|
|
|
|
|
loaded_models['supervised'] = supervised_model_path |
|
|
|
|
|
print("Supervised model loaded successfully") |
|
|
|
|
|
    except FileNotFoundError:
        # Do not reference supervised_model_path here; it may be unbound if
        # load_file() itself raised.
        print("Error: supervised model file could not be found. "
              "Please run train_model.py first.")
        loaded_models['supervised'] = None
|
|
except Exception as e: |
|
|
print(f"Error loading supervised model: {e}") |
|
|
loaded_models['supervised'] = None |
|
|
|
|
|
|
|
|
|
|
|
with app.app_context(): |
|
|
load_all_models() |
|
|
|
|
|
@app.route('/') |
|
|
def home(): |
|
|
return render_template('home.html') |
|
|
|
|
|
@app.route('/supervised', methods=['GET', 'POST']) |
|
|
def supervised(): |
|
|
prediction = None |
|
|
hours_studied_input = None |
|
|
|
|
|
    if loaded_models.get('supervised') is None:
|
|
return "Error: Supervised model could not be loaded. Please check server logs.", 500 |
|
|
|
|
|
if request.method == 'POST': |
|
|
try: |
|
|
hours_studied_input = float(request.form['hours']) |
|
|
input_data = np.array([[hours_studied_input]]) |
|
|
|
|
|
predicted_score = loaded_models['supervised'].predict(input_data)[0] |
|
|
prediction = round(predicted_score, 2) |
|
|
|
|
|
except ValueError: |
|
|
print("Invalid input for hours studied.") |
|
|
prediction = "Error: Please enter a valid number." |
|
|
except Exception as e: |
|
|
print(f"An error occurred during prediction: {e}") |
|
|
prediction = "Error during prediction." |
|
|
|
|
|
return render_template('supervised.html', prediction=prediction, hours_studied_input=hours_studied_input) |
|
|
|
|
|
|
|
|
@app.route('/polynomial', methods=['GET', 'POST']) |
|
|
def polynomial(): |
|
|
if request.method == 'POST': |
|
|
try: |
|
|
hours = float(request.form['hours']) |
|
|
|
|
|
|
|
|
|
|
|
model = load_file("Models/poly_model.pkl") |
|
|
            poly = load_file("Models/poly_transform.pkl")
|
|
|
|
|
transformed_input = poly.transform([[hours]]) |
|
|
prediction = model.predict(transformed_input)[0] |
|
|
|
|
|
return render_template("poly.html", prediction=round(prediction, 2), hours=hours) |
|
|
|
|
|
except Exception as e: |
|
|
print(f"Error: {e}") |
|
|
return render_template("poly.html", error="Something went wrong.") |
|
|
|
|
|
return render_template("poly.html") |
|
|
|
|
|
|
|
|
@app.route('/random_forest', methods=['GET', 'POST']) |
|
|
def random_forest(): |
|
|
if request.method == 'POST': |
|
|
try: |
|
|
hours = float(request.form['hours']) |
|
|
model = load_file("Models/rf_model.pkl") |
|
|
|
|
|
prediction = model.predict([[hours]])[0] |
|
|
|
|
|
return render_template("rf.html", prediction=round(prediction, 2), hours=hours) |
|
|
except Exception as e: |
|
|
print(f"[ERROR] {e}") |
|
|
return render_template("rf.html", error="Prediction failed. Check your input.") |
|
|
return render_template("rf.html") |
|
|
|
|
|
@app.route('/prediction_flow') |
|
|
def prediction_flow(): |
|
|
return render_template('prediction_flow.html') |
|
|
|
|
|
@app.route("/lasso", methods=["GET", "POST"]) |
|
|
def lasso(): |
|
|
if request.method == "POST": |
|
|
try: |
|
|
inputs = [float(request.form.get(f)) for f in ['OverallQual', 'GrLivArea', 'GarageCars', 'TotalBsmtSF', 'YearBuilt']] |
|
|
|
|
|
model = load_file("Models/lasso_model.pkl") |
|
|
scaler = load_file("Models/lasso_scaler.pkl") |
|
|
|
|
|
|
|
|
|
|
|
scaled_input = scaler.transform([inputs]) |
|
|
|
|
|
prediction = model.predict(scaled_input)[0] |
|
|
return render_template("lasso.html", prediction=round(prediction, 2)) |
|
|
|
|
|
except Exception as e: |
|
|
return render_template("lasso.html", error=str(e)) |
|
|
|
|
|
return render_template("lasso.html") |
|
|
|
|
|
|
|
|
@app.route('/ridge', methods=['GET', 'POST']) |
|
|
def ridge(): |
|
|
prediction = None |
|
|
error = None |
|
|
|
|
|
try: |
|
|
model = load_file("Models/ridge_model.pkl") |
|
|
scaler = load_file("Models/ridge_scaler.pkl") |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
except Exception as e: |
|
|
return f"❌ Error loading Ridge model: {e}", 500 |
|
|
|
|
|
if request.method == 'POST': |
|
|
try: |
|
|
features = ['OverallQual', 'GrLivArea', 'GarageCars', 'TotalBsmtSF', 'YearBuilt'] |
|
|
input_data = [float(request.form[feature]) for feature in features] |
|
|
input_scaled = scaler.transform([input_data]) |
|
|
prediction = model.predict(input_scaled)[0] |
|
|
except Exception as e: |
|
|
error = str(e) |
|
|
|
|
|
return render_template('ridge.html', prediction=prediction, error=error) |
|
|
|
|
|
|
|
|
|
|
|
@app.route('/svr') |
|
|
def svr_page(): |
|
|
return render_template('svr.html') |
|
|
|
|
|
|
|
|
@app.route('/run_svr_demo', methods=['POST']) |
|
|
def run_svr_demo(): |
|
|
try: |
|
|
|
|
|
if request.is_json: |
|
|
data = request.json |
|
|
else: |
|
|
|
|
|
data = request.form |
|
|
|
|
|
dataset_type = data.get('dataset_type', 'linear') |
|
|
kernel_type = data.get('kernel', 'rbf') |
|
|
C_param = float(data.get('C', 1.0)) |
|
|
gamma_param = float(data.get('gamma', 0.1)) |
|
|
epsilon_param = float(data.get('epsilon', 0.1)) |
|
|
|
|
|
X, y = None, None |
|
|
|
|
|
if dataset_type == 'linear': |
|
|
X, y = generate_linear_data() |
|
|
elif dataset_type == 'non_linear': |
|
|
X, y = generate_non_linear_data() |
|
|
elif dataset_type == 'noisy': |
|
|
X, y = generate_noisy_data() |
|
|
elif dataset_type == 'house_data': |
|
|
X_house, y_house = get_house_data() |
|
|
if X_house is not None and not X_house.empty: |
|
|
X = X_house[['GrLivArea']].values |
|
|
y = y_house.values |
|
|
else: |
|
|
X, y = generate_linear_data() |
|
|
elif dataset_type == 'custom_csv': |
|
|
uploaded_file = request.files.get('file') |
|
|
x_column_name = data.get('x_column_name') |
|
|
y_column_name = data.get('y_column_name') |
|
|
|
|
|
if not uploaded_file or uploaded_file.filename == '': |
|
|
return jsonify({'error': 'No file uploaded for custom CSV.'}), 400 |
|
|
if not x_column_name or not y_column_name: |
|
|
return jsonify({'error': 'X and Y column names are required for custom CSV.'}), 400 |
|
|
|
|
|
try: |
|
|
|
|
|
df = pd.read_csv(io.BytesIO(uploaded_file.read())) |
|
|
|
|
|
if x_column_name not in df.columns or y_column_name not in df.columns: |
|
|
missing_cols = [] |
|
|
if x_column_name not in df.columns: missing_cols.append(x_column_name) |
|
|
if y_column_name not in df.columns: missing_cols.append(y_column_name) |
|
|
return jsonify({'error': f"Missing columns in uploaded CSV: {', '.join(missing_cols)}"}), 400 |
|
|
|
|
|
X = df[[x_column_name]].values |
|
|
y = df[y_column_name].values |
|
|
except Exception as e: |
|
|
return jsonify({'error': f"Error reading or processing custom CSV: {str(e)}"}), 400 |
|
|
else: |
|
|
X, y = generate_linear_data() |
|
|
|
|
|
|
|
|
if X is None or y is None or len(X) == 0: |
|
|
return jsonify({'error': 'Failed to generate or load dataset.'}), 500 |
|
|
|
|
|
|
|
|
scaler_X = StandardScaler() |
|
|
scaler_y = StandardScaler() |
|
|
|
|
|
X_scaled = scaler_X.fit_transform(X) |
|
|
y_scaled = scaler_y.fit_transform(y.reshape(-1, 1)).flatten() |
|
|
|
|
|
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42) |
|
|
|
|
|
|
|
|
svr_model = SVR(kernel=kernel_type, C=C_param, gamma=gamma_param, epsilon=epsilon_param) |
|
|
svr_model.fit(X_train, y_train) |
|
|
|
|
|
|
|
|
y_pred_scaled = svr_model.predict(X_test) |
|
|
|
|
|
|
|
|
y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).flatten() |
|
|
y_test_original = scaler_y.inverse_transform(y_test.reshape(-1, 1)).flatten() |
|
|
|
|
|
|
|
|
mse = mean_squared_error(y_test_original, y_pred) |
|
|
r2 = r2_score(y_test_original, y_pred) |
|
|
support_vectors_count = len(svr_model.support_vectors_) |
|
|
|
|
|
|
|
|
plot_X_original = scaler_X.inverse_transform(X_scaled) |
|
|
plot_y_original = scaler_y.inverse_transform(y_scaled.reshape(-1, 1)).flatten() |
|
|
|
|
|
x_plot = np.linspace(plot_X_original.min(), plot_X_original.max(), 500).reshape(-1, 1) |
|
|
x_plot_scaled = scaler_X.transform(x_plot) |
|
|
y_plot_scaled = svr_model.predict(x_plot_scaled) |
|
|
y_plot_original = scaler_y.inverse_transform(y_plot_scaled.reshape(-1, 1)).flatten() |
|
|
|
|
|
y_upper_scaled = y_plot_scaled + epsilon_param |
|
|
y_lower_scaled = y_plot_scaled - epsilon_param |
|
|
y_upper_original = scaler_y.inverse_transform(y_upper_scaled.reshape(-1, 1)).flatten() |
|
|
y_lower_original = scaler_y.inverse_transform(y_lower_scaled.reshape(-1, 1)).flatten() |
|
|
|
|
|
plot_data = { |
|
|
'data': [ |
|
|
{ |
|
|
'x': plot_X_original.flatten().tolist(), |
|
|
'y': plot_y_original.tolist(), |
|
|
'mode': 'markers', |
|
|
'type': 'scatter', |
|
|
'name': 'Original Data' |
|
|
}, |
|
|
{ |
|
|
'x': x_plot.flatten().tolist(), |
|
|
'y': y_plot_original.tolist(), |
|
|
'mode': 'lines', |
|
|
'type': 'scatter', |
|
|
'name': 'SVR Prediction', |
|
|
'line': {'color': 'red'} |
|
|
}, |
|
|
{ |
|
|
'x': x_plot.flatten().tolist(), |
|
|
'y': y_upper_original.tolist(), |
|
|
'mode': 'lines', |
|
|
'type': 'scatter', |
|
|
'name': 'Epsilon Tube (Upper)', |
|
|
'line': {'dash': 'dash', 'color': 'green'}, |
|
|
'fill': 'tonexty', |
|
|
'fillcolor': 'rgba(0,128,0,0.1)' |
|
|
}, |
|
|
{ |
|
|
'x': x_plot.flatten().tolist(), |
|
|
'y': y_lower_original.tolist(), |
|
|
'mode': 'lines', |
|
|
'type': 'scatter', |
|
|
'name': 'Epsilon Tube (Lower)', |
|
|
'line': {'dash': 'dash', 'color': 'green'} |
|
|
} |
|
|
], |
|
|
'layout': { |
|
|
'title': f'SVR Regression (Kernel: {kernel_type.upper()})', |
|
|
'xaxis': {'title': 'Feature Value'}, |
|
|
'yaxis': {'title': 'Target Value'}, |
|
|
'hovermode': 'closest' |
|
|
} |
|
|
} |
|
|
|
|
|
return jsonify({ |
|
|
'mse': mse, |
|
|
'r2_score': r2, |
|
|
'support_vectors_count': support_vectors_count, |
|
|
'plot_data': plot_data |
|
|
}) |
|
|
|
|
|
except Exception as e: |
|
|
print(f"Error in SVR demo: {e}") |
|
|
return jsonify({'error': str(e)}), 500 |
|
|
|
|
|
|
|
|
def clean_text(text):
    # Final definition used by the spam endpoints; it intentionally overrides
    # the URL-oriented cleaner defined earlier in this file.
    text = text.lower()
    text = re.sub(r'\W', ' ', text)
    text = re.sub(r'\s+[a-zA-Z]\s+', ' ', text)
    text = re.sub(r'\s+', ' ', text)
    return text.strip()
|
|
|
|
|
@app.route('/logistic', methods=['GET', 'POST']) |
|
|
def logistic(): |
|
|
prediction, confidence_percentage, cleaned, tokens, probability = None, None, None, None, None |
|
|
|
|
|
|
|
|
model = load_file("Models/logistic_model.pkl") |
|
|
vectorizer = load_file("Models/logvectorizer.pkl") |
|
|
|
|
|
if request.method == "POST": |
|
|
msg = request.form.get('message', '') |
|
|
cleaned = clean_text(msg) |
|
|
tokens = cleaned.split() |
|
|
|
|
|
|
|
|
try: |
|
|
vector = vectorizer.transform([cleaned]) |
|
|
probability = model.predict_proba(vector)[0][1] |
|
|
prediction = "Spam" if probability >= 0.5 else "Not Spam" |
|
|
confidence_percentage = round(probability * 100, 2) |
|
|
except Exception as e: |
|
|
print("Error predicting:", e) |
|
|
prediction = "Error" |
|
|
confidence_percentage = 0 |
|
|
|
|
|
return render_template( |
|
|
"logistic.html", |
|
|
prediction=prediction, |
|
|
confidence_percentage=confidence_percentage, |
|
|
cleaned=cleaned, |
|
|
tokens=tokens, |
|
|
        probability=round(probability, 4) if probability is not None else None,
|
|
source="sms" |
|
|
) |
|
|
|
|
|
@app.route('/logistic-sms', methods=['POST']) |
|
|
def logistic_sms(): |
|
|
    try:
        data = request.get_json()
        msg = data.get('message', '')
        cleaned = clean_text(msg)
        tokens = cleaned.split()

        # Load the spam model and vectorizer explicitly; the module-level
        # `vectorizer` belongs to the URL checker, not this endpoint.
        model = load_file("Models/logistic_model.pkl")
        vectorizer = load_file("Models/logvectorizer.pkl")
|
|
|
|
|
vector = vectorizer.transform([cleaned]) |
|
|
probability = model.predict_proba(vector)[0][1] |
|
|
prediction = "Spam" if probability >= 0.5 else "Not Spam" |
|
|
confidence_percentage = round(probability * 100, 2) |
|
|
|
|
|
return jsonify({ |
|
|
"prediction": prediction, |
|
|
"confidence": confidence_percentage, |
|
|
"probability": round(probability, 4), |
|
|
"cleaned": cleaned, |
|
|
"tokens": tokens, |
|
|
"source": "json" |
|
|
}) |
|
|
|
|
|
except Exception as e: |
|
|
print("Error in /logistic-sms:", e) |
|
|
return jsonify({"error": "Internal server error", "details": str(e)}), 500 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.route("/knn") |
|
|
def knn_visual(): |
|
|
return render_template("knn.html") |
|
|
|
|
|
@app.route('/knn_visual_predict', methods=['POST']) |
|
|
def knn_visual_predict(): |
|
|
data = request.get_json() |
|
|
points = np.array(data['points']) |
|
|
test_point = np.array(data['test_point']) |
|
|
k = int(data['k']) |
|
|
|
|
|
X = points[:, :2] |
|
|
y = points[:, 2].astype(int) |
|
|
|
|
|
knn = KNeighborsClassifier(n_neighbors=k) |
|
|
knn.fit(X, y) |
|
|
pred = knn.predict([test_point])[0] |
|
|
|
|
|
dists = np.linalg.norm(X - test_point, axis=1) |
|
|
neighbor_indices = np.argsort(dists)[:k] |
|
|
neighbors = X[neighbor_indices] |
|
|
|
|
|
return jsonify({ |
|
|
'prediction': int(pred), |
|
|
'neighbors': neighbors.tolist() |
|
|
}) |
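
# Expected JSON payload for /knn_visual_predict (illustrative):
#     {"points": [[1.0, 2.0, 0], [2.0, 1.0, 1], ...],  # [x, y, class]
#      "test_point": [1.5, 1.5],
#      "k": 3}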
|
|
|
|
|
|
|
|
@app.route("/knn_image") |
|
|
def knn_image_page(): |
|
|
return render_template("knn_image.html") |
|
|
|
|
|
|
|
|
|
|
@app.route("/predict_image", methods=["POST"]) |
|
|
def predict_image(): |
|
|
if "image" not in request.files: |
|
|
return jsonify({"error": "No image uploaded"}), 400 |
|
|
|
|
|
file = request.files["image"] |
|
|
|
|
|
try: |
|
|
|
|
|
image = Image.open(file.stream).convert("RGB") |
|
|
image = image.resize((32, 32)) |
|
|
img_array = np.array(image).flatten().reshape(1, -1) |
|
|
except Exception as e: |
|
|
return jsonify({"error": f"Invalid image. {str(e)}"}), 400 |
|
|
|
|
|
|
|
|
|
|
|
model = load_file("Models/knn_model.pkl") |
|
|
label_classes = load_file("Models/lasso_model.pkl") |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
probs = model.predict_proba(img_array)[0] |
|
|
pred_index = np.argmax(probs) |
|
|
pred_label = label_classes[pred_index] |
|
|
confidence = round(float(probs[pred_index]) * 100, 2) |
|
|
|
|
|
return jsonify({ |
|
|
"prediction": str(pred_label), |
|
|
"confidence": f"{confidence}%", |
|
|
"all_probabilities": { |
|
|
str(label_classes[i]): round(float(probs[i]) * 100, 2) |
|
|
for i in range(len(probs)) |
|
|
} |
|
|
}) |
|
|
|
|
|
@app.route("/rfc") |
|
|
def random_forest_page(): |
|
|
return render_template("Random_Forest_Classifier.html") |
|
|
|
|
|
@app.route('/rf_visual_predict', methods=['POST']) |
|
|
def rf_visual_predict(): |
|
|
try: |
|
|
data = request.get_json() |
|
|
print("📦 Incoming JSON data:", data) |
|
|
|
|
|
labeled_points = data.get('points') |
|
|
test_point = data.get('test_point') |
|
|
|
|
|
if not labeled_points or not test_point: |
|
|
return jsonify({"error": "Missing points or test_point"}), 400 |
|
|
|
|
|
df = pd.DataFrame(labeled_points, columns=['X1', 'X2', 'Class']) |
|
|
X = df[['X1', 'X2']] |
|
|
y = df['Class'] |
|
|
|
|
|
rf_model = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42) |
|
|
rf_model.fit(X, y) |
|
|
|
|
|
test_point_np = np.array(test_point).reshape(1, -1) |
|
|
prediction = int(rf_model.predict(test_point_np)[0]) |
|
|
|
|
|
x_min, x_max = X['X1'].min() - 1, X['X1'].max() + 1 |
|
|
y_min, y_max = X['X2'].min() - 1, X['X2'].max() + 1 |
|
|
xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100), |
|
|
np.linspace(y_min, y_max, 100)) |
|
|
|
|
|
Z = rf_model.predict(np.c_[xx.ravel(), yy.ravel()]) |
|
|
Z = Z.reshape(xx.shape) |
|
|
|
|
|
return jsonify({ |
|
|
'prediction': prediction, |
|
|
'decision_boundary_z': Z.tolist(), |
|
|
'decision_boundary_x_coords': xx[0, :].tolist(), |
|
|
'decision_boundary_y_coords': yy[:, 0].tolist() |
|
|
}) |
|
|
|
|
|
except Exception as e: |
|
|
|
|
print("❌ Exception in /rf_visual_predict:") |
|
|
traceback.print_exc() |
|
|
return jsonify({"error": str(e)}), 500 |
|
|
|
|
|
@app.route("/liar") |
|
|
def liar_input_page(): |
|
|
return render_template("rfc_liar_predict.html") |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.route("/ref/liar/predictor", methods=["POST"]) |
|
|
def liar_predictor(): |
|
|
try: |
|
|
data = request.get_json() |
|
|
statement = data.get("statement", "") |
|
|
|
|
|
if not statement: |
|
|
return jsonify({"success": False, "error": "Missing statement"}), 400 |
|
|
|
|
|
        try:
            # NOTE: `vectorizer` and `model` resolve to the module-level
            # URL-checker globals; no LIAR-specific model is loaded anywhere
            # in this file, so this branch likely needs its own load_file()
            # calls for the intended artifacts.
            features = vectorizer.transform([statement])
            prediction = model.predict(features)[0]
|
|
|
|
|
liar_label_map = { |
|
|
0: "It can be false 🔥", |
|
|
1: "False ❌", |
|
|
2: "Mostly false but can be true 🤏", |
|
|
3: "Half True 🌓", |
|
|
4: "Mostly True 👍", |
|
|
5: "True ✅" |
|
|
} |
|
|
|
|
|
prediction_label = liar_label_map.get(int(prediction), "Unknown") |
|
|
|
|
|
except ValueError as ve: |
|
|
if "features" in str(ve): |
|
|
|
|
|
prediction_label = ask_gemini(statement) |
|
|
else: |
|
|
raise ve |
|
|
|
|
|
|
|
|
        # `bert_checker` is assumed to be a Hugging Face text-classification
        # pipeline defined elsewhere; it is not created in this file.
        bert_result = bert_checker(statement)[0]
|
|
bert_label = bert_result["label"] |
|
|
bert_score = round(bert_result["score"] * 100, 2) |
|
|
|
|
|
science_label_map = { |
|
|
"LABEL_0": "✅ Scientifically Possible", |
|
|
"LABEL_1": "❌ Scientifically Impossible" |
|
|
} |
|
|
|
|
|
scientific_check = f"{science_label_map.get(bert_label, bert_label)} ({bert_score:.2f}%)" |
|
|
|
|
|
return jsonify({ |
|
|
"success": True, |
|
|
"prediction": prediction_label, |
|
|
"reason": "Predicted from linguistic and content-based patterns, or Gemini fallback.", |
|
|
"scientific_check": scientific_check |
|
|
}) |
|
|
|
|
|
except Exception as e: |
|
|
traceback.print_exc() |
|
|
return jsonify({"success": False, "error": str(e)}), 500 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.route("/svm") |
|
|
def svm_page(): |
|
|
return render_template("svm.html") |
|
|
|
|
|
@app.route('/svm_visual_predict', methods=['POST']) |
|
|
def svm_visual_predict(): |
|
|
data = request.json |
|
|
labeled_points = data['points'] |
|
|
test_point = data['test_point'] |
|
|
svm_type = data['svm_type'] |
|
|
c_param = float(data['c_param']) |
|
|
gamma_param = float(data['gamma_param']) |
|
|
|
|
|
df = pd.DataFrame(labeled_points, columns=['X1', 'X2', 'Class']) |
|
|
X = df[['X1', 'X2']] |
|
|
y = df['Class'] |
|
|
|
|
|
|
|
|
if svm_type == 'linear': |
|
|
svm_model = svm.SVC(kernel='linear', C=c_param, random_state=42) |
|
|
elif svm_type == 'rbf': |
|
|
svm_model = svm.SVC(kernel='rbf', C=c_param, gamma=gamma_param, random_state=42) |
|
|
else: |
|
|
return jsonify({'error': 'Invalid SVM type'}), 400 |
|
|
|
|
|
svm_model.fit(X, y) |
|
|
|
|
|
|
|
|
test_point_np = np.array(test_point).reshape(1, -1) |
|
|
prediction = int(svm_model.predict(test_point_np)[0]) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
support_vectors = svm_model.support_vectors_.tolist() |
|
|
|
|
|
|
|
|
|
|
|
x_min, x_max = X['X1'].min() - 1, X['X1'].max() + 1 |
|
|
y_min, y_max = X['X2'].min() - 1, X['X2'].max() + 1 |
|
|
|
|
|
|
|
|
x_min = min(x_min, test_point_np[0,0] - 1) |
|
|
x_max = max(x_max, test_point_np[0,0] + 1) |
|
|
y_min = min(y_min, test_point_np[0,1] - 1) |
|
|
y_max = max(y_max, test_point_np[0,1] + 1) |
|
|
|
|
|
xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100), |
|
|
np.linspace(y_min, y_max, 100)) |
|
|
|
|
|
|
|
|
Z = svm_model.predict(np.c_[xx.ravel(), yy.ravel()]) |
|
|
Z = Z.reshape(xx.shape) |
|
|
|
|
|
|
|
|
decision_boundary_z = Z.tolist() |
|
|
decision_boundary_x_coords = xx[0, :].tolist() |
|
|
decision_boundary_y_coords = yy[:, 0].tolist() |
|
|
|
|
|
return jsonify({ |
|
|
'prediction': prediction, |
|
|
'decision_boundary_z': decision_boundary_z, |
|
|
'decision_boundary_x_coords': decision_boundary_x_coords, |
|
|
'decision_boundary_y_coords': decision_boundary_y_coords, |
|
|
'support_vectors': support_vectors |
|
|
}) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.route('/api/explain', methods=['POST']) |
|
|
def explain(): |
|
|
|
|
|
|
|
|
|
|
|
    if not GEMINI_API_KEY and os.getenv("FLASK_ENV") != "development":
|
|
return jsonify({'error': 'Missing API key'}), 500 |
|
|
|
|
|
payload = request.get_json() |
|
|
|
|
|
try: |
|
|
response = requests.post( |
|
|
f"{GEMINI_URL}?key={GEMINI_API_KEY}", |
|
|
headers={"Content-Type": "application/json"}, |
|
|
json=payload |
|
|
) |
|
|
response.raise_for_status() |
|
|
return jsonify(response.json()) |
|
|
except requests.exceptions.RequestException as e: |
|
|
app.logger.error(f"Error calling Gemini API: {e}") |
|
|
return jsonify({'error': str(e)}), 500 |
|
|
|
|
|
@app.route('/decision_tree') |
|
|
def decision_tree_page(): |
|
|
|
|
|
|
|
|
return render_template('decision_tree.html') |
|
|
|
|
|
|
|
|
@app.route('/game') |
|
|
def decision_tree_game(): |
|
|
"""Renders the interactive game page for decision trees.""" |
|
|
return render_template('decision_tree_game.html') |
|
|
|
|
|
@app.route('/dt_visual_predict', methods=['POST']) |
|
|
def dt_visual_predict(): |
|
|
try: |
|
|
data = request.json |
|
|
labeled_points = data['points'] |
|
|
test_point = data['test_point'] |
|
|
max_depth = int(data['max_depth']) |
|
|
|
|
|
|
|
|
df = pd.DataFrame(labeled_points, columns=['X1', 'X2', 'Class']) |
|
|
X = df[['X1', 'X2']] |
|
|
y = df['Class'] |
|
|
|
|
|
|
|
|
if X.empty or len(X) < 2: |
|
|
return jsonify({'error': 'Not enough data points to train the model.'}), 400 |
|
|
|
|
|
|
|
|
dt_model = DecisionTreeClassifier(max_depth=max_depth, random_state=42) |
|
|
dt_model.fit(X, y) |
|
|
|
|
|
|
|
|
test_point_np = np.array(test_point).reshape(1, -1) |
|
|
prediction = int(dt_model.predict(test_point_np)[0]) |
|
|
|
|
|
|
|
|
x_min, x_max = X['X1'].min(), X['X1'].max() |
|
|
y_min, y_max = X['X2'].min(), X['X2'].max() |
|
|
|
|
|
|
|
|
|
|
|
x_buffer = 1.0 if (x_max - x_min) == 0 else (x_max - x_min) * 0.1 |
|
|
y_buffer = 1.0 if (y_max - y_min) == 0 else (y_max - y_min) * 0.1 |
|
|
|
|
|
x_min -= x_buffer |
|
|
x_max += x_buffer |
|
|
y_min -= y_buffer |
|
|
y_max += y_buffer |
|
|
|
|
|
|
|
|
x_min = min(x_min, test_point_np[0,0] - 0.5) |
|
|
x_max = max(x_max, test_point_np[0,0] + 0.5) |
|
|
y_min = min(y_min, test_point_np[0,1] - 0.5) |
|
|
y_max = max(y_max, test_point_np[0,1] + 0.5) |
|
|
|
|
|
|
|
|
xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100), |
|
|
np.linspace(y_min, y_max, 100)) |
|
|
|
|
|
|
|
|
Z = dt_model.predict(np.c_[xx.ravel(), yy.ravel()]) |
|
|
Z = Z.reshape(xx.shape) |
|
|
|
|
|
|
|
|
decision_boundary_z = Z.tolist() |
|
|
decision_boundary_x_coords = xx[0, :].tolist() |
|
|
decision_boundary_y_coords = yy[:, 0].tolist() |
|
|
|
|
|
return jsonify({ |
|
|
'prediction': prediction, |
|
|
'decision_boundary_z': decision_boundary_z, |
|
|
'decision_boundary_x_coords': decision_boundary_x_coords, |
|
|
'decision_boundary_y_coords': decision_boundary_y_coords |
|
|
}) |
|
|
except Exception as e: |
|
|
|
|
|
print(f"An error occurred in /dt_visual_predict: {e}") |
|
|
|
|
|
return jsonify({'error': f'Backend Error: {str(e)}. Check server console for details.'}), 500 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.route('/nb_spam') |
|
|
def nb_spam_page(): |
|
|
return render_template('NB_spam.html') |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
whitelist = set([ |
|
|
|
|
|
'google', 'bing', 'yahoo', 'duckduckgo', 'baidu', 'ask', |
|
|
|
|
|
|
|
|
'facebook', 'instagram', 'twitter', 'linkedin', 'snapchat', 'tiktok', |
|
|
'threads', 'pinterest', 'reddit', 'quora', |
|
|
|
|
|
|
|
|
'whatsapp', 'telegram', 'skype', 'zoom', 'meet', 'discord', |
|
|
'teams', 'signal', 'messenger', |
|
|
|
|
|
|
|
|
'amazon', 'ebay', 'shopify', 'alibaba', 'walmart', 'target', |
|
|
'etsy', 'shein', 'bestbuy', 'costco', 'newegg', |
|
|
|
|
|
|
|
|
'flipkart', 'myntra', 'ajio', 'nykaa', 'meesho', 'snapdeal', |
|
|
'paytm', 'phonepe', 'mobikwik', 'zomato', 'swiggy', 'ola', 'uber', 'bookmyshow', |
|
|
'ixigo', 'makemytrip', 'yatra', 'redbus', 'bigbasket', 'grofers', 'blinkit', |
|
|
'universalcollegeofengineering', |
|
|
|
|
|
|
|
|
'youtube', 'docs', 'drive', 'calendar', 'photos', 'gmail', 'notion', |
|
|
'edx', 'coursera', 'udemy', 'khanacademy', 'byjus', 'unacademy', |
|
|
|
|
|
|
|
|
'bbc', 'cnn', 'nyt', 'forbes', 'bloomberg', 'reuters', |
|
|
'ndtv', 'indiatimes', 'thehindu', 'hindustantimes', 'indiatoday', |
|
|
'techcrunch', 'verge', 'wired', |
|
|
|
|
|
|
|
|
'netflix', 'hotstar', 'primevideo', 'spotify', 'gaana', 'wynk', 'saavn', 'voot', |
|
|
|
|
|
|
|
|
'github', 'stackoverflow', 'medium', 'gitlab', 'bitbucket', |
|
|
'adobe', 'figma', 'canva', |
|
|
|
|
|
|
|
|
'hdfcbank', 'icicibank', 'sbi', 'axisbank', 'kotak', 'boi', 'upi', |
|
|
'visa', 'mastercard', 'paypal', 'stripe', 'razorpay', 'phonepe', 'paytm', |
|
|
|
|
|
|
|
|
'gov', 'nic', 'irctc', 'uidai', 'mygov', 'incometax', 'aadhar', 'rbi', |
|
|
|
|
|
|
|
|
'airtel', 'jio', 'bsnl', 'vi', 'speedtest', 'cricbuzz', 'espn', 'espncricinfo', |
|
|
'wikipedia', 'mozilla', 'opera', 'chrome', 'android', 'apple', 'windows', 'microsoft' |
|
|
]) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
trusted_tlds = [ |
|
|
'.gov', '.nic.in', '.edu', '.ac.in', '.mil', '.org', '.int', |
|
|
'.co.in', '.gov.in', '.res.in', '.net.in', '.nic.gov.in' |
|
|
] |
|
|
|
|
|
|
|
|
bad_tlds = [ |
|
|
'.xyz', '.tk', '.ml', '.ga', '.cf', '.top', '.gq', '.cn', |
|
|
'.ru', '.pw', '.bid', '.link', '.loan', '.party', '.science', |
|
|
'.stream', '.webcam', '.online', '.site', '.website', '.space', |
|
|
'.club', '.buzz', '.info' |
|
|
] |
|
|
|
|
|
|
|
|
suspicious_extensions = ['.exe', '.zip', '.rar', '.js', '.php', '.asp', '.aspx', '.jsp', '.sh'] |
|
|
|
|
|
|
|
|
phishing_keywords = [ |
|
|
'login', 'verify', 'secure', 'account', 'update', 'confirm', 'authenticate', |
|
|
'free', 'bonus', 'offer', 'prize', 'winner', 'gift', 'coupon', 'discount', |
|
|
'bank', 'paypal', 'creditcard', 'mastercard', 'visa', 'amex', 'westernunion', |
|
|
'signin', 'click', 'password', 'unlock', 'recover', 'validate', 'urgency', |
|
|
'limitedtime', 'expires', 'suspicious', 'alert', 'important', 'actionrequired' |
|
|
] |
|
|
|
|
|
|
|
|
rules = { |
|
|
5: r"https?://\d{1,3}(\.\d{1,3}){3}", |
|
|
6: r"@[A-Za-z0-9.-]+\.[A-Za-z]{2,}", |
|
|
7: r"(free money|win now|click here)", |
|
|
8: r"https?://[^\s]*\.(ru|cn|tk)", |
|
|
9: r"https?://.{0,6}\..{2,6}/.{0,6}", |
|
|
10: r"[0-9]{10,}", |
|
|
12: r"https?://[^\s]*@[^\s]+", |
|
|
13: r"https?://[^\s]*//[^\s]+", |
|
|
14: r"https?://[^\s]*\?(?:[^=]+=[^&]*&){5,}", |
|
|
} |
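
# Illustrative application of the heuristic rules above (`url` is a
# hypothetical variable holding the string to screen):
#     triggered = [rule_id for rule_id, pattern in rules.items()
#                  if re.search(pattern, url, flags=re.IGNORECASE)]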
|
|
|
|
|
|
|
|
|
|
|
def is_gibberish_word(word):
    # Heuristic: real English words are rarely less than 25% vowels.
    if not word:
        return False
    vowels = "aeiou"
    v_count = sum(c in vowels for c in word)
    return v_count / len(word) < 0.25
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def extract_words(url): |
|
|
parsed = urlparse(url if url.startswith(("http://", "https://")) else "http://" + url) |
|
|
parts = re.split(r'\W+', parsed.netloc + parsed.path) |
|
|
final_words = [] |
|
|
for word in parts: |
|
|
if len(word) > 2 and word.isalpha(): |
|
|
split_words = wordninja.split(word.lower()) |
|
|
if len(split_words) <= 1: |
|
|
split_words = [word.lower()] |
|
|
final_words.extend(split_words) |
|
|
return final_words |
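
# Example (illustrative; wordninja's exact segmentation may vary):
#     extract_words("http://paypalsecurelogin.com/verify")
#     -> ['paypal', 'secure', 'login', 'com', 'verify']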
|
|
|
|
|
|
|
|
|
|
|
@app.route("/predict", methods=["POST"]) |
|
|
def predict(): |
|
|
try: |
|
|
data = request.get_json() |
|
|
url = data.get("url", "").lower() |
|
|
if not url: |
|
|
return jsonify({'error': 'No URL provided'}), 400 |
|
|
|
|
|
parsed = urlparse(url if url.startswith(("http://", "https://")) else "http://" + url) |
|
|
path = parsed.path |
|
|
|
|
|
|
|
|
spell = SpellChecker(distance=1) |
|
|
|
|
|
|
|
|
words = extract_words(url) |
|
|
|
|
|
        tlds_to_ignore = [tld.replace('.', '') for tld in trusted_tlds + bad_tlds]
|
|
words_for_spellcheck = [w for w in words if w not in tlds_to_ignore] |
|
|
|
|
|
misspelled = spell.unknown(words_for_spellcheck) |
|
|
steps = [{"word": w, "valid": (w not in misspelled) or (w in tlds_to_ignore)} for w in words] |
|
|
|
|
|
if misspelled: |
|
|
return jsonify({ |
|
|
"prediction": 1, |
|
|
"reason": f"🧾 Spelling errors: {', '.join(misspelled)}", |
|
|
"steps": steps |
|
|
}) |
|
|
else: |
|
|
return jsonify({ |
|
|
"prediction": 0, |
|
|
"reason": "✅ No spelling issues", |
|
|
"steps": steps |
|
|
}) |
|
|
|
|
|
except Exception as e: |
|
|
return jsonify({'error': f"An issue occurred during spell checking: {str(e)}"}), 500 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.route('/naive_bayes') |
|
|
def naive_bayes_page(): |
|
|
return render_template('naive_bayes_viz.html') |
|
|
|
|
|
|
|
|
@app.route('/nb_visual_predict', methods=['POST']) |
|
|
def nb_visual_predict(): |
|
|
try: |
|
|
data = request.json |
|
|
labeled_points = data['points'] |
|
|
test_point = data['test_point'] |
|
|
|
|
|
df = pd.DataFrame(labeled_points, columns=['X1', 'X2', 'Class']) |
|
|
X = df[['X1', 'X2']] |
|
|
y = df['Class'] |
|
|
|
|
|
|
|
|
if X.empty or len(X) < 2: |
|
|
return jsonify({'error': 'Not enough data points to train the model.'}), 400 |
|
|
if len(y.unique()) < 2: |
|
|
return jsonify({'error': 'Need at least two different classes to classify.'}), 400 |
|
|
|
|
|
|
|
|
|
|
|
nb_model = GaussianNB() |
|
|
nb_model.fit(X, y) |
|
|
|
|
|
|
|
|
test_point_np = np.array(test_point).reshape(1, -1) |
|
|
prediction = int(nb_model.predict(test_point_np)[0]) |
|
|
|
|
|
|
|
|
x_min, x_max = X['X1'].min(), X['X1'].max() |
|
|
y_min, y_max = X['X2'].min(), X['X2'].max() |
|
|
|
|
|
x_buffer = 1.0 if x_max - x_min == 0 else (x_max - x_min) * 0.1 |
|
|
y_buffer = 1.0 if y_max - y_min == 0 else (y_max - y_min) * 0.1 |
|
|
|
|
|
x_min -= x_buffer |
|
|
x_max += x_buffer |
|
|
y_min -= y_buffer |
|
|
y_max += y_buffer |
|
|
|
|
|
x_min = min(x_min, test_point_np[0,0] - 0.5) |
|
|
x_max = max(x_max, test_point_np[0,0] + 0.5) |
|
|
y_min = min(y_min, test_point_np[0,1] - 0.5) |
|
|
y_max = max(y_max, test_point_np[0,1] + 0.5) |
|
|
|
|
|
xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100), |
|
|
np.linspace(y_min, y_max, 100)) |
|
|
|
|
|
if xx.size == 0 or yy.size == 0: |
|
|
return jsonify({'error': 'Meshgrid could not be created. Data range too narrow.'}), 400 |
|
|
|
|
|
|
|
|
|
|
|
Z = nb_model.predict(np.c_[xx.ravel(), yy.ravel()]) |
|
|
Z = Z.reshape(xx.shape) |
|
|
|
|
|
decision_boundary_z = Z.tolist() |
|
|
decision_boundary_x_coords = xx[0, :].tolist() |
|
|
decision_boundary_y_coords = yy[:, 0].tolist() |
|
|
|
|
|
return jsonify({ |
|
|
'prediction': prediction, |
|
|
'decision_boundary_z': decision_boundary_z, |
|
|
'decision_boundary_x_coords': decision_boundary_x_coords, |
|
|
'decision_boundary_y_coords': decision_boundary_y_coords |
|
|
}) |
|
|
except Exception as e: |
|
|
print(f"An error occurred in /nb_visual_predict: {e}") |
|
|
return jsonify({'error': f'Backend Error: {str(e)}. Check server console for details.'}), 500 |
|
|
|
|
|
def check_with_virustotal(url): |
|
|
try: |
|
|
headers = {"x-apikey": VT_API_KEY} |
|
|
submit_url = "https://www.virustotal.com/api/v3/urls" |
|
|
|
|
|
|
|
|
        response = requests.post(submit_url, headers=headers, data={"url": url})
        # The POST returns an analysis id of the form "u-<sha256>-<timestamp>";
        # GET /urls/<id> expects the middle segment (the URL identifier).
        analysis_id = response.json()["data"]["id"]
        url_id = analysis_id.split("-")[1] if "-" in analysis_id else analysis_id

        result = requests.get(f"{submit_url}/{url_id}", headers=headers)
|
|
data = result.json() |
|
|
|
|
|
stats = data["data"]["attributes"]["last_analysis_stats"] |
|
|
malicious_count = stats.get("malicious", 0) |
|
|
|
|
|
if malicious_count > 0: |
|
|
return True, f"☣️ VirusTotal flagged it as malicious ({malicious_count} engines)" |
|
|
return False, None |
|
|
except Exception as e: |
|
|
print(f"⚠️ VirusTotal error: {e}") |
|
|
|
|
|
|
|
|
|
|
|
return False, None |
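
# Example (illustrative; requires VT_API_KEY and network access):
#     flagged, reason = check_with_virustotal("http://example.com")
#     if flagged:
#         print(reason)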
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.route('/kmeans-clustering') |
|
|
def clustering(): |
|
|
return render_template('clustering.html') |
|
|
|
|
|
|
|
|
@app.route('/kmeans-Dbscan-image', methods=['GET', 'POST']) |
|
|
def compress_and_clean(): |
|
|
final_image = None |
|
|
|
|
|
if request.method == 'POST': |
|
|
try: |
|
|
|
|
|
mode = request.form.get('mode', 'compress') |
|
|
k = int(request.form.get('k', 8)) |
|
|
eps = float(request.form.get('eps', 0.6)) |
|
|
min_samples = int(request.form.get('min_samples', 50)) |
|
|
image_file = request.files.get('image') |
|
|
|
|
|
if image_file and image_file.filename != '': |
|
|
|
|
|
img = Image.open(image_file).convert('RGB') |
|
|
max_size = (518, 518) |
|
|
img.thumbnail(max_size, Image.Resampling.LANCZOS) |
|
|
|
|
|
img_np = np.array(img) |
|
|
h, w, d = img_np.shape |
|
|
pixels = img_np.reshape(-1, d) |
|
|
|
|
|
|
|
|
kmeans = KMeans(n_clusters=k, random_state=42, n_init=10) |
|
|
kmeans.fit(pixels) |
|
|
clustered_pixels = kmeans.cluster_centers_[kmeans.labels_].astype(np.uint8) |
|
|
|
|
|
|
|
|
if mode == 'compress': |
|
|
final_pixels = clustered_pixels.reshape(h, w, d) |
|
|
|
|
|
|
|
|
else: |
|
|
|
|
|
max_dbscan_pixels = 10000 |
|
|
if len(clustered_pixels) > max_dbscan_pixels: |
|
|
idx = np.random.choice(len(clustered_pixels), max_dbscan_pixels, replace=False) |
|
|
dbscan_input = clustered_pixels[idx] |
|
|
else: |
|
|
dbscan_input = clustered_pixels |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
scaler = StandardScaler() |
|
|
pixels_scaled = scaler.fit_transform(dbscan_input) |
|
|
db = DBSCAN(eps=eps, min_samples=min_samples) |
|
|
labels = db.fit_predict(pixels_scaled) |
|
|
|
|
|
|
|
|
clean_pixels = [] |
|
|
for i in range(len(dbscan_input)): |
|
|
label = labels[i] |
|
|
clean_pixels.append([0, 0, 0] if label == -1 else dbscan_input[i]) |
|
|
|
|
|
|
|
|
                    # Pad back to the original pixel count so the reshape below
                    # succeeds; note this fills the *tail* with black rather
                    # than restoring the unsampled pixel positions.
                    if len(clustered_pixels) > max_dbscan_pixels:
                        clean_pixels.extend([[0, 0, 0]] * (len(clustered_pixels) - len(clean_pixels)))
|
|
|
|
|
final_pixels = np.array(clean_pixels, dtype=np.uint8).reshape(h, w, d) |
|
|
|
|
|
|
|
|
final_img = Image.fromarray(final_pixels) |
|
|
final_image = 'compressed_clean.jpg' |
|
|
final_img.save(os.path.join(app.config['UPLOAD_FOLDER'], final_image), optimize=True, quality=90) |
|
|
|
|
|
except Exception as e: |
|
|
return f"⚠️ Error: {str(e)}", 500 |
|
|
|
|
|
return render_template('kmean-dbscan-image.html', final_image=final_image) |
|
|
|
|
|
@app.route('/DBscan')
def dbscan_page():
    # Renamed from `DBSCAN` so the view function does not shadow
    # sklearn.cluster.DBSCAN, which /kmeans-Dbscan-image uses at request time.
    return render_template('DBSCAN.html')
|
|
|
|
|
|
|
|
if __name__ == "__main__": |
|
|
app.run(host="0.0.0.0", port=7860) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|