|
|
from flask import Flask, render_template, request, jsonify
from flask_cors import CORS
from werkzeug.utils import secure_filename

import io
import json
import os
import pickle
import re
import string
import traceback

import numpy as np
import pandas as pd
import joblib
import matplotlib.pyplot as plt
import plotly.graph_objs as go
import requests
import tldextract
from PIL import Image
from textblob import TextBlob
from dotenv import load_dotenv
from urllib.parse import urlparse

from sklearn import svm
from sklearn.svm import SVR, SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.cluster import KMeans, DBSCAN
from sklearn.datasets import make_classification
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
load_dotenv()

import nltk

# Point NLTK at the corpus data bundled alongside the app (nltk_data/).
nltk.data.path.append(os.path.join(os.path.dirname(__file__), "nltk_data"))

from nltk.corpus import words

# Sanity check: "engineering" is in this set; random gibberish is not.
valid_words = set(words.words())

import wordninja
from spellchecker import SpellChecker

import google.generativeai as genai
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from huggingface_hub import hf_hub_download
import torch
|
|
|
|
|
REPO_ID = "deedrop1140/Neroml" |
|
|
|
|
|
def load_file(filename): |
|
|
"""Download a file from Hugging Face Hub and load it with the right library.""" |
|
|
file_path = hf_hub_download(repo_id=REPO_ID, filename=filename) |
|
|
|
|
|
if filename.endswith(".pkl") or filename.endswith(".joblib"): |
|
|
return joblib.load(file_path) |
|
|
elif filename.endswith(".npy"): |
|
|
return np.load(file_path, allow_pickle=True) |
|
|
elif filename.endswith(".pt") or filename.endswith(".pth"): |
|
|
return torch.load(file_path) |
|
|
else: |
|
|
return file_path |
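
# Illustrative usage (comments only, nothing executed at import time);
# "some_embeddings.npy" and "weights.pt" are hypothetical filenames:
#
#     clf = load_file("Models/supervised_model.pkl")  # -> unpickled estimator
#     arr = load_file("some_embeddings.npy")          # -> NumPy array
#     net = load_file("weights.pt")                   # -> torch object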
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
MODEL_DIR = "Models" |
|
|
DATA_DIR = "housedata" |
|
|
UPLOAD_FOLDER = 'static/uploads' |
|
|
|
|
|
app = Flask(__name__) |
|
|
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER |
|
|
CORS(app) |
|
|
|
|
|
|
|
|
|
|
|
genai.configure(api_key=os.getenv("GEMINI_API_KEY")) |
|
|
|
|
|
def ask_gemini(statement): |
|
|
model = genai.GenerativeModel("gemini-2.0-flash-001") |
|
|
response = model.generate_content(f"Verify this statement for truth: {statement}") |
|
|
return response.text |
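
# Example (illustrative; requires GEMINI_API_KEY in the environment):
#
#     verdict = ask_gemini("The Great Wall of China is visible from the Moon.")
#     print(verdict)  # free-text assessment returned by the model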
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Small synthetic dataset and SVM fitted once at import time; the interactive
# routes below train their own models, so this mainly serves as a smoke test.
X, y = make_classification(n_samples=100, n_features=2, n_redundant=0,
                           n_clusters_per_class=1, n_classes=2, random_state=42)
|
|
scaler = StandardScaler() |
|
|
X = scaler.fit_transform(X) |
|
|
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) |
|
|
|
|
|
|
|
|
svm_model = SVC(kernel="linear") |
|
|
svm_model.fit(X_train, y_train) |
|
|
|
|
|
|
|
|
|
|
|
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY") |
|
|
GEMINI_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent" |
|
|
|
|
|
|
|
|
|
|
|
os.makedirs(MODEL_DIR, exist_ok=True) |
|
|
os.makedirs(DATA_DIR, exist_ok=True) |
|
|
os.makedirs(UPLOAD_FOLDER, exist_ok=True) |
|
|
|
|
|
def clean_text(text): |
|
|
if pd.isnull(text): |
|
|
return "" |
|
|
text = text.lower() |
|
|
text = re.sub(r"http\S+|www\S+|https\S+", '', text) |
|
|
text = text.translate(str.maketrans('', '', string.punctuation)) |
|
|
text = re.sub(r'\d+', '', text) |
|
|
text = re.sub(r'\s+', ' ', text).strip() |
|
|
return text |
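
# Example (illustrative):
#     clean_text("Visit https://x.co NOW!!! 50% off")  ->  "visit now off"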
|
|
|
|
|
|
|
|
def generate_linear_data(n_samples=100, noise=0.5): |
|
|
X = np.sort(np.random.rand(n_samples) * 10).reshape(-1, 1) |
|
|
y = 2 * X.squeeze() + 5 + noise * np.random.randn(n_samples) |
|
|
return X, y |
|
|
|
|
|
def generate_non_linear_data(n_samples=100, noise=0.5): |
|
|
X = np.sort(np.random.rand(n_samples) * 10).reshape(-1, 1) |
|
|
y = np.sin(X.squeeze()) * 10 + noise * np.random.randn(n_samples) |
|
|
return X, y |
|
|
|
|
|
def generate_noisy_data(n_samples=100, noise_factor=3.0): |
|
|
X = np.sort(np.random.rand(n_samples) * 10).reshape(-1, 1) |
|
|
y = 2 * X.squeeze() + 5 + noise_factor * np.random.randn(n_samples) |
|
|
return X, y |
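
# Each generator returns X of shape (n_samples, 1) and y of shape (n_samples,):
#     X, y = generate_linear_data(n_samples=50)
#     X.shape, y.shape  # -> ((50, 1), (50,))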
|
|
|
|
|
|
|
|
def get_house_data(): |
|
|
try: |
|
|
df = pd.read_csv(os.path.join(DATA_DIR, 'train.csv')) |
|
|
|
|
|
features = ['GrLivArea', 'OverallQual', 'GarageCars', 'TotalBsmtSF', 'YearBuilt'] |
|
|
|
|
|
if not all(col in df.columns for col in features + ['SalePrice']): |
|
|
print("Warning: Missing one or more required columns in train.csv for house data.") |
|
|
return None, None |
|
|
X = df[features] |
|
|
y = df['SalePrice'] |
|
|
return X, y |
|
|
except FileNotFoundError: |
|
|
print(f"Error: train.csv not found in {DATA_DIR}. Please ensure your data is there.") |
|
|
return None, None |
|
|
except Exception as e: |
|
|
print(f"Error loading house data: {e}") |
|
|
return None, None |
|
|
|
|
|
|
|
|
loaded_models = {} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
VT_API_KEY = os.getenv("VT_API_KEY") |
|
|
nb_model = load_file("Models/nb_url_model.pkl") |
|
|
vectorizer = load_file("Models/nb_url_vectorizer.pkl") |
|
|
|
|
|
if nb_model is not None and vectorizer is not None: |
|
|
print("✅ Loaded model and vectorizer.") |
|
|
else: |
|
|
print("❌ Model or vectorizer not found.") |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def load_all_models(): |
|
|
""" |
|
|
Loads all necessary models into the loaded_models dictionary when the app starts. |
|
|
""" |
|
|
global loaded_models |
|
|
|
|
|
|
|
|
|
|
|
try: |
|
|
supervised_model_path = load_file("Models/supervised_model.pkl") |
|
|
|
|
|
|
|
|
print("DEBUG -> supervised_model_path type:", type(supervised_model_path)) |
|
|
|
|
|
|
|
|
if isinstance(supervised_model_path, str): |
|
|
loaded_models['supervised'] = joblib.load(supervised_model_path) |
|
|
else: |
|
|
|
|
|
loaded_models['supervised'] = supervised_model_path |
|
|
|
|
|
print("Supervised model loaded successfully") |
|
|
|
|
|
    except FileNotFoundError:
        # Do not reference supervised_model_path here; it may be unbound if
        # load_file() itself raised.
        print("Error: supervised model file could not be found. "
              "Please run train_model.py first.")
        loaded_models['supervised'] = None
|
|
except Exception as e: |
|
|
print(f"Error loading supervised model: {e}") |
|
|
loaded_models['supervised'] = None |
|
|
|
|
|
|
|
|
|
|
|
with app.app_context(): |
|
|
load_all_models() |
|
|
|
|
|
@app.route('/') |
|
|
def home(): |
|
|
return render_template('home.html') |
|
|
|
|
|
@app.route('/supervised', methods=['GET', 'POST']) |
|
|
def supervised(): |
|
|
prediction = None |
|
|
hours_studied_input = None |
|
|
|
|
|
    if loaded_models.get('supervised') is None:
|
|
return "Error: Supervised model could not be loaded. Please check server logs.", 500 |
|
|
|
|
|
if request.method == 'POST': |
|
|
try: |
|
|
hours_studied_input = float(request.form['hours']) |
|
|
input_data = np.array([[hours_studied_input]]) |
|
|
|
|
|
predicted_score = loaded_models['supervised'].predict(input_data)[0] |
|
|
prediction = round(predicted_score, 2) |
|
|
|
|
|
except ValueError: |
|
|
print("Invalid input for hours studied.") |
|
|
prediction = "Error: Please enter a valid number." |
|
|
except Exception as e: |
|
|
print(f"An error occurred during prediction: {e}") |
|
|
prediction = "Error during prediction." |
|
|
|
|
|
return render_template('supervised.html', prediction=prediction, hours_studied_input=hours_studied_input) |
|
|
|
|
|
|
|
|
@app.route('/polynomial', methods=['GET', 'POST']) |
|
|
def polynomial(): |
|
|
if request.method == 'POST': |
|
|
try: |
|
|
hours = float(request.form['hours']) |
|
|
|
|
|
|
|
|
|
|
|
model = load_file("Models/poly_model.pkl") |
|
|
            poly = load_file("Models/poly_transform.pkl")
|
|
|
|
|
transformed_input = poly.transform([[hours]]) |
|
|
prediction = model.predict(transformed_input)[0] |
|
|
|
|
|
return render_template("poly.html", prediction=round(prediction, 2), hours=hours) |
|
|
|
|
|
except Exception as e: |
|
|
print(f"Error: {e}") |
|
|
return render_template("poly.html", error="Something went wrong.") |
|
|
|
|
|
return render_template("poly.html") |
|
|
|
|
|
|
|
|
@app.route('/random_forest', methods=['GET', 'POST']) |
|
|
def random_forest(): |
|
|
if request.method == 'POST': |
|
|
try: |
|
|
hours = float(request.form['hours']) |
|
|
model = load_file("Models/rf_model.pkl") |
|
|
|
|
|
prediction = model.predict([[hours]])[0] |
|
|
|
|
|
return render_template("rf.html", prediction=round(prediction, 2), hours=hours) |
|
|
except Exception as e: |
|
|
print(f"[ERROR] {e}") |
|
|
return render_template("rf.html", error="Prediction failed. Check your input.") |
|
|
return render_template("rf.html") |
|
|
|
|
|
@app.route('/prediction_flow') |
|
|
def prediction_flow(): |
|
|
return render_template('prediction_flow.html') |
|
|
|
|
|
@app.route("/lasso", methods=["GET", "POST"]) |
|
|
def lasso(): |
|
|
if request.method == "POST": |
|
|
try: |
|
|
inputs = [float(request.form.get(f)) for f in ['OverallQual', 'GrLivArea', 'GarageCars', 'TotalBsmtSF', 'YearBuilt']] |
|
|
|
|
|
model = load_file("Models/lasso_model.pkl") |
|
|
scaler = load_file("Models/lasso_scaler.pkl") |
|
|
|
|
|
|
|
|
|
|
|
scaled_input = scaler.transform([inputs]) |
|
|
|
|
|
prediction = model.predict(scaled_input)[0] |
|
|
return render_template("lasso.html", prediction=round(prediction, 2)) |
|
|
|
|
|
except Exception as e: |
|
|
return render_template("lasso.html", error=str(e)) |
|
|
|
|
|
return render_template("lasso.html") |
|
|
|
|
|
|
|
|
@app.route('/ridge', methods=['GET', 'POST']) |
|
|
def ridge(): |
|
|
prediction = None |
|
|
error = None |
|
|
|
|
|
try: |
|
|
model = load_file("Models/ridge_model.pkl") |
|
|
scaler = load_file("Models/ridge_scaler.pkl") |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
except Exception as e: |
|
|
return f"❌ Error loading Ridge model: {e}", 500 |
|
|
|
|
|
if request.method == 'POST': |
|
|
try: |
|
|
features = ['OverallQual', 'GrLivArea', 'GarageCars', 'TotalBsmtSF', 'YearBuilt'] |
|
|
input_data = [float(request.form[feature]) for feature in features] |
|
|
input_scaled = scaler.transform([input_data]) |
|
|
prediction = model.predict(input_scaled)[0] |
|
|
except Exception as e: |
|
|
error = str(e) |
|
|
|
|
|
return render_template('ridge.html', prediction=prediction, error=error) |
|
|
|
|
|
|
|
|
|
|
|
@app.route('/svr') |
|
|
def svr_page(): |
|
|
return render_template('svr.html') |
|
|
|
|
|
|
|
|
@app.route('/run_svr_demo', methods=['POST']) |
|
|
def run_svr_demo(): |
|
|
try: |
|
|
|
|
|
if request.is_json: |
|
|
data = request.json |
|
|
else: |
|
|
|
|
|
data = request.form |
|
|
|
|
|
dataset_type = data.get('dataset_type', 'linear') |
|
|
kernel_type = data.get('kernel', 'rbf') |
|
|
C_param = float(data.get('C', 1.0)) |
|
|
gamma_param = float(data.get('gamma', 0.1)) |
|
|
epsilon_param = float(data.get('epsilon', 0.1)) |
|
|
|
|
|
X, y = None, None |
|
|
|
|
|
if dataset_type == 'linear': |
|
|
X, y = generate_linear_data() |
|
|
elif dataset_type == 'non_linear': |
|
|
X, y = generate_non_linear_data() |
|
|
elif dataset_type == 'noisy': |
|
|
X, y = generate_noisy_data() |
|
|
elif dataset_type == 'house_data': |
|
|
X_house, y_house = get_house_data() |
|
|
if X_house is not None and not X_house.empty: |
|
|
X = X_house[['GrLivArea']].values |
|
|
y = y_house.values |
|
|
else: |
|
|
X, y = generate_linear_data() |
|
|
elif dataset_type == 'custom_csv': |
|
|
uploaded_file = request.files.get('file') |
|
|
x_column_name = data.get('x_column_name') |
|
|
y_column_name = data.get('y_column_name') |
|
|
|
|
|
if not uploaded_file or uploaded_file.filename == '': |
|
|
return jsonify({'error': 'No file uploaded for custom CSV.'}), 400 |
|
|
if not x_column_name or not y_column_name: |
|
|
return jsonify({'error': 'X and Y column names are required for custom CSV.'}), 400 |
|
|
|
|
|
try: |
|
|
|
|
|
df = pd.read_csv(io.BytesIO(uploaded_file.read())) |
|
|
|
|
|
if x_column_name not in df.columns or y_column_name not in df.columns: |
|
|
missing_cols = [] |
|
|
if x_column_name not in df.columns: missing_cols.append(x_column_name) |
|
|
if y_column_name not in df.columns: missing_cols.append(y_column_name) |
|
|
return jsonify({'error': f"Missing columns in uploaded CSV: {', '.join(missing_cols)}"}), 400 |
|
|
|
|
|
X = df[[x_column_name]].values |
|
|
y = df[y_column_name].values |
|
|
except Exception as e: |
|
|
return jsonify({'error': f"Error reading or processing custom CSV: {str(e)}"}), 400 |
|
|
else: |
|
|
X, y = generate_linear_data() |
|
|
|
|
|
|
|
|
if X is None or y is None or len(X) == 0: |
|
|
return jsonify({'error': 'Failed to generate or load dataset.'}), 500 |
|
|
|
|
|
|
|
|
scaler_X = StandardScaler() |
|
|
scaler_y = StandardScaler() |
|
|
|
|
|
X_scaled = scaler_X.fit_transform(X) |
|
|
y_scaled = scaler_y.fit_transform(y.reshape(-1, 1)).flatten() |
|
|
|
|
|
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42) |
|
|
|
|
|
|
|
|
svr_model = SVR(kernel=kernel_type, C=C_param, gamma=gamma_param, epsilon=epsilon_param) |
|
|
svr_model.fit(X_train, y_train) |
|
|
|
|
|
|
|
|
y_pred_scaled = svr_model.predict(X_test) |
|
|
|
|
|
|
|
|
y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).flatten() |
|
|
y_test_original = scaler_y.inverse_transform(y_test.reshape(-1, 1)).flatten() |
|
|
|
|
|
|
|
|
mse = mean_squared_error(y_test_original, y_pred) |
|
|
r2 = r2_score(y_test_original, y_pred) |
|
|
support_vectors_count = len(svr_model.support_vectors_) |
|
|
|
|
|
|
|
|
plot_X_original = scaler_X.inverse_transform(X_scaled) |
|
|
plot_y_original = scaler_y.inverse_transform(y_scaled.reshape(-1, 1)).flatten() |
|
|
|
|
|
x_plot = np.linspace(plot_X_original.min(), plot_X_original.max(), 500).reshape(-1, 1) |
|
|
x_plot_scaled = scaler_X.transform(x_plot) |
|
|
y_plot_scaled = svr_model.predict(x_plot_scaled) |
|
|
y_plot_original = scaler_y.inverse_transform(y_plot_scaled.reshape(-1, 1)).flatten() |
|
|
|
|
|
y_upper_scaled = y_plot_scaled + epsilon_param |
|
|
y_lower_scaled = y_plot_scaled - epsilon_param |
|
|
y_upper_original = scaler_y.inverse_transform(y_upper_scaled.reshape(-1, 1)).flatten() |
|
|
y_lower_original = scaler_y.inverse_transform(y_lower_scaled.reshape(-1, 1)).flatten() |
|
|
|
|
|
plot_data = { |
|
|
'data': [ |
|
|
{ |
|
|
'x': plot_X_original.flatten().tolist(), |
|
|
'y': plot_y_original.tolist(), |
|
|
'mode': 'markers', |
|
|
'type': 'scatter', |
|
|
'name': 'Original Data' |
|
|
}, |
|
|
{ |
|
|
'x': x_plot.flatten().tolist(), |
|
|
'y': y_plot_original.tolist(), |
|
|
'mode': 'lines', |
|
|
'type': 'scatter', |
|
|
'name': 'SVR Prediction', |
|
|
'line': {'color': 'red'} |
|
|
}, |
|
|
{ |
|
|
'x': x_plot.flatten().tolist(), |
|
|
'y': y_upper_original.tolist(), |
|
|
'mode': 'lines', |
|
|
'type': 'scatter', |
|
|
'name': 'Epsilon Tube (Upper)', |
|
|
'line': {'dash': 'dash', 'color': 'green'}, |
|
|
'fill': 'tonexty', |
|
|
'fillcolor': 'rgba(0,128,0,0.1)' |
|
|
}, |
|
|
{ |
|
|
'x': x_plot.flatten().tolist(), |
|
|
'y': y_lower_original.tolist(), |
|
|
'mode': 'lines', |
|
|
'type': 'scatter', |
|
|
'name': 'Epsilon Tube (Lower)', |
|
|
'line': {'dash': 'dash', 'color': 'green'} |
|
|
} |
|
|
], |
|
|
'layout': { |
|
|
'title': f'SVR Regression (Kernel: {kernel_type.upper()})', |
|
|
'xaxis': {'title': 'Feature Value'}, |
|
|
'yaxis': {'title': 'Target Value'}, |
|
|
'hovermode': 'closest' |
|
|
} |
|
|
} |
|
|
|
|
|
return jsonify({ |
|
|
'mse': mse, |
|
|
'r2_score': r2, |
|
|
'support_vectors_count': support_vectors_count, |
|
|
'plot_data': plot_data |
|
|
}) |
|
|
|
|
|
except Exception as e: |
|
|
print(f"Error in SVR demo: {e}") |
|
|
return jsonify({'error': str(e)}), 500 |
|
|
|
|
|
|
|
|
def clean_text(text):
    # Final definition used by the spam endpoints; it intentionally overrides
    # the URL-oriented cleaner defined earlier in this file.
    text = text.lower()
    text = re.sub(r'\W', ' ', text)
    text = re.sub(r'\s+[a-zA-Z]\s+', ' ', text)
    text = re.sub(r'\s+', ' ', text)
    return text.strip()
|
|
|
|
|
@app.route('/logistic', methods=['GET', 'POST']) |
|
|
def logistic(): |
|
|
prediction, confidence_percentage, cleaned, tokens, probability = None, None, None, None, None |
|
|
|
|
|
|
|
|
model = load_file("Models/logistic_model.pkl") |
|
|
vectorizer = load_file("Models/logvectorizer.pkl") |
|
|
|
|
|
if request.method == "POST": |
|
|
msg = request.form.get('message', '') |
|
|
cleaned = clean_text(msg) |
|
|
tokens = cleaned.split() |
|
|
|
|
|
|
|
|
try: |
|
|
vector = vectorizer.transform([cleaned]) |
|
|
probability = model.predict_proba(vector)[0][1] |
|
|
prediction = "Spam" if probability >= 0.5 else "Not Spam" |
|
|
confidence_percentage = round(probability * 100, 2) |
|
|
except Exception as e: |
|
|
print("Error predicting:", e) |
|
|
prediction = "Error" |
|
|
confidence_percentage = 0 |
|
|
|
|
|
return render_template( |
|
|
"logistic.html", |
|
|
prediction=prediction, |
|
|
confidence_percentage=confidence_percentage, |
|
|
cleaned=cleaned, |
|
|
tokens=tokens, |
|
|
        probability=round(probability, 4) if probability is not None else None,
|
|
source="sms" |
|
|
) |
|
|
|
|
|
@app.route('/logistic-sms', methods=['POST']) |
|
|
def logistic_sms(): |
|
|
    try:
        data = request.get_json()
        msg = data.get('message', '')
        cleaned = clean_text(msg)
        tokens = cleaned.split()

        # Load the spam model and vectorizer explicitly; the module-level
        # `vectorizer` belongs to the URL checker, not this endpoint.
        model = load_file("Models/logistic_model.pkl")
        vectorizer = load_file("Models/logvectorizer.pkl")
|
|
|
|
|
vector = vectorizer.transform([cleaned]) |
|
|
probability = model.predict_proba(vector)[0][1] |
|
|
prediction = "Spam" if probability >= 0.5 else "Not Spam" |
|
|
confidence_percentage = round(probability * 100, 2) |
|
|
|
|
|
return jsonify({ |
|
|
"prediction": prediction, |
|
|
"confidence": confidence_percentage, |
|
|
"probability": round(probability, 4), |
|
|
"cleaned": cleaned, |
|
|
"tokens": tokens, |
|
|
"source": "json" |
|
|
}) |
|
|
|
|
|
except Exception as e: |
|
|
print("Error in /logistic-sms:", e) |
|
|
return jsonify({"error": "Internal server error", "details": str(e)}), 500 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.route("/knn") |
|
|
def knn_visual(): |
|
|
return render_template("knn.html") |
|
|
|
|
|
@app.route('/knn_visual_predict', methods=['POST']) |
|
|
def knn_visual_predict(): |
|
|
data = request.get_json() |
|
|
points = np.array(data['points']) |
|
|
test_point = np.array(data['test_point']) |
|
|
k = int(data['k']) |
|
|
|
|
|
X = points[:, :2] |
|
|
y = points[:, 2].astype(int) |
|
|
|
|
|
knn = KNeighborsClassifier(n_neighbors=k) |
|
|
knn.fit(X, y) |
|
|
pred = knn.predict([test_point])[0] |
|
|
|
|
|
dists = np.linalg.norm(X - test_point, axis=1) |
|
|
neighbor_indices = np.argsort(dists)[:k] |
|
|
neighbors = X[neighbor_indices] |
|
|
|
|
|
return jsonify({ |
|
|
'prediction': int(pred), |
|
|
'neighbors': neighbors.tolist() |
|
|
}) |
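
# Expected JSON payload for /knn_visual_predict (illustrative):
#     {"points": [[1.0, 2.0, 0], [2.0, 1.0, 1], ...],  # [x, y, class]
#      "test_point": [1.5, 1.5],
#      "k": 3}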
|
|
|
|
|
|
|
|
@app.route("/knn_image") |
|
|
def knn_image_page(): |
|
|
return render_template("knn_image.html") |
|
|
|
|
|
|
|
|
|
|
@app.route("/predict_image", methods=["POST"]) |
|
|
def predict_image(): |
|
|
if "image" not in request.files: |
|
|
return jsonify({"error": "No image uploaded"}), 400 |
|
|
|
|
|
file = request.files["image"] |
|
|
|
|
|
try: |
|
|
|
|
|
image = Image.open(file.stream).convert("RGB") |
|
|
image = image.resize((32, 32)) |
|
|
img_array = np.array(image).flatten().reshape(1, -1) |
|
|
except Exception as e: |
|
|
return jsonify({"error": f"Invalid image. {str(e)}"}), 400 |
|
|
|
|
|
|
|
|
|
|
|
model = load_file("Models/knn_model.pkl") |
|
|
label_classes = load_file("Models/lasso_model.pkl") |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
probs = model.predict_proba(img_array)[0] |
|
|
pred_index = np.argmax(probs) |
|
|
pred_label = label_classes[pred_index] |
|
|
confidence = round(float(probs[pred_index]) * 100, 2) |
|
|
|
|
|
return jsonify({ |
|
|
"prediction": str(pred_label), |
|
|
"confidence": f"{confidence}%", |
|
|
"all_probabilities": { |
|
|
str(label_classes[i]): round(float(probs[i]) * 100, 2) |
|
|
for i in range(len(probs)) |
|
|
} |
|
|
}) |
|
|
|
|
|
@app.route("/rfc") |
|
|
def random_forest_page(): |
|
|
return render_template("Random_Forest_Classifier.html") |
|
|
|
|
|
@app.route('/rf_visual_predict', methods=['POST']) |
|
|
def rf_visual_predict(): |
|
|
try: |
|
|
data = request.get_json() |
|
|
print("📦 Incoming JSON data:", data) |
|
|
|
|
|
labeled_points = data.get('points') |
|
|
test_point = data.get('test_point') |
|
|
|
|
|
if not labeled_points or not test_point: |
|
|
return jsonify({"error": "Missing points or test_point"}), 400 |
|
|
|
|
|
df = pd.DataFrame(labeled_points, columns=['X1', 'X2', 'Class']) |
|
|
X = df[['X1', 'X2']] |
|
|
y = df['Class'] |
|
|
|
|
|
rf_model = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42) |
|
|
rf_model.fit(X, y) |
|
|
|
|
|
test_point_np = np.array(test_point).reshape(1, -1) |
|
|
prediction = int(rf_model.predict(test_point_np)[0]) |
|
|
|
|
|
x_min, x_max = X['X1'].min() - 1, X['X1'].max() + 1 |
|
|
y_min, y_max = X['X2'].min() - 1, X['X2'].max() + 1 |
|
|
xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100), |
|
|
np.linspace(y_min, y_max, 100)) |
|
|
|
|
|
Z = rf_model.predict(np.c_[xx.ravel(), yy.ravel()]) |
|
|
Z = Z.reshape(xx.shape) |
|
|
|
|
|
return jsonify({ |
|
|
'prediction': prediction, |
|
|
'decision_boundary_z': Z.tolist(), |
|
|
'decision_boundary_x_coords': xx[0, :].tolist(), |
|
|
'decision_boundary_y_coords': yy[:, 0].tolist() |
|
|
}) |
|
|
|
|
|
except Exception as e: |
|
|
|
|
print("❌ Exception in /rf_visual_predict:") |
|
|
traceback.print_exc() |
|
|
return jsonify({"error": str(e)}), 500 |
|
|
|
|
|
@app.route("/liar") |
|
|
def liar_input_page(): |
|
|
return render_template("rfc_liar_predict.html") |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.route("/ref/liar/predictor", methods=["POST"]) |
|
|
def liar_predictor(): |
|
|
try: |
|
|
data = request.get_json() |
|
|
statement = data.get("statement", "") |
|
|
|
|
|
if not statement: |
|
|
return jsonify({"success": False, "error": "Missing statement"}), 400 |
|
|
|
|
|
        try:
            # NOTE: `vectorizer` and `model` resolve to the module-level
            # URL-checker globals; no LIAR-specific model is loaded anywhere
            # in this file, so this branch likely needs its own load_file()
            # calls for the intended artifacts.
            features = vectorizer.transform([statement])
            prediction = model.predict(features)[0]
|
|
|
|
|
liar_label_map = { |
|
|
0: "It can be false 🔥", |
|
|
1: "False ❌", |
|
|
2: "Mostly false but can be true 🤏", |
|
|
3: "Half True 🌓", |
|
|
4: "Mostly True 👍", |
|
|
5: "True ✅" |
|
|
} |
|
|
|
|
|
prediction_label = liar_label_map.get(int(prediction), "Unknown") |
|
|
|
|
|
except ValueError as ve: |
|
|
if "features" in str(ve): |
|
|
|
|
|
prediction_label = ask_gemini(statement) |
|
|
else: |
|
|
raise ve |
|
|
|
|
|
|
|
|
        # `bert_checker` is assumed to be a Hugging Face text-classification
        # pipeline defined elsewhere; it is not created in this file.
        bert_result = bert_checker(statement)[0]
|
|
bert_label = bert_result["label"] |
|
|
bert_score = round(bert_result["score"] * 100, 2) |
|
|
|
|
|
science_label_map = { |
|
|
"LABEL_0": "✅ Scientifically Possible", |
|
|
"LABEL_1": "❌ Scientifically Impossible" |
|
|
} |
|
|
|
|
|
scientific_check = f"{science_label_map.get(bert_label, bert_label)} ({bert_score:.2f}%)" |
|
|
|
|
|
return jsonify({ |
|
|
"success": True, |
|
|
"prediction": prediction_label, |
|
|
"reason": "Predicted from linguistic and content-based patterns, or Gemini fallback.", |
|
|
"scientific_check": scientific_check |
|
|
}) |
|
|
|
|
|
except Exception as e: |
|
|
traceback.print_exc() |
|
|
return jsonify({"success": False, "error": str(e)}), 500 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.route("/svm") |
|
|
def svm_page(): |
|
|
return render_template("svm.html") |
|
|
|
|
|
@app.route('/svm_visual_predict', methods=['POST']) |
|
|
def svm_visual_predict(): |
|
|
data = request.json |
|
|
labeled_points = data['points'] |
|
|
test_point = data['test_point'] |
|
|
svm_type = data['svm_type'] |
|
|
c_param = float(data['c_param']) |
|
|
gamma_param = float(data['gamma_param']) |
|
|
|
|
|
df = pd.DataFrame(labeled_points, columns=['X1', 'X2', 'Class']) |
|
|
X = df[['X1', 'X2']] |
|
|
y = df['Class'] |
|
|
|
|
|
|
|
|
if svm_type == 'linear': |
|
|
svm_model = svm.SVC(kernel='linear', C=c_param, random_state=42) |
|
|
elif svm_type == 'rbf': |
|
|
svm_model = svm.SVC(kernel='rbf', C=c_param, gamma=gamma_param, random_state=42) |
|
|
else: |
|
|
return jsonify({'error': 'Invalid SVM type'}), 400 |
|
|
|
|
|
svm_model.fit(X, y) |
|
|
|
|
|
|
|
|
test_point_np = np.array(test_point).reshape(1, -1) |
|
|
prediction = int(svm_model.predict(test_point_np)[0]) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
support_vectors = svm_model.support_vectors_.tolist() |
|
|
|
|
|
|
|
|
|
|
|
x_min, x_max = X['X1'].min() - 1, X['X1'].max() + 1 |
|
|
y_min, y_max = X['X2'].min() - 1, X['X2'].max() + 1 |
|
|
|
|
|
|
|
|
x_min = min(x_min, test_point_np[0,0] - 1) |
|
|
x_max = max(x_max, test_point_np[0,0] + 1) |
|
|
y_min = min(y_min, test_point_np[0,1] - 1) |
|
|
y_max = max(y_max, test_point_np[0,1] + 1) |
|
|
|
|
|
xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100), |
|
|
np.linspace(y_min, y_max, 100)) |
|
|
|
|
|
|
|
|
Z = svm_model.predict(np.c_[xx.ravel(), yy.ravel()]) |
|
|
Z = Z.reshape(xx.shape) |
|
|
|
|
|
|
|
|
decision_boundary_z = Z.tolist() |
|
|
decision_boundary_x_coords = xx[0, :].tolist() |
|
|
decision_boundary_y_coords = yy[:, 0].tolist() |
|
|
|
|
|
return jsonify({ |
|
|
'prediction': prediction, |
|
|
'decision_boundary_z': decision_boundary_z, |
|
|
'decision_boundary_x_coords': decision_boundary_x_coords, |
|
|
'decision_boundary_y_coords': decision_boundary_y_coords, |
|
|
'support_vectors': support_vectors |
|
|
}) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.route('/api/explain', methods=['POST']) |
|
|
def explain(): |
|
|
|
|
|
|
|
|
|
|
|
    if not GEMINI_API_KEY and os.getenv("FLASK_ENV") != "development":
|
|
return jsonify({'error': 'Missing API key'}), 500 |
|
|
|
|
|
payload = request.get_json() |
|
|
|
|
|
try: |
|
|
response = requests.post( |
|
|
f"{GEMINI_URL}?key={GEMINI_API_KEY}", |
|
|
headers={"Content-Type": "application/json"}, |
|
|
json=payload |
|
|
) |
|
|
response.raise_for_status() |
|
|
return jsonify(response.json()) |
|
|
except requests.exceptions.RequestException as e: |
|
|
app.logger.error(f"Error calling Gemini API: {e}") |
|
|
return jsonify({'error': str(e)}), 500 |
|
|
|
|
|
@app.route('/decision_tree') |
|
|
def decision_tree_page(): |
|
|
|
|
|
|
|
|
return render_template('decision_tree.html') |
|
|
|
|
|
|
|
|
@app.route('/game') |
|
|
def decision_tree_game(): |
|
|
"""Renders the interactive game page for decision trees.""" |
|
|
return render_template('decision_tree_game.html') |
|
|
|
|
|
@app.route('/dt_visual_predict', methods=['POST']) |
|
|
def dt_visual_predict(): |
|
|
try: |
|
|
data = request.json |
|
|
labeled_points = data['points'] |
|
|
test_point = data['test_point'] |
|
|
max_depth = int(data['max_depth']) |
|
|
|
|
|
|
|
|
df = pd.DataFrame(labeled_points, columns=['X1', 'X2', 'Class']) |
|
|
X = df[['X1', 'X2']] |
|
|
y = df['Class'] |
|
|
|
|
|
|
|
|
if X.empty or len(X) < 2: |
|
|
return jsonify({'error': 'Not enough data points to train the model.'}), 400 |
|
|
|
|
|
|
|
|
dt_model = DecisionTreeClassifier(max_depth=max_depth, random_state=42) |
|
|
dt_model.fit(X, y) |
|
|
|
|
|
|
|
|
test_point_np = np.array(test_point).reshape(1, -1) |
|
|
prediction = int(dt_model.predict(test_point_np)[0]) |
|
|
|
|
|
|
|
|
x_min, x_max = X['X1'].min(), X['X1'].max() |
|
|
y_min, y_max = X['X2'].min(), X['X2'].max() |
|
|
|
|
|
|
|
|
|
|
|
x_buffer = 1.0 if (x_max - x_min) == 0 else (x_max - x_min) * 0.1 |
|
|
y_buffer = 1.0 if (y_max - y_min) == 0 else (y_max - y_min) * 0.1 |
|
|
|
|
|
x_min -= x_buffer |
|
|
x_max += x_buffer |
|
|
y_min -= y_buffer |
|
|
y_max += y_buffer |
|
|
|
|
|
|
|
|
x_min = min(x_min, test_point_np[0,0] - 0.5) |
|
|
x_max = max(x_max, test_point_np[0,0] + 0.5) |
|
|
y_min = min(y_min, test_point_np[0,1] - 0.5) |
|
|
y_max = max(y_max, test_point_np[0,1] + 0.5) |
|
|
|
|
|
|
|
|
xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100), |
|
|
np.linspace(y_min, y_max, 100)) |
|
|
|
|
|
|
|
|
Z = dt_model.predict(np.c_[xx.ravel(), yy.ravel()]) |
|
|
Z = Z.reshape(xx.shape) |
|
|
|
|
|
|
|
|
decision_boundary_z = Z.tolist() |
|
|
decision_boundary_x_coords = xx[0, :].tolist() |
|
|
decision_boundary_y_coords = yy[:, 0].tolist() |
|
|
|
|
|
return jsonify({ |
|
|
'prediction': prediction, |
|
|
'decision_boundary_z': decision_boundary_z, |
|
|
'decision_boundary_x_coords': decision_boundary_x_coords, |
|
|
'decision_boundary_y_coords': decision_boundary_y_coords |
|
|
}) |
|
|
except Exception as e: |
|
|
|
|
|
print(f"An error occurred in /dt_visual_predict: {e}") |
|
|
|
|
|
return jsonify({'error': f'Backend Error: {str(e)}. Check server console for details.'}), 500 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.route('/nb_spam') |
|
|
def nb_spam_page(): |
|
|
return render_template('NB_spam.html') |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
whitelist = set([ |
|
|
|
|
|
'google', 'bing', 'yahoo', 'duckduckgo', 'baidu', 'ask', |
|
|
|
|
|
|
|
|
'facebook', 'instagram', 'twitter', 'linkedin', 'snapchat', 'tiktok', |
|
|
'threads', 'pinterest', 'reddit', 'quora', |
|
|
|
|
|
|
|
|
'whatsapp', 'telegram', 'skype', 'zoom', 'meet', 'discord', |
|
|
'teams', 'signal', 'messenger', |
|
|
|
|
|
|
|
|
'amazon', 'ebay', 'shopify', 'alibaba', 'walmart', 'target', |
|
|
'etsy', 'shein', 'bestbuy', 'costco', 'newegg', |
|
|
|
|
|
|
|
|
'flipkart', 'myntra', 'ajio', 'nykaa', 'meesho', 'snapdeal', |
|
|
'paytm', 'phonepe', 'mobikwik', 'zomato', 'swiggy', 'ola', 'uber', 'bookmyshow', |
|
|
'ixigo', 'makemytrip', 'yatra', 'redbus', 'bigbasket', 'grofers', 'blinkit', |
|
|
'universalcollegeofengineering', |
|
|
|
|
|
|
|
|
'youtube', 'docs', 'drive', 'calendar', 'photos', 'gmail', 'notion', |
|
|
'edx', 'coursera', 'udemy', 'khanacademy', 'byjus', 'unacademy', |
|
|
|
|
|
|
|
|
'bbc', 'cnn', 'nyt', 'forbes', 'bloomberg', 'reuters', |
|
|
'ndtv', 'indiatimes', 'thehindu', 'hindustantimes', 'indiatoday', |
|
|
'techcrunch', 'verge', 'wired', |
|
|
|
|
|
|
|
|
'netflix', 'hotstar', 'primevideo', 'spotify', 'gaana', 'wynk', 'saavn', 'voot', |
|
|
|
|
|
|
|
|
'github', 'stackoverflow', 'medium', 'gitlab', 'bitbucket', |
|
|
'adobe', 'figma', 'canva', |
|
|
|
|
|
|
|
|
'hdfcbank', 'icicibank', 'sbi', 'axisbank', 'kotak', 'boi', 'upi', |
|
|
'visa', 'mastercard', 'paypal', 'stripe', 'razorpay', 'phonepe', 'paytm', |
|
|
|
|
|
|
|
|
'gov', 'nic', 'irctc', 'uidai', 'mygov', 'incometax', 'aadhar', 'rbi', |
|
|
|
|
|
|
|
|
'airtel', 'jio', 'bsnl', 'vi', 'speedtest', 'cricbuzz', 'espn', 'espncricinfo', |
|
|
'wikipedia', 'mozilla', 'opera', 'chrome', 'android', 'apple', 'windows', 'microsoft' |
|
|
]) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
trusted_tlds = [ |
|
|
'.gov', '.nic.in', '.edu', '.ac.in', '.mil', '.org', '.int', |
|
|
'.co.in', '.gov.in', '.res.in', '.net.in', '.nic.gov.in' |
|
|
] |
|
|
|
|
|
|
|
|
bad_tlds = [ |
|
|
'.xyz', '.tk', '.ml', '.ga', '.cf', '.top', '.gq', '.cn', |
|
|
'.ru', '.pw', '.bid', '.link', '.loan', '.party', '.science', |
|
|
'.stream', '.webcam', '.online', '.site', '.website', '.space', |
|
|
'.club', '.buzz', '.info' |
|
|
] |
|
|
|
|
|
|
|
|
suspicious_extensions = ['.exe', '.zip', '.rar', '.js', '.php', '.asp', '.aspx', '.jsp', '.sh'] |
|
|
|
|
|
|
|
|
phishing_keywords = [ |
|
|
'login', 'verify', 'secure', 'account', 'update', 'confirm', 'authenticate', |
|
|
'free', 'bonus', 'offer', 'prize', 'winner', 'gift', 'coupon', 'discount', |
|
|
'bank', 'paypal', 'creditcard', 'mastercard', 'visa', 'amex', 'westernunion', |
|
|
'signin', 'click', 'password', 'unlock', 'recover', 'validate', 'urgency', |
|
|
'limitedtime', 'expires', 'suspicious', 'alert', 'important', 'actionrequired' |
|
|
] |
|
|
|
|
|
|
|
|
rules = { |
|
|
5: r"https?://\d{1,3}(\.\d{1,3}){3}", |
|
|
6: r"@[A-Za-z0-9.-]+\.[A-Za-z]{2,}", |
|
|
7: r"(free money|win now|click here)", |
|
|
8: r"https?://[^\s]*\.(ru|cn|tk)", |
|
|
9: r"https?://.{0,6}\..{2,6}/.{0,6}", |
|
|
10: r"[0-9]{10,}", |
|
|
12: r"https?://[^\s]*@[^\s]+", |
|
|
13: r"https?://[^\s]*//[^\s]+", |
|
|
14: r"https?://[^\s]*\?(?:[^=]+=[^&]*&){5,}", |
|
|
} |
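
# Illustrative application of the heuristic rules above (`url` is a
# hypothetical variable holding the string to screen):
#     triggered = [rule_id for rule_id, pattern in rules.items()
#                  if re.search(pattern, url, flags=re.IGNORECASE)]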
|
|
|
|
|
|
|
|
|
|
|
def is_gibberish_word(word):
    # Heuristic: real English words are rarely less than 25% vowels.
    if not word:
        return False
    vowels = "aeiou"
    v_count = sum(c in vowels for c in word)
    return v_count / len(word) < 0.25
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def extract_words(url): |
|
|
parsed = urlparse(url if url.startswith(("http://", "https://")) else "http://" + url) |
|
|
parts = re.split(r'\W+', parsed.netloc + parsed.path) |
|
|
final_words = [] |
|
|
for word in parts: |
|
|
if len(word) > 2 and word.isalpha(): |
|
|
split_words = wordninja.split(word.lower()) |
|
|
if len(split_words) <= 1: |
|
|
split_words = [word.lower()] |
|
|
final_words.extend(split_words) |
|
|
return final_words |
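
# Example (illustrative; wordninja's exact segmentation may vary):
#     extract_words("http://paypalsecurelogin.com/verify")
#     -> ['paypal', 'secure', 'login', 'com', 'verify']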
|
|
|
|
|
|
|
|
|
|
|
@app.route("/predict", methods=["POST"]) |
|
|
def predict(): |
|
|
try: |
|
|
data = request.get_json() |
|
|
url = data.get("url", "").lower() |
|
|
if not url: |
|
|
return jsonify({'error': 'No URL provided'}), 400 |
|
|
|
|
|
parsed = urlparse(url if url.startswith(("http://", "https://")) else "http://" + url) |
|
|
path = parsed.path |
|
|
|
|
|
|
|
|
spell = SpellChecker(distance=1) |
|
|
|
|
|
|
|
|
words = extract_words(url) |
|
|
|
|
|
        tlds_to_ignore = [tld.replace('.', '') for tld in trusted_tlds + bad_tlds]
|
|
words_for_spellcheck = [w for w in words if w not in tlds_to_ignore] |
|
|
|
|
|
misspelled = spell.unknown(words_for_spellcheck) |
|
|
steps = [{"word": w, "valid": (w not in misspelled) or (w in tlds_to_ignore)} for w in words] |
|
|
|
|
|
if misspelled: |
|
|
return jsonify({ |
|
|
"prediction": 1, |
|
|
"reason": f"🧾 Spelling errors: {', '.join(misspelled)}", |
|
|
"steps": steps |
|
|
}) |
|
|
else: |
|
|
return jsonify({ |
|
|
"prediction": 0, |
|
|
"reason": "✅ No spelling issues", |
|
|
"steps": steps |
|
|
}) |
|
|
|
|
|
except Exception as e: |
|
|
return jsonify({'error': f"An issue occurred during spell checking: {str(e)}"}), 500 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.route('/naive_bayes') |
|
|
def naive_bayes_page(): |
|
|
return render_template('naive_bayes_viz.html') |
|
|
|
|
|
|
|
|
@app.route('/nb_visual_predict', methods=['POST']) |
|
|
def nb_visual_predict(): |
|
|
try: |
|
|
data = request.json |
|
|
labeled_points = data['points'] |
|
|
test_point = data['test_point'] |
|
|
|
|
|
df = pd.DataFrame(labeled_points, columns=['X1', 'X2', 'Class']) |
|
|
X = df[['X1', 'X2']] |
|
|
y = df['Class'] |
|
|
|
|
|
|
|
|
if X.empty or len(X) < 2: |
|
|
return jsonify({'error': 'Not enough data points to train the model.'}), 400 |
|
|
if len(y.unique()) < 2: |
|
|
return jsonify({'error': 'Need at least two different classes to classify.'}), 400 |
|
|
|
|
|
|
|
|
|
|
|
nb_model = GaussianNB() |
|
|
nb_model.fit(X, y) |
|
|
|
|
|
|
|
|
test_point_np = np.array(test_point).reshape(1, -1) |
|
|
prediction = int(nb_model.predict(test_point_np)[0]) |
|
|
|
|
|
|
|
|
x_min, x_max = X['X1'].min(), X['X1'].max() |
|
|
y_min, y_max = X['X2'].min(), X['X2'].max() |
|
|
|
|
|
x_buffer = 1.0 if x_max - x_min == 0 else (x_max - x_min) * 0.1 |
|
|
y_buffer = 1.0 if y_max - y_min == 0 else (y_max - y_min) * 0.1 |
|
|
|
|
|
x_min -= x_buffer |
|
|
x_max += x_buffer |
|
|
y_min -= y_buffer |
|
|
y_max += y_buffer |
|
|
|
|
|
x_min = min(x_min, test_point_np[0,0] - 0.5) |
|
|
x_max = max(x_max, test_point_np[0,0] + 0.5) |
|
|
y_min = min(y_min, test_point_np[0,1] - 0.5) |
|
|
y_max = max(y_max, test_point_np[0,1] + 0.5) |
|
|
|
|
|
xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100), |
|
|
np.linspace(y_min, y_max, 100)) |
|
|
|
|
|
if xx.size == 0 or yy.size == 0: |
|
|
return jsonify({'error': 'Meshgrid could not be created. Data range too narrow.'}), 400 |
|
|
|
|
|
|
|
|
|
|
|
Z = nb_model.predict(np.c_[xx.ravel(), yy.ravel()]) |
|
|
Z = Z.reshape(xx.shape) |
|
|
|
|
|
decision_boundary_z = Z.tolist() |
|
|
decision_boundary_x_coords = xx[0, :].tolist() |
|
|
decision_boundary_y_coords = yy[:, 0].tolist() |
|
|
|
|
|
return jsonify({ |
|
|
'prediction': prediction, |
|
|
'decision_boundary_z': decision_boundary_z, |
|
|
'decision_boundary_x_coords': decision_boundary_x_coords, |
|
|
'decision_boundary_y_coords': decision_boundary_y_coords |
|
|
}) |
|
|
except Exception as e: |
|
|
print(f"An error occurred in /nb_visual_predict: {e}") |
|
|
return jsonify({'error': f'Backend Error: {str(e)}. Check server console for details.'}), 500 |
|
|
|
|
|
def check_with_virustotal(url): |
|
|
try: |
|
|
headers = {"x-apikey": VT_API_KEY} |
|
|
submit_url = "https://www.virustotal.com/api/v3/urls" |
|
|
|
|
|
|
|
|
        response = requests.post(submit_url, headers=headers, data={"url": url})
        # The POST returns an analysis id of the form "u-<sha256>-<timestamp>";
        # GET /urls/<id> expects the middle segment (the URL identifier).
        analysis_id = response.json()["data"]["id"]
        url_id = analysis_id.split("-")[1] if "-" in analysis_id else analysis_id

        result = requests.get(f"{submit_url}/{url_id}", headers=headers)
|
|
data = result.json() |
|
|
|
|
|
stats = data["data"]["attributes"]["last_analysis_stats"] |
|
|
malicious_count = stats.get("malicious", 0) |
|
|
|
|
|
if malicious_count > 0: |
|
|
return True, f"☣️ VirusTotal flagged it as malicious ({malicious_count} engines)" |
|
|
return False, None |
|
|
except Exception as e: |
|
|
print(f"⚠️ VirusTotal error: {e}") |
|
|
|
|
|
|
|
|
|
|
|
return False, None |
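
# Example (illustrative; requires VT_API_KEY and network access):
#     flagged, reason = check_with_virustotal("http://example.com")
#     if flagged:
#         print(reason)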
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.route('/kmeans-clustering') |
|
|
def clustering(): |
|
|
return render_template('clustering.html') |
|
|
|
|
|
|
|
|
@app.route('/kmeans-Dbscan-image', methods=['GET', 'POST']) |
|
|
def compress_and_clean(): |
|
|
final_image = None |
|
|
|
|
|
if request.method == 'POST': |
|
|
try: |
|
|
|
|
|
mode = request.form.get('mode', 'compress') |
|
|
k = int(request.form.get('k', 8)) |
|
|
eps = float(request.form.get('eps', 0.6)) |
|
|
min_samples = int(request.form.get('min_samples', 50)) |
|
|
image_file = request.files.get('image') |
|
|
|
|
|
if image_file and image_file.filename != '': |
|
|
|
|
|
img = Image.open(image_file).convert('RGB') |
|
|
max_size = (518, 518) |
|
|
img.thumbnail(max_size, Image.Resampling.LANCZOS) |
|
|
|
|
|
img_np = np.array(img) |
|
|
h, w, d = img_np.shape |
|
|
pixels = img_np.reshape(-1, d) |
|
|
|
|
|
|
|
|
kmeans = KMeans(n_clusters=k, random_state=42, n_init=10) |
|
|
kmeans.fit(pixels) |
|
|
clustered_pixels = kmeans.cluster_centers_[kmeans.labels_].astype(np.uint8) |
|
|
|
|
|
|
|
|
if mode == 'compress': |
|
|
final_pixels = clustered_pixels.reshape(h, w, d) |
|
|
|
|
|
|
|
|
else: |
|
|
|
|
|
max_dbscan_pixels = 10000 |
|
|
if len(clustered_pixels) > max_dbscan_pixels: |
|
|
idx = np.random.choice(len(clustered_pixels), max_dbscan_pixels, replace=False) |
|
|
dbscan_input = clustered_pixels[idx] |
|
|
else: |
|
|
dbscan_input = clustered_pixels |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
scaler = StandardScaler() |
|
|
pixels_scaled = scaler.fit_transform(dbscan_input) |
|
|
db = DBSCAN(eps=eps, min_samples=min_samples) |
|
|
labels = db.fit_predict(pixels_scaled) |
|
|
|
|
|
|
|
|
clean_pixels = [] |
|
|
for i in range(len(dbscan_input)): |
|
|
label = labels[i] |
|
|
clean_pixels.append([0, 0, 0] if label == -1 else dbscan_input[i]) |
|
|
|
|
|
|
|
|
                    # Pad back to the original pixel count so the reshape below
                    # succeeds; note this fills the *tail* with black rather
                    # than restoring the unsampled pixel positions.
                    if len(clustered_pixels) > max_dbscan_pixels:
                        clean_pixels.extend([[0, 0, 0]] * (len(clustered_pixels) - len(clean_pixels)))
|
|
|
|
|
final_pixels = np.array(clean_pixels, dtype=np.uint8).reshape(h, w, d) |
|
|
|
|
|
|
|
|
final_img = Image.fromarray(final_pixels) |
|
|
final_image = 'compressed_clean.jpg' |
|
|
final_img.save(os.path.join(app.config['UPLOAD_FOLDER'], final_image), optimize=True, quality=90) |
|
|
|
|
|
except Exception as e: |
|
|
return f"⚠️ Error: {str(e)}", 500 |
|
|
|
|
|
return render_template('kmean-dbscan-image.html', final_image=final_image) |
|
|
|
|
|
@app.route('/DBscan')
def dbscan_page():
    # Renamed from `DBSCAN` so the view function does not shadow
    # sklearn.cluster.DBSCAN, which /kmeans-Dbscan-image uses at request time.
    return render_template('DBSCAN.html')
|
|
|
|
|
|
|
|
if __name__ == "__main__": |
|
|
app.run(host="0.0.0.0", port=7860) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|