Spaces:

Man0707
/

mj

Sleeping

App Files Files Community

mj / src /streamlit_app.py

Man0707

Update src/streamlit_app.py

c25ec0a verified 2 months ago

raw

history blame contribute delete

5.34 kB

	# streamlit_app.py
	# Wine Quality Predictor – Fixed & Bulletproof (November 2025)
	# 100% original, self-contained synthetic data, zero external links

	import streamlit as st
	import pandas as pd
	import numpy as np
	from sklearn.model_selection import train_test_split
	from sklearn.preprocessing import StandardScaler
	from sklearn.ensemble import RandomForestClassifier
	from sklearn.metrics import accuracy_score

	# ------------------ Page Config ------------------
	# page_icon has to be an emoji, an emoji shortcode or an image; the plain
	# text "Wine Glass" is none of these, so the wine-glass shortcode is used.
	st.set_page_config(page_title="Wine Judge", page_icon=":wine_glass:", layout="centered")

	# ------------------ Style ------------------
	# Page-wide CSS: dark gradient background, gradient-filled <h1>, and the
	# .card / .good / .bad classes referenced by the HTML snippets further down.
	_PAGE_CSS = """
	<style>
	.main {background:#0a001a; color:#f0e6ff;}
	.stApp {background:linear-gradient(160deg,#1a0033,#000);}
	h1 {font-size:4rem; text-align:center;
	background:linear-gradient(90deg,#ff6b6b,#ffd93d,#6bcf7f);
	-webkit-background-clip:text; -webkit-text-fill-color:transparent;}
	.card {background:rgba(40,10,80,0.7); padding:2rem; border-radius:20px;
	border:1px solid #8a2be2; margin:2rem 0;}
	.good {color:#00ff9d; font-size:4rem; text-align:center; font-weight:bold;}
	.bad {color:#ff4757; font-size:3.5rem; text-align:center;}
	</style>
	"""
	# unsafe_allow_html is required for the <style> tag to be emitted as HTML.
	st.markdown(_PAGE_CSS, unsafe_allow_html=True)

	# ------------------ Synthetic Data (no internet needed) ------------------
	@st.cache_data
	def make_data(n=600):
	    """Build a reproducible synthetic wine dataset.

	    Eleven physico-chemical measurements are drawn from uniform ranges, a
	    wine type is sampled (40% Red / 60% White), and a quality score is
	    derived from alcohol, volatile acidity and sulphates plus Gaussian noise.

	    Args:
	        n: Number of rows (bottles) to generate.

	    Returns:
	        A DataFrame with the eleven measurement columns plus ``wine_type``
	        ('Red' / 'White'), ``quality`` (integer, clipped to 3..9) and
	        ``good_wine`` (1 when quality >= 6, otherwise 0).
	    """
	    # Fixed seed so every call with the same n yields the same rows.
	    np.random.seed(42)
	    data = pd.DataFrame({
	        'fixed_acidity': np.random.uniform(4, 16, n),
	        'volatile_acidity': np.random.uniform(0.08, 1.6, n),
	        'citric_acid': np.random.uniform(0, 1, n),
	        'residual_sugar': np.random.uniform(0.5, 20, n),
	        'chlorides': np.random.uniform(0.005, 0.4, n),
	        'free_sulfur_dioxide': np.random.uniform(1, 80, n),
	        'total_sulfur_dioxide': np.random.uniform(6, 300, n),
	        'density': np.random.uniform(0.987, 1.01, n),
	        'pH': np.random.uniform(2.7, 4.0, n),
	        'sulphates': np.random.uniform(0.3, 2.0, n),
	        'alcohol': np.random.uniform(8, 15, n),
	    })
	    data['wine_type'] = np.random.choice(['Red', 'White'], n, p=[0.4, 0.6])

	    # Simple quality formula: alcohol and sulphates raise the score,
	    # volatile acidity lowers it, and unit-variance noise is added on top.
	    noise = np.random.normal(0, 1, n)
	    score = (
	        data['alcohol'] * 0.8
	        - data['volatile_acidity'] * 3
	        + data['sulphates'] * 2
	        + noise
	    )
	    quality = score.clip(3, 9).astype(int)

	    data['quality'] = quality
	    data['good_wine'] = (quality >= 6).astype(int)
	    return data

	# Load the synthetic dataset that backs both the stats and the model.
	df = make_data()

	# Title and tagline, rendered as raw HTML so the page CSS applies.
	st.markdown("<h1>Wine Judge</h1>", unsafe_allow_html=True)
	st.markdown("<p style='text-align:center;font-size:1.6rem;color:#d8bfd8;'>Legendary or forgettable?</p>", unsafe_allow_html=True)

	# Stats: total row count and the number of rows per wine type.
	c1, c2, c3 = st.columns(3)
	red_count = int((df['wine_type'] == 'Red').sum())
	white_count = int((df['wine_type'] == 'White').sum())
	c1.metric("Total Bottles", len(df))
	c2.metric("Red", red_count)
	c3.metric("White", white_count)

	# ------------------ Model ------------------
	# Features are every column except the two targets; the label is the
	# binary good_wine flag.
	X = df.drop(columns=['quality','good_wine'])
	y = df['good_wine']

	# One-hot encode wine_type, keeping both indicator columns.
	X = pd.get_dummies(X, columns=['wine_type'], drop_first=False)

	# Save the exact column order so prediction inputs can be aligned to it.
	TRAIN_COLUMNS = X.columns.tolist()

	# Standardise only the continuous measurements; the one-hot wine_type
	# columns stay as 0/1. This keeps the scaler's feature set identical to
	# the numeric slider inputs that are scaled at prediction time, so
	# scaler.transform on those inputs does not fail with a feature mismatch.
	NUMERIC_COLUMNS = [c for c in TRAIN_COLUMNS if 'wine_type' not in c]
	scaler = StandardScaler()
	X[NUMERIC_COLUMNS] = scaler.fit_transform(X[NUMERIC_COLUMNS])

	# Stratified 80/20 split so both classes appear in train and test.
	X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

	@st.cache_resource
	def get_model():
	    """Fit and return the random-forest classifier for the good_wine label.

	    Trains on the module-level X_train / y_train split. The function is
	    decorated with st.cache_resource so the fitted model can be reused
	    instead of being retrained.
	    """
	    forest_params = {
	        'n_estimators': 300,
	        'max_depth': 12,
	        'random_state': 42,
	        'class_weight': 'balanced',
	        'n_jobs': -1,
	    }
	    # fit() returns the estimator itself, so the fitted forest is returned.
	    return RandomForestClassifier(**forest_params).fit(X_train, y_train)

	# Fetch the fitted classifier and report its accuracy on the held-out split.
	model = get_model()
	test_predictions = model.predict(X_test)
	acc = accuracy_score(y_test, test_predictions)
	st.success(f"Model Accuracy: {acc:.1%}")

	# ------------------ Prediction ------------------
	st.markdown("<div class='card'>", unsafe_allow_html=True)
	st.subheader("Judge Your Wine")

	wine = st.radio("Wine Type", ["Red", "White"], horizontal=True)

	# Build input dictionary, starting with the one-hot wine type so the keys
	# match the encoded training columns.
	input_data = {
	    'wine_type_Red': 1 if wine == "Red" else 0,
	    'wine_type_White': 1 if wine == "White" else 0,
	}

	num_features = [c for c in TRAIN_COLUMNS if 'wine_type' not in c]

	# One slider per numeric feature, alternating between two columns.
	col1, col2 = st.columns(2)
	for i, col_name in enumerate(num_features):
	    with col1 if i % 2 == 0 else col2:
	        mn = float(df[col_name].min())
	        mx = float(df[col_name].max())
	        avg = float(df[col_name].mean())
	        # A fixed 0.1 step is wider than the whole range of narrow features
	        # such as density (0.987-1.01), so derive the step from the range
	        # and show three decimals so small moves stay visible.
	        step = (mx - mn) / 100
	        val = st.slider(
	            col_name.replace("_", " ").title(),
	            mn,
	            mx,
	            avg,
	            step,
	            format="%.3f",
	        )
	        input_data[col_name] = val

	if st.button("Judge This Wine", use_container_width=True):
	    # One-row frame aligned to the training column order; any column
	    # missing from input_data is filled with 0.
	    sample = pd.DataFrame([input_data])
	    sample = sample.reindex(columns=TRAIN_COLUMNS, fill_value=0)

	    # Transform exactly the columns the scaler was fitted on. Passing a
	    # different column subset makes scaler.transform raise a feature
	    # mismatch ValueError, so the list is read from the scaler itself.
	    scaled_cols = list(scaler.feature_names_in_)
	    sample[scaled_cols] = scaler.transform(sample[scaled_cols])

	    pred = model.predict(sample)[0]
	    prob = model.predict_proba(sample)[0]

	    st.markdown("<br>", unsafe_allow_html=True)
	    if pred == 1:
	        # Predicted "good": celebrate and show the probability of class 1.
	        st.balloons()
	        st.markdown("<div class='good'>EXCELLENT WINE!</div>", unsafe_allow_html=True)
	        st.success(f"Confidence: {prob[1]:.1%} – Open it tonight!")
	    else:
	        # Predicted "not good": show the probability of class 0.
	        st.markdown("<div class='bad'>Not Great...</div>", unsafe_allow_html=True)
	        st.warning(f"Confidence: {prob[0]:.1%} – Maybe for cooking?")

	# Close the card <div> opened before the input widgets, then add the footer.
	st.markdown("</div>", unsafe_allow_html=True)
	st.caption("100% original • Synthetic data • Zero copyright • Runs instantly")