Clustering / app.py
arichar14's picture
Update app.py
372d0e2 verified
# Superseded earlier version of this app, kept only as an unused string
# literal (it is never executed). It differs from the active code further
# down in that it runs KMeans on the raw feature columns (no StandardScaler
# step) and uses the interface title "Tourlandia City Clustering".
'''import pandas as pd
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import gradio as gr
import tempfile
def cluster_tourism(file, n_clusters):
# Load CSV
df = pd.read_csv(file)
# Clustering
features = df[['AvgMonthlyTourists', 'AvgTemp', 'Hotels']]
kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
df['Cluster'] = kmeans.fit_predict(features)
# Save clustered CSV to a temporary file
tmp_csv = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
df.to_csv(tmp_csv.name, index=False)
# Plot clusters
plt.figure(figsize=(6,4))
for cluster in range(n_clusters):
subset = df[df['Cluster'] == cluster]
plt.scatter(subset['AvgMonthlyTourists'], subset['AvgTemp'], label=f'Cluster {cluster}')
plt.xlabel('Avg Monthly Tourists')
plt.ylabel('Avg Temp')
plt.title('City Clusters')
plt.legend()
# Save plot to a temporary file
tmp_plot = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
plt.savefig(tmp_plot.name)
plt.close()
return tmp_csv.name, tmp_plot.name
demo = gr.Interface(
fn=cluster_tourism,
inputs=[
gr.File(file_types=[".csv"], type="filepath", label="Upload CSV"),
gr.Slider(minimum=2, maximum=10, step=1, label="Number of Clusters")
],
outputs=[
gr.File(label="CSV with Cluster Labels"),
gr.Image(label="Cluster Plot")
],
title="Tourlandia City Clustering",
description="Upload a CSV of cities with AvgMonthlyTourists, AvgTemp, and Hotels. Choose number of clusters to group similar cities."
)
demo.launch()'''
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import gradio as gr
import tempfile
def cluster_tourism(file, n_clusters):
    """Cluster the cities in a CSV with K-Means and plot the result.

    The CSV is read with pandas, the three feature columns are standardized,
    K-Means assigns a cluster label to every row, and the labelled table plus
    a scatter plot (AvgMonthlyTourists vs. AvgTemp) are written to temporary
    files that are not deleted automatically.

    Args:
        file: Path (or any object accepted by ``pd.read_csv``) of the CSV.
            It must contain the columns ``AvgMonthlyTourists``, ``AvgTemp``
            and ``Hotels``.
        n_clusters: Number of clusters to form. Coerced with ``int()`` so a
            float such as ``3.0`` coming from a UI slider is accepted.

    Returns:
        A ``(csv_path, plot_path)`` tuple: the path of the CSV with an added
        ``Cluster`` column and the path of the PNG scatter plot.

    Raises:
        ValueError: If ``file`` is ``None`` or the CSV has fewer rows than
            ``n_clusters``.
        KeyError: If one of the required columns is missing.
    """
    feature_columns = ['AvgMonthlyTourists', 'AvgTemp', 'Hotels']

    # Fail early with a readable message instead of a pandas internals error.
    if file is None:
        raise ValueError("No CSV file was uploaded.")

    # Load CSV
    df = pd.read_csv(file)

    missing = [col for col in feature_columns if col not in df.columns]
    if missing:
        raise KeyError(
            f"Missing required column(s): {', '.join(missing)}. "
            f"Expected columns: {', '.join(feature_columns)}."
        )

    # KMeans and range() both need a real int; sliders may deliver a float.
    n_clusters = int(n_clusters)
    if len(df) < n_clusters:
        raise ValueError(
            f"Cannot form {n_clusters} clusters from {len(df)} row(s); "
            "upload more cities or choose fewer clusters."
        )

    # Standardize features so no single column dominates the distances.
    features_scaled = StandardScaler().fit_transform(df[feature_columns])

    # KMeans clustering
    kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
    df['Cluster'] = kmeans.fit_predict(features_scaled)

    # Reserve the output paths, closing each handle right away so no open
    # file descriptor lingers; delete=False keeps the files for the caller.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp_csv:
        csv_path = tmp_csv.name
    with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp_plot:
        plot_path = tmp_plot.name

    df.to_csv(csv_path, index=False)

    # Draw on an explicit figure/axes rather than pyplot's implicit "current
    # figure", which is shared global state across concurrent requests.
    fig, ax = plt.subplots(figsize=(6, 4))
    try:
        for cluster in range(n_clusters):
            subset = df[df['Cluster'] == cluster]
            ax.scatter(
                subset['AvgMonthlyTourists'],
                subset['AvgTemp'],
                label=f'Cluster {cluster}',
            )
        ax.set_xlabel('Avg Monthly Tourists')
        ax.set_ylabel('Avg Temp')
        ax.set_title('City Clusters')
        ax.legend()
        fig.savefig(plot_path)
    finally:
        # Always release the figure, even if drawing or saving fails.
        plt.close(fig)

    return csv_path, plot_path
# Input widgets: the CSV to cluster and the requested number of clusters.
csv_upload = gr.File(file_types=[".csv"], type="filepath", label="Upload CSV")
cluster_count = gr.Slider(minimum=2, maximum=10, step=1, label="Number of Clusters")

# Output widgets, listed in the order cluster_tourism returns its two paths.
labelled_csv = gr.File(label="CSV with Cluster Labels")
cluster_plot = gr.Image(label="Cluster Plot")

# Wire the clustering function to the widgets above.
demo = gr.Interface(
    fn=cluster_tourism,
    inputs=[csv_upload, cluster_count],
    outputs=[labelled_csv, cluster_plot],
    title="City Clustering",
    description="Upload a CSV of cities with AvgMonthlyTourists, AvgTemp, and Hotels. Choose number of clusters to group similar cities.",
)

# Start the web app.
demo.launch()