Spaces:

arichar14
/

Clustering

Sleeping

App Files Files Community

Clustering / app.py

arichar14

Update app.py

372d0e2 verified 6 months ago

raw

history blame contribute delete

3.46 kB

	'''import pandas as pd
	from sklearn.cluster import KMeans
	import matplotlib.pyplot as plt
	import gradio as gr
	import tempfile

	def cluster_tourism(file, n_clusters):
	# Load CSV
	df = pd.read_csv(file)

	# Clustering
	features = df[['AvgMonthlyTourists', 'AvgTemp', 'Hotels']]
	kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
	df['Cluster'] = kmeans.fit_predict(features)

	# Save clustered CSV to a temporary file
	tmp_csv = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
	df.to_csv(tmp_csv.name, index=False)

	# Plot clusters
	plt.figure(figsize=(6,4))
	for cluster in range(n_clusters):
	subset = df[df['Cluster'] == cluster]
	plt.scatter(subset['AvgMonthlyTourists'], subset['AvgTemp'], label=f'Cluster {cluster}')
	plt.xlabel('Avg Monthly Tourists')
	plt.ylabel('Avg Temp')
	plt.title('City Clusters')
	plt.legend()

	# Save plot to a temporary file
	tmp_plot = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
	plt.savefig(tmp_plot.name)
	plt.close()

	return tmp_csv.name, tmp_plot.name

	demo = gr.Interface(
	fn=cluster_tourism,
	inputs=[
	gr.File(file_types=[".csv"], type="filepath", label="Upload CSV"),
	gr.Slider(minimum=2, maximum=10, step=1, label="Number of Clusters")
	],
	outputs=[
	gr.File(label="CSV with Cluster Labels"),
	gr.Image(label="Cluster Plot")
	],
	title="Tourlandia City Clustering",
	description="Upload a CSV of cities with AvgMonthlyTourists, AvgTemp, and Hotels. Choose number of clusters to group similar cities."
	)

	demo.launch()'''

	import pandas as pd
	from sklearn.cluster import KMeans
	from sklearn.preprocessing import StandardScaler
	import matplotlib.pyplot as plt
	import gradio as gr
	import tempfile

	def cluster_tourism(file, n_clusters):
	    """Cluster cities from a CSV with KMeans and export the labels and a plot.

	    The three feature columns are standardized before clustering so that the
	    column with the largest numeric range does not dominate the distances.

	    Args:
	        file: Path to a CSV file containing the columns
	            ``AvgMonthlyTourists``, ``AvgTemp`` and ``Hotels``.
	        n_clusters: Number of clusters to fit. It is coerced with ``int()``
	            so a whole-number float (as a UI slider may deliver) is accepted.

	    Returns:
	        A ``(csv_path, plot_path)`` tuple of temporary-file paths: the input
	        rows with an added ``Cluster`` column, and a PNG scatter plot of
	        ``AvgMonthlyTourists`` against ``AvgTemp`` with one series per cluster.
	        The files are created with ``delete=False``; removing them is left to
	        the caller.

	    Raises:
	        ValueError: If a required column is missing, or if the CSV has fewer
	            rows than ``n_clusters``.
	    """
	    feature_columns = ['AvgMonthlyTourists', 'AvgTemp', 'Hotels']
	    # range() below needs a real int.
	    n_clusters = int(n_clusters)

	    # Load CSV
	    df = pd.read_csv(file)

	    # Fail early with a readable message instead of an opaque KeyError from
	    # pandas or a late error from inside scikit-learn.
	    missing = [col for col in feature_columns if col not in df.columns]
	    if missing:
	        raise ValueError(
	            "CSV is missing required column(s): " + ", ".join(missing)
	        )
	    if len(df) < n_clusters:
	        raise ValueError(
	            f"Cannot form {n_clusters} clusters from {len(df)} row(s); "
	            "upload more rows or choose fewer clusters."
	        )

	    # Standardize features, then cluster. random_state is fixed so the same
	    # input always yields the same labels.
	    features_scaled = StandardScaler().fit_transform(df[feature_columns])
	    kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
	    df['Cluster'] = kmeans.fit_predict(features_scaled)

	    # Reserve a unique file name, then close the handle before writing by
	    # name: this avoids leaking a descriptor per call and avoids writing to
	    # a file that is still open elsewhere.
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp_csv:
	        csv_path = tmp_csv.name
	    df.to_csv(csv_path, index=False)

	    # Draw on an explicit Figure/Axes instead of pyplot's implicit "current
	    # figure", and always release the figure, even if plotting fails.
	    fig, ax = plt.subplots(figsize=(6, 4))
	    try:
	        for cluster in range(n_clusters):
	            subset = df[df['Cluster'] == cluster]
	            ax.scatter(
	                subset['AvgMonthlyTourists'],
	                subset['AvgTemp'],
	                label=f'Cluster {cluster}',
	            )
	        ax.set_xlabel('Avg Monthly Tourists')
	        ax.set_ylabel('Avg Temp')
	        ax.set_title('City Clusters')
	        ax.legend()

	        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp_plot:
	            plot_path = tmp_plot.name
	        fig.savefig(plot_path)
	    finally:
	        plt.close(fig)

	    return csv_path, plot_path

	# Input widgets: a CSV upload (handed to the callback as a file path) and a
	# slider choosing the number of clusters between 2 and 10.
	upload_box = gr.File(file_types=[".csv"], type="filepath", label="Upload CSV")
	cluster_slider = gr.Slider(minimum=2, maximum=10, step=1, label="Number of Clusters")

	# Output widgets, in the order cluster_tourism returns its two paths.
	labelled_csv = gr.File(label="CSV with Cluster Labels")
	cluster_image = gr.Image(label="Cluster Plot")

	demo = gr.Interface(
	    fn=cluster_tourism,
	    inputs=[upload_box, cluster_slider],
	    outputs=[labelled_csv, cluster_image],
	    title="City Clustering",
	    description=(
	        "Upload a CSV of cities with AvgMonthlyTourists, AvgTemp, and Hotels. "
	        "Choose number of clusters to group similar cities."
	    ),
	)

	# Launch the interface when this script runs.
	demo.launch()