Spaces:

engralimalik
/

hackathon_lace

Sleeping

App Files Files Community

engralimalik committed on Jan 26, 2025

Commit

f570531

verified ·

1 Parent(s): db1eb60

Create app.py

Browse files

Files changed (1) hide show

app.py +125 -0

app.py ADDED Viewed

	@@ -0,0 +1,125 @@

+import pandas as pd
+import folium
+from sklearn.cluster import KMeans
+from folium.plugins import MarkerCluster
+import requests
+from io import BytesIO
+import streamlit as st
+import folium
+from streamlit.components.v1 import html
+import math
+# Load data from Excel (directly from the URL)
+def load_data(url):
+    # Request the file content
+    response = requests.get(url)
+    # Check if the content is an Excel file by inspecting the MIME type
+    if 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' not in response.headers['Content-Type']:
+        raise ValueError("The file is not a valid Excel file.")
+    # Read the file content into a pandas dataframe with the engine specified
+    lat_long_data = pd.read_excel(BytesIO(response.content), sheet_name="lat long", engine='openpyxl')
+    measurement_data = pd.read_excel(BytesIO(response.content), sheet_name="measurement data", engine='openpyxl')
+    # Merge data on school_id_giga
+    merged_data = pd.merge(
+        lat_long_data,
+        measurement_data,
+        left_on="school_id_giga",
+        right_on="school_id_giga",
+        how="inner"
+    )
+    # Strip whitespace from all column names
+    merged_data.columns = merged_data.columns.str.strip()
+    return merged_data
+# Haversine formula to calculate distance between two lat/long points
+def haversine(lat1, lon1, lat2, lon2):
+    # Convert latitude and longitude from degrees to radians
+    lat1, lon1, lat2, lon2 = map(math.radians, [lat1, lon1, lat2, lon2])
+    # Haversine formula
+    dlat = lat2 - lat1
+    dlon = lon2 - lon1
+    a = math.sin(dlat / 2) ** 2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2) ** 2
+    c = 2 * math.asin(math.sqrt(a))
+    # Radius of Earth in kilometers
+    R = 6371
+    return R * c
+# Perform clustering to find data center location
+def find_data_center(df, n_clusters=1):
+    kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(df[["latitude", "longitude"]])
+    return kmeans.cluster_centers_
+# Estimate latency and bandwidth based on distance from the data center (inverse relationship)
+def estimate_latency_bandwidth(df, data_center_lat, data_center_lon):
+    df["distance_to_data_center"] = df.apply(
+        lambda row: haversine(row["latitude"], row["longitude"], data_center_lat, data_center_lon),
+        axis=1
+    )
+    # Latency estimation: Assuming latency decreases inversely with distance (this is just an example scale)
+    df["estimated_latency"] = df["distance_to_data_center"].apply(lambda x: max(10, 100 / (x + 1)))  # Max latency 100ms
+    # Bandwidth estimation: Assuming bandwidth increases inversely with distance (again, an example scale)
+    df["estimated_bandwidth"] = df["distance_to_data_center"].apply(lambda x: max(10, 100 / (x + 1)))  # Max bandwidth 100 Mbps
+    return df
+# Create a map and plot the points
+def plot_map(df, center):
+    # Create map centered on the data center location
+    map = folium.Map(location=[center[0][0], center[0][1]], zoom_start=10)
+    marker_cluster = MarkerCluster().add_to(map)
+    # Add school locations to the map
+    for idx, row in df.iterrows():
+        school_name = row.get("school_name", "No Name Provided")  # Ensure correct column access
+        # Popup text showing original latency, estimated latency, bandwidth before and after
+        popup_text = (
+            f"School Name: {school_name}<br>"
+            f"Original Latency: {row['latency']} ms<br>"
+            f"Original Bandwidth: {row['download_speed']} Mbps<br>"
+            f"Estimated Latency After Data Center: {row['estimated_latency']} ms<br>"
+            f"Estimated Bandwidth After Data Center: {row['estimated_bandwidth']} Mbps"
+        )
+        folium.Marker(
+            location=[row["latitude"], row["longitude"]],
+            popup=popup_text,
+            icon=folium.Icon(color="blue", icon="info-sign")
+        ).add_to(marker_cluster)
+    # Add data center location to the map
+    folium.Marker(
+        location=[center[0][0], center[0][1]],
+        popup="Proposed Data Center",
+        icon=folium.Icon(color="red", icon="cloud")
+    ).add_to(map)
+    return map
+# Main function to run the application
+def main():
+    url = "https://huggingface.co/spaces/engralimalik/lace/resolve/main/data%20barbados.xlsx"  # Correct raw file URL
+    df = load_data(url)
+    center = find_data_center(df)
+    df = estimate_latency_bandwidth(df, center[0][0], center[0][1])  # Estimate latency and bandwidth based on distance
+    map = plot_map(df, center)
+    # Embed the map directly in the Streamlit app
+    map_html = map._repr_html_()  # Render the folium map as HTML
+    html(map_html, width=700, height=500)  # Adjust the size of the embedded map
+    st.title("Impact of Data Center on Latency and Bandwidth")
+    st.write("This map shows school locations and proposed data center locations based on clustering. The latency and bandwidth values represent the potential improvements for schools closer to the data center.")
+if __name__ == "__main__":
+    main()