Spaces:

Em4e
/

testing

Sleeping

App Files Files Community

Em4e committed on Aug 12, 2025

Commit

9f08712

verified ·

1 Parent(s): fbf7416

Update app.py

Browse files

Files changed (1) hide show

app.py +62 -110

app.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import streamlit as st
 import pandas as pd
 import numpy as np
-import grape
 import matplotlib.pyplot as plt
 import seaborn as sns
 import plotly.express as px
@@ -25,9 +25,9 @@ st.set_page_config(
 if 'www_graph_cache' not in st.session_state:
     st.session_state.www_graph_cache = None
-def load_graph_from_csv_grape(file_content, file_name):
     """
-    Load page links from CSV file using Grape.
     """
     try:
         # Read CSV content
@@ -60,20 +60,14 @@ def load_graph_from_csv_grape(file_content, file_name):
         all_nodes = list(set(df['FROM'].tolist() + df['TO'].tolist()))
         node_to_idx = {node: i for i, node in enumerate(all_nodes)}
-        # Create edge list with indices
-        edge_list = []
         for _, row in df.iterrows():
             source_idx = node_to_idx[row['FROM']]
             target_idx = node_to_idx[row['TO']]
-            edge_list.append((source_idx, target_idx))
-        # Create Grape graph
-        G = grape.Graph.from_edge_list(
-            edge_list=edge_list,
-            directed=True,
-            node_names=[str(i) for i in range(len(all_nodes))],
-            name=f"graph_{file_name}"
-        )
         return G, all_nodes, node_to_idx
@@ -82,9 +76,9 @@ def load_graph_from_csv_grape(file_content, file_name):
         st.info("💡 **Tip**: Make sure your file is a valid CSV with FROM and TO columns for page links")
         return None, None, None
-def create_www_graph_grape(n_nodes, m_edges, seed=42):
     """
-    Create a realistic internet simulation using Grape.
     """
     cache_key = (n_nodes, m_edges, seed)
@@ -96,61 +90,31 @@ def create_www_graph_grape(n_nodes, m_edges, seed=42):
     random.seed(seed)
     np.random.seed(seed)
-    # Create Barabási-Albert graph manually since Grape doesn't have this built-in
-    # Start with a complete graph of m_edges nodes
-    edges = []
-    for i in range(m_edges):
-        for j in range(i + 1, m_edges):
-            edges.append((i, j))
-            edges.append((j, i))  # Make it directed
-    # Add remaining nodes with preferential attachment
-    degrees = [2 * m_edges] * m_edges  # Initial degrees
-    for new_node in range(m_edges, n_nodes):
-        # Select m_edges nodes to connect to based on preferential attachment
-        total_degree = sum(degrees)
-        targets = set()
-        while len(targets) < min(m_edges, new_node):
-            # Probability proportional to degree
-            rand_val = random.random() * total_degree
-            cumsum = 0
-            for i, degree in enumerate(degrees):
-                cumsum += degree
-                if cumsum >= rand_val and i not in targets:
-                    targets.add(i)
-                    break
-        # Add edges
-        for target in targets:
-            edges.append((new_node, target))
-            edges.append((target, new_node))  # Bidirectional
-        # Update degrees
-        degrees.append(2 * len(targets))
-        for target in targets:
-            degrees[target] += 2
-    # Create Grape graph
-    www_graph = grape.Graph.from_edge_list(
-        edge_list=edges,
-        directed=True,
-        node_names=[str(i) for i in range(n_nodes)],
-        name="www_simulation"
-    )
     # Cache the result
     st.session_state.www_graph_cache = (cache_key, www_graph)
     return www_graph
-def process_configuration_grape(www_graph, kalicube_graph, kalicube_nodes,
-                               min_connections=5, max_connections=50):
     """
-    Test how your page network performs in the real internet using Grape.
     """
     # Get WWW graph info
-    www_node_count = www_graph.get_number_of_nodes()
     kalicube_node_count = len(kalicube_nodes)
     # Create node mapping for kalicube nodes
@@ -161,20 +125,23 @@ def process_configuration_grape(www_graph, kalicube_graph, kalicube_nodes,
         new_node_id = kalicube_offset + i
         kalicube_node_mapping[node] = new_node_id
-    # Get edges from both graphs
-    www_edges = www_graph.get_edge_list()
-    kalicube_edges = kalicube_graph.get_edge_list()
-    # Convert kalicube edges to use new node IDs
-    kalicube_mapped_edges = []
-    kalicube_idx_to_node = {i: node for node, i in kalicube_graph.get_node_name_to_node_id_map().items()}
-    for source_idx, target_idx in kalicube_edges:
-        source_node = kalicube_idx_to_node[source_idx]
-        target_node = kalicube_idx_to_node[target_idx]
         new_source_id = kalicube_node_mapping[source_node]
         new_target_id = kalicube_node_mapping[target_node]
-        kalicube_mapped_edges.append((new_source_id, new_target_id))
     # Randomly connect kalicube pages to WWW
     n_connections = min(min_connections, www_node_count, kalicube_node_count)
@@ -182,35 +149,20 @@ def process_configuration_grape(www_graph, kalicube_graph, kalicube_nodes,
     www_sample = random.sample(range(www_node_count), n_connections)
     kalicube_sample = random.sample(list(kalicube_node_mapping.values()), n_connections)
-    connection_edges = []
     for www_node, kalicube_node in zip(www_sample, kalicube_sample):
-        connection_edges.append((www_node, kalicube_node))
-    # Combine all edges
-    all_edges = list(www_edges) + kalicube_mapped_edges + connection_edges
-    total_nodes = www_node_count + kalicube_node_count
-    # Create merged graph
-    merged_graph = grape.Graph.from_edge_list(
-        edge_list=all_edges,
-        directed=True,
-        node_names=[str(i) for i in range(total_nodes)],
-        name="merged_simulation"
-    )
-    # Calculate PageRank
     try:
-        pagerank_values = merged_graph.pagerank(
-            damping_factor=0.85,
-            maximum_iterations=100,
-            tolerance=1e-6
-        )
     except Exception as e:
         st.warning(f"PageRank calculation failed: {e}. Using degree centrality instead.")
         # Fallback to degree centrality
-        degrees = merged_graph.get_node_degrees()
-        total_degree = sum(degrees)
-        pagerank_values = [deg / total_degree if total_degree > 0 else 0 for deg in degrees]
     # Extract PageRank values for kalicube nodes
     pagerank_dict = {}
@@ -279,15 +231,15 @@ def run_single_simulation(simulation_id, kalicube_graph_old, kalicube_graph_new,
     np.random.seed(sim_seed)
     # Create internet simulation
-    www_graph = create_www_graph_grape(www_nodes, www_edges, sim_seed)
     # Test original setup
-    importance_old_dict = process_configuration_grape(
         www_graph, kalicube_graph_old, kalicube_nodes_old, min_conn, max_conn
     )
     # Test new setup
-    importance_new_dict = process_configuration_grape(
         www_graph, kalicube_graph_new, kalicube_nodes_new, min_conn, max_conn
     )
@@ -393,7 +345,7 @@ def create_simple_visualizations(results_df, all_comparisons_df, confidence_thre
                  delta="per test")
 def main():
-    st.title("🔗 Page Link Impact Analyzer (Powered by Grape)")
     st.markdown("**Find out if your page link changes will help or hurt your search rankings**")
     # Simple intro
@@ -404,7 +356,7 @@ def main():
     **What you need:** Two CSV files - one with your current page links, one with your planned changes.
-    🍇 **Now powered by Grape** - A high-performance graph library for faster and more efficient analysis!
     """)
     # Sidebar - simplified
@@ -470,10 +422,10 @@ def main():
         # Load and validate files
         with st.spinner("Reading your files..."):
             kalicube_graph_old, kalicube_nodes_old, kalicube_url_mapping_old = \
-                load_graph_from_csv_grape(old_content, old_file.name)
             kalicube_graph_new, kalicube_nodes_new, kalicube_url_mapping_new = \
-                load_graph_from_csv_grape(new_content, new_file.name)
         if kalicube_graph_old is not None and kalicube_graph_new is not None:
             # Show what we found
@@ -484,14 +436,14 @@ def main():
                 st.info(f"""
                 **Current Setup:**
                 - {len(kalicube_nodes_old)} pages
-                - {kalicube_graph_old.get_number_of_edges()} links between them
                 """)
             with info_col2:
                 st.info(f"""
                 **Planned Setup:**
                 - {len(kalicube_nodes_new)} pages
-                - {kalicube_graph_new.get_number_of_edges()} links between them
                 """)
             # Big, obvious run button
@@ -652,8 +604,8 @@ def main():
             ### 🎯 **Why This Works**
             Instead of guessing, you get data-driven confidence about your page link changes!
-            ### 🍇 **Powered by Grape**
-            This version uses Grape, a high-performance graph library that's much faster than traditional tools for analyzing large networks.
             """)
         with st.expander("❓ **Common Questions**"):
@@ -662,7 +614,7 @@ def main():
             A: The tool shows trends and probabilities, not exact predictions. It's like weather forecasting - very useful for planning!
             **Q: How long does it take?**
-            A: Usually 30 seconds to 2 minutes, depending on your settings. Grape makes it faster than before!
             **Q: What if I get yellow results?**
             A: Yellow means proceed carefully. Consider running more tests, getting expert advice, or monitoring closely if you implement.
@@ -676,8 +628,8 @@ def main():
             **Q: What's the difference between pages and websites?**
             A: Pages are specific URLs (like mysite.com/about), while websites are domains (like mysite.com). This tool analyzes individual page links.
-            **Q: What's new with Grape?**
-            A: Grape is a high-performance graph library that makes calculations much faster and can handle larger datasets more efficiently than NetworkX.
             """)
 if __name__ == "__main__":

 import streamlit as st
 import pandas as pd
 import numpy as np
+import networkit as nk
 import matplotlib.pyplot as plt
 import seaborn as sns
 import plotly.express as px
 if 'www_graph_cache' not in st.session_state:
     st.session_state.www_graph_cache = None
+def load_graph_from_csv_networkit(file_content, file_name):
     """
+    Load page links from CSV file using NetworKit.
     """
     try:
         # Read CSV content
         all_nodes = list(set(df['FROM'].tolist() + df['TO'].tolist()))
         node_to_idx = {node: i for i, node in enumerate(all_nodes)}
+        # Create NetworKit graph
+        G = nk.Graph(n=len(all_nodes), weighted=False, directed=True)
+        # Add edges
         for _, row in df.iterrows():
             source_idx = node_to_idx[row['FROM']]
             target_idx = node_to_idx[row['TO']]
+            G.addEdge(source_idx, target_idx)
         return G, all_nodes, node_to_idx
         st.info("💡 **Tip**: Make sure your file is a valid CSV with FROM and TO columns for page links")
         return None, None, None
+def create_www_graph_networkit(n_nodes, m_edges, seed=42):
     """
+    Create a realistic internet simulation using NetworKit.
     """
     cache_key = (n_nodes, m_edges, seed)
     random.seed(seed)
     np.random.seed(seed)
+    # Create Barabási-Albert graph using NetworKit's generator
+    generator = nk.generators.BarabasiAlbertGenerator(k=m_edges, nMax=n_nodes, n0=m_edges)
+    generator.setSeed(seed, False)
+    www_graph = generator.generate()
+    # Make it directed
+    if not www_graph.isDirected():
+        # Convert to directed by creating a new directed graph
+        directed_graph = nk.Graph(n=www_graph.numberOfNodes(), weighted=False, directed=True)
+        for u, v in www_graph.iterEdges():
+            directed_graph.addEdge(u, v)
+            directed_graph.addEdge(v, u)  # Make bidirectional
+        www_graph = directed_graph
     # Cache the result
     st.session_state.www_graph_cache = (cache_key, www_graph)
     return www_graph
+def process_configuration_networkit(www_graph, kalicube_graph, kalicube_nodes,
+                                   min_connections=5, max_connections=50):
     """
+    Test how your page network performs in the real internet using NetworKit.
     """
     # Get WWW graph info
+    www_node_count = www_graph.numberOfNodes()
     kalicube_node_count = len(kalicube_nodes)
     # Create node mapping for kalicube nodes
         new_node_id = kalicube_offset + i
         kalicube_node_mapping[node] = new_node_id
+    # Create merged graph
+    total_nodes = www_node_count + kalicube_node_count
+    merged_graph = nk.Graph(n=total_nodes, weighted=False, directed=True)
+    # Add WWW edges
+    for u, v in www_graph.iterEdges():
+        merged_graph.addEdge(u, v)
+    # Add kalicube edges with new node IDs
+    kalicube_idx_to_node = {i: node for i, node in enumerate(kalicube_nodes)}
+    for u, v in kalicube_graph.iterEdges():
+        source_node = kalicube_idx_to_node[u]
+        target_node = kalicube_idx_to_node[v]
         new_source_id = kalicube_node_mapping[source_node]
         new_target_id = kalicube_node_mapping[target_node]
+        merged_graph.addEdge(new_source_id, new_target_id)
     # Randomly connect kalicube pages to WWW
     n_connections = min(min_connections, www_node_count, kalicube_node_count)
     www_sample = random.sample(range(www_node_count), n_connections)
     kalicube_sample = random.sample(list(kalicube_node_mapping.values()), n_connections)
     for www_node, kalicube_node in zip(www_sample, kalicube_sample):
+        merged_graph.addEdge(www_node, kalicube_node)
+    # Calculate PageRank using NetworKit
     try:
+        pagerank_algo = nk.centrality.PageRank(merged_graph, damp=0.85, tol=1e-6)
+        pagerank_algo.run()
+        pagerank_values = pagerank_algo.scores()
     except Exception as e:
         st.warning(f"PageRank calculation failed: {e}. Using degree centrality instead.")
         # Fallback to degree centrality
+        degree_algo = nk.centrality.DegreeCentrality(merged_graph, normalized=True)
+        degree_algo.run()
+        pagerank_values = degree_algo.scores()
     # Extract PageRank values for kalicube nodes
     pagerank_dict = {}
     np.random.seed(sim_seed)
     # Create internet simulation
+    www_graph = create_www_graph_networkit(www_nodes, www_edges, sim_seed)
     # Test original setup
+    importance_old_dict = process_configuration_networkit(
         www_graph, kalicube_graph_old, kalicube_nodes_old, min_conn, max_conn
     )
     # Test new setup
+    importance_new_dict = process_configuration_networkit(
         www_graph, kalicube_graph_new, kalicube_nodes_new, min_conn, max_conn
     )
                  delta="per test")
 def main():
+    st.title("🔗 Page Link Impact Analyzer (Powered by NetworKit)")
     st.markdown("**Find out if your page link changes will help or hurt your search rankings**")
     # Simple intro
     **What you need:** Two CSV files - one with your current page links, one with your planned changes.
+    ⚡ **Now powered by NetworKit** - A high-performance network analysis toolkit for faster and more efficient analysis!
     """)
     # Sidebar - simplified
         # Load and validate files
         with st.spinner("Reading your files..."):
             kalicube_graph_old, kalicube_nodes_old, kalicube_url_mapping_old = \
+                load_graph_from_csv_networkit(old_content, old_file.name)
             kalicube_graph_new, kalicube_nodes_new, kalicube_url_mapping_new = \
+                load_graph_from_csv_networkit(new_content, new_file.name)
         if kalicube_graph_old is not None and kalicube_graph_new is not None:
             # Show what we found
                 st.info(f"""
                 **Current Setup:**
                 - {len(kalicube_nodes_old)} pages
+                - {kalicube_graph_old.numberOfEdges()} links between them
                 """)
             with info_col2:
                 st.info(f"""
                 **Planned Setup:**
                 - {len(kalicube_nodes_new)} pages
+                - {kalicube_graph_new.numberOfEdges()} links between them
                 """)
             # Big, obvious run button
             ### 🎯 **Why This Works**
             Instead of guessing, you get data-driven confidence about your page link changes!
+            ### ⚡ **Powered by NetworKit**
+            This version uses NetworKit, a high-performance network analysis toolkit that's much faster than traditional tools for analyzing large networks.
             """)
         with st.expander("❓ **Common Questions**"):
             A: The tool shows trends and probabilities, not exact predictions. It's like weather forecasting - very useful for planning!
             **Q: How long does it take?**
+            A: Usually 30 seconds to 2 minutes, depending on your settings. NetworKit makes it faster than before!
             **Q: What if I get yellow results?**
             A: Yellow means proceed carefully. Consider running more tests, getting expert advice, or monitoring closely if you implement.
             **Q: What's the difference between pages and websites?**
             A: Pages are specific URLs (like mysite.com/about), while websites are domains (like mysite.com). This tool analyzes individual page links.
+            **Q: What's NetworKit?**
+            A: NetworKit is a high-performance network analysis toolkit with optimized C++ algorithms that makes calculations much faster and can handle larger datasets more efficiently.
             """)
 if __name__ == "__main__":