# Source snapshot: revision 8683d51 (2,762 bytes as captured).
import os
import re
import json
import networkx as nx
import matplotlib.pyplot as plt
# ---------------- DATA EXTRACTION ----------------
def parse_page(page_name):
    """Read a saved HTML page and return the data embedded in it.

    The file at ``page_name`` is read as UTF-8 text and passed to
    ``extract_data_from_html``; that helper's result is returned unchanged.
    """
    with open(page_name, mode='r', encoding='utf-8') as page_file:
        markup = page_file.read()
    return extract_data_from_html(markup)
def extract_data_from_html(html_content):
    """Locate the ``__NEXT_DATA__`` JSON payload in an HTML string and parse it.

    Returns ``None`` when the payload marker is not found. Otherwise the
    captured text is decoded as JSON and passed to
    ``get_model_and_resources``, whose result is returned.
    """
    # No re.DOTALL here, so '.' stops at newlines: the payload and the
    # closing '</script><script defer' must sit on the same line to match.
    next_data_re = r'id="__NEXT_DATA__" type="application/json">(.*?)</script><script defer'
    found = re.search(next_data_re, html_content)
    if found is None:
        return None
    payload = json.loads(found.group(1))
    return get_model_and_resources(payload)
def get_model_and_resources(data_dict):
    """Return ``(model_name, lora_names)`` from the parsed page JSON.

    The metadata is read from the first entry of
    ``props.pageProps.trpcState.json.queries``, under ``state.data.meta``.
    ``model_name`` is the ``'Model'`` value; ``lora_names`` lists the
    ``'name'`` of every entry in ``'resources'`` whose ``'type'`` is
    ``'lora'``, in their original order.
    """
    first_query = data_dict['props']['pageProps']['trpcState']['json']['queries'][0]
    meta = first_query['state']['data']['meta']
    model_name = meta['Model']
    lora_names = []
    for resource in meta['resources']:
        if resource['type'] == 'lora':
            lora_names.append(resource['name'])
    return model_name, lora_names
# ---------------- GRAPH CONSTRUCTION ----------------
def build_graph(data, degree_threshold):
    """Build a model/LoRA graph from parsed pages and prune sparse nodes.

    ``data`` maps a page name to a ``(model, loras)`` pair. Each model is
    added with ``bipartite=0`` and each LoRA with ``bipartite=1``. Every
    model-LoRA edge carries a ``page`` attribute holding the page name up
    to its first ``'.'``.

    After construction, every node whose degree is below
    ``degree_threshold`` is removed in a single pass; degrees are not
    re-evaluated after the removal. The resulting graph is returned.
    """
    graph = nx.Graph()
    for page_name, entry in data.items():
        model, loras = entry
        graph.add_node(model, bipartite=0)
        for lora in loras:
            graph.add_node(lora, bipartite=1)
            graph.add_edge(model, lora, page=page_name.split('.')[0])

    # Collect first, remove afterwards, so the graph is not mutated while
    # its degree view is being iterated.
    sparse_nodes = []
    for node, degree in graph.degree():
        if degree < degree_threshold:
            sparse_nodes.append(node)
    graph.remove_nodes_from(sparse_nodes)
    return graph
# ---------------- VISUALIZATION AND ANALYSIS ----------------
def visualize_bipartite(B):
    """Draw the model/LoRA graph with a bipartite layout and show it.

    Nodes whose ``bipartite`` attribute equals 0 are treated as models and
    passed to ``nx.bipartite_layout`` as one partition; every other node is
    treated as a LoRA. Models are coloured ``#1f78b4`` and the remaining
    nodes ``#33a02c``. Every node must carry a ``bipartite`` attribute,
    otherwise a ``KeyError`` is raised.

    Opens a matplotlib figure via ``plt.show()`` and returns ``None``.
    """
    model_nodes = {n for n, d in B.nodes(data=True) if d['bipartite'] == 0}
    pos = nx.bipartite_layout(B, model_nodes)

    # One colour per node, in the graph's own node order, as nx.draw expects.
    node_colors = ['#1f78b4' if node in model_nodes else '#33a02c' for node in B.nodes()]

    plt.figure(figsize=(10, 5))
    nx.draw(B, pos, with_labels=True, node_color=node_colors)
    plt.title("Bipartite Graph between Model Name and Lora Name")
    plt.show()
def most_connected_models(B, top_n=10):
    """Print the ``top_n`` model nodes with the highest degree.

    Model nodes are those whose ``bipartite`` attribute equals 0. They are
    ranked by degree, highest first, and one line is printed per model
    listing its neighbours. Returns ``None``.

    Models are gathered in the graph's node order rather than in a set.
    Because ``sorted`` is stable, models with equal degree are then printed
    in node order, so the output is the same on every run instead of
    depending on string hashing.
    """
    model_nodes = [n for n, d in B.nodes(data=True) if d['bipartite'] == 0]
    sorted_models = sorted(model_nodes, key=lambda x: B.degree(x), reverse=True)
    for model in sorted_models[:top_n]:
        loras = list(B.neighbors(model))
        print(f"Model: {model}, Connected Loras: {loras}")
|