Spaces:

MuXodious
/

Heretic-Models-Explorer

Running

App Files Files Community

Heretic-Models-Explorer / src /streamlit_app.py

MuXodious

Add quantisation patterns

aab1d6b about 2 months ago

Raw

History Blame Contribute Delete

5.26 kB

	import streamlit as st
	import pandas as pd
	from huggingface_hub import HfApi
	import requests
	import re
	import concurrent.futures

	# Page-level Streamlit setup: browser-tab title/icon and a wide layout.
	st.set_page_config(
	    page_title="Heretic Models Explorer",
	    page_icon="🔥",
	    layout="wide",
	)

	# Introductory blurb rendered under the page heading; it links to the
	# Hub listing of models carrying the `heretic` tag.
	_INTRO_MARKDOWN = (
	    "This space lists all models on Hugging Face tagged with "
	    "[`heretic`](https://huggingface.co/models?other=heretic). "
	    "It automatically fetches their model cards to extract KL Divergence and Refusals, "
	    "allowing you to sort and compare them easily. Click on any column header to sort!"
	)

	st.title("🔥 Heretic Models Explorer")
	st.markdown(_INTRO_MARKDOWN)

	# Number that follows a "KL divergence" label. Any run of non-alphanumeric
	# separator characters on the same line (e.g. "** | " in a Markdown table,
	# or ": ") is skipped before the number.
	_KL_DIVERGENCE_RE = re.compile(
	    r"KL\s*divergence[^a-zA-Z\d\n]*?"
	    r"((?:[0-9]+(?:\.[0-9]*)?|\.[0-9]+)(?:[eE][-+]?[0-9]+)?)",
	    re.IGNORECASE,
	)

	# "numerator/denominator" pairs, with or without spaces around the slash
	# ("15/100" and "15 / 100" both match).
	_FRACTION_RE = re.compile(r"([0-9]+)\s*/\s*([0-9]+)")


	def _parse_model_card(readme_text):
	    """Extract (kl_div, refusals_str, refusal_rate, initial_refusals_str) from card text.

	    Each element is None when the corresponding value is not found.
	    ``refusal_rate`` is also None when the refusals denominator is zero.
	    """
	    kl_div = None
	    refusals_str = None
	    refusal_rate = None
	    initial_refusals_str = None

	    kl_match = _KL_DIVERGENCE_RE.search(readme_text)
	    if kl_match:
	        kl_div = float(kl_match.group(1))

	    for line in readme_text.splitlines():
	        line_lower = line.lower()
	        if "refusals" not in line_lower:
	            continue

	        fractions = _FRACTION_RE.findall(line)
	        if not fractions:
	            continue

	        numerator, denominator = fractions[0]
	        if "initial" in line_lower and initial_refusals_str is None:
	            # A dedicated "initial refusals" line: its first fraction is the
	            # pre-modification count.
	            initial_refusals_str = f"{numerator}/{denominator}"
	        elif "initial" not in line_lower and refusals_str is None:
	            # First plain "refusals" line: its first fraction is this model's count.
	            refusals_str = f"{numerator}/{denominator}"
	            if int(denominator) != 0:
	                # Multiply before dividing so the percentage is rounded once.
	                refusal_rate = int(numerator) * 100 / int(denominator)

	            # A second fraction on the same line (e.g. a table row with two
	            # value columns) is taken as the initial refusals count.
	            if len(fractions) >= 2 and initial_refusals_str is None:
	                initial_refusals_str = f"{fractions[1][0]}/{fractions[1][1]}"

	    return kl_div, refusals_str, refusal_rate, initial_refusals_str


	def fetch_model_info(model, *, readme_text=None):
	    """Fetch the README.md for a model and extract its metrics.

	    Args:
	        model: Object exposing ``id`` and, optionally, ``likes`` and
	            ``downloads`` attributes (missing ones default to 0).
	        readme_text: Optional model-card text. When given, no HTTP request is
	            made and this text is parsed instead.

	    Returns:
	        A dict with the keys "Model ID", "KL Divergence", "Initial Refusals",
	        "Refusal Rate (%)", "Refusals", "Likes", "Downloads" and "URL", or
	        None when the card cannot be fetched (request error or non-200
	        status) or lacks either a KL divergence or a refusals value.
	    """
	    model_id = model.id

	    if readme_text is None:
	        # Download the README.md via raw URL for high-speed fetching
	        url = f"https://huggingface.co/{model_id}/raw/main/README.md"
	        try:
	            response = requests.get(url, timeout=5)
	        except requests.RequestException:
	            # Best effort: a model whose card cannot be downloaded is skipped.
	            return None
	        if response.status_code != 200:
	            return None
	        readme_text = response.text

	    kl_div, refusals_str, refusal_rate, initial_refusals_str = _parse_model_card(readme_text)

	    # Both metrics are required for a model to be listed.
	    if kl_div is None or refusals_str is None:
	        return None

	    return {
	        "Model ID": model_id,
	        "KL Divergence": kl_div,
	        "Initial Refusals": initial_refusals_str,
	        "Refusal Rate (%)": refusal_rate,
	        "Refusals": refusals_str,
	        "Likes": getattr(model, "likes", 0),
	        "Downloads": getattr(model, "downloads", 0),
	        "URL": f"https://huggingface.co/{model_id}",
	    }

	# Case-insensitive name fragments used to drop quantised variants from the
	# listing. The alternatives are separated by a bare `|`; an escaped `\|`
	# would match a literal pipe character and the filter would never fire.
	_QUANT_NAME_RE = re.compile(
	    r"gguf|mlx|awq|nvfp4|gptq|exl[23]|-quant|int8|int4|oq[1-8]|mxfp[48]",
	    re.IGNORECASE,
	)


	@st.cache_data(ttl=3600, show_spinner=False)  # Cache for 1 hour
	def get_heretic_models():
	    """Fetch all heretic models and their metrics concurrently.

	    Returns:
	        A list of the dicts produced by ``fetch_model_info``, one per model
	        for which it returned a result. Models whose id matches a
	        quantisation name fragment are excluded before any card is fetched.
	    """
	    api = HfApi()

	    # Query all models using the Hugging Face Hub `filter` parameter
	    models = [
	        m for m in api.list_models(filter="heretic")
	        if not _QUANT_NAME_RE.search(m.id)
	    ]

	    # Fetch details concurrently for speed (10 workers to avoid API rate limiting issues)
	    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
	        # `map` yields results in input order; models without usable
	        # metrics come back as None and are skipped.
	        return [
	            row
	            for row in executor.map(fetch_model_info, models)
	            if row is not None
	        ]

	# Main execution: load the (cached) model rows, then render them as a table.
	with st.spinner("Fetching heretic models and parsing model cards... This might take a moment on the first run."):
	    models_data = get_heretic_models()

	if models_data:
	    df = pd.DataFrame(models_data)

	    # The "Model" column holds the full URL; the LinkColumn below renders it
	    # as a link whose visible text is the part after the host name.
	    df["Model"] = df["URL"]

	    # Columns shown to the user, in display order.
	    _DISPLAY_COLUMNS = [
	        "Model",
	        "KL Divergence",
	        "Initial Refusals",
	        "Refusals",
	        "Refusal Rate (%)",
	        "Likes",
	        "Downloads",
	    ]
	    display_df = df[_DISPLAY_COLUMNS]

	    st.markdown(f"Found {len(display_df)} models.")

	    # Per-column rendering rules for the interactive, sortable table.
	    _COLUMN_CONFIG = {
	        "Model": st.column_config.LinkColumn("Model", display_text=r"https://huggingface\.co/(.*)"),
	        "KL Divergence": st.column_config.NumberColumn("KL Divergence", format="%.4f"),
	        "Initial Refusals": st.column_config.TextColumn("Initial Refusals"),
	        "Refusals": st.column_config.TextColumn("Refusals"),
	        "Refusal Rate (%)": st.column_config.NumberColumn("Refusal Rate (%)", format="%.2f%%"),
	        "Likes": st.column_config.NumberColumn("Likes"),
	        "Downloads": st.column_config.NumberColumn("Downloads"),
	    }

	    st.dataframe(
	        display_df,
	        column_config=_COLUMN_CONFIG,
	        hide_index=True,
	        use_container_width=True,
	    )
	else:
	    st.warning("No models found with the 'heretic' tag.")