# Heretic-Models-Explorer / src/streamlit_app.py
# Uploaded by MuXodious; commit aab1d6b: "Add quantisation patterns" (5.26 kB)
import streamlit as st
import pandas as pd
from huggingface_hub import HfApi
import requests
import re
import concurrent.futures
# Page setup: wide layout, fire-emoji icon, title, and a short introduction.
st.set_page_config(
    page_title="Heretic Models Explorer",
    page_icon="🔥",
    layout="wide",
)
st.title("🔥 Heretic Models Explorer")

# The introduction is kept as separate fragments (one per sentence part) and
# joined without a separator, so the rendered Markdown is a single paragraph.
st.markdown(
    "".join(
        (
            "This space lists all models on Hugging Face tagged with ",
            "[`heretic`](https://huggingface.co/models?other=heretic). ",
            "It automatically fetches their model cards to extract **KL Divergence** and **Refusals**, ",
            "allowing you to sort and compare them easily. Click on any column header to sort!",
        )
    )
)
# Number that follows a "KL divergence" label on the same line, e.g.
# "KL divergence | 0.0033" or "**KL divergence** | 1.5e-03".
# The number must contain at least one digit and may carry an exponent, so a
# sentence-ending period ("0.05.") or a run of dots is never captured.
_KL_DIVERGENCE_RE = re.compile(
    r"KL\s*divergence[^a-zA-Z\d\n]*?((?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)",
    re.IGNORECASE,
)

# "15/100" or "15 / 100".
_FRACTION_RE = re.compile(r"(\d+)\s*/\s*(\d+)")


def _parse_model_card(readme_text):
    """Extract the metrics from the text of a model card.

    Args:
        readme_text: Full README.md contents as a string.

    Returns:
        A tuple ``(kl_div, refusals_str, refusal_rate, initial_refusals_str)``.
        Any element is ``None`` when the corresponding value was not found.
        ``refusal_rate`` is also ``None`` when the fraction cannot be turned
        into a percentage (for example a zero denominator).
    """
    kl_div = None
    refusals_str = None
    refusal_rate = None
    initial_refusals_str = None

    kl_match = _KL_DIVERGENCE_RE.search(readme_text)
    if kl_match:
        try:
            kl_div = float(kl_match.group(1))
        except ValueError:
            kl_div = None

    for line in readme_text.split("\n"):
        line_lower = line.lower()
        if "refusals" not in line_lower:
            continue
        fractions = _FRACTION_RE.findall(line)
        if not fractions:
            continue

        first_num, first_den = fractions[0]
        if "initial" in line_lower:
            # A dedicated "Initial refusals" line; only the first one counts.
            if initial_refusals_str is None:
                initial_refusals_str = f"{first_num}/{first_den}"
        elif refusals_str is None:
            refusals_str = f"{first_num}/{first_den}"
            try:
                refusal_rate = (int(first_num) / int(first_den)) * 100
            except (ValueError, ArithmeticError):
                # Malformed fraction such as "0/0": keep the text, drop the
                # percentage, and keep scanning the remaining lines.
                refusal_rate = None
            # Table rows may list "this model | original model" on one line;
            # the second fraction is then the initial refusal count.
            if len(fractions) >= 2 and initial_refusals_str is None:
                initial_refusals_str = f"{fractions[1][0]}/{fractions[1][1]}"

    return kl_div, refusals_str, refusal_rate, initial_refusals_str


def _download_readme(model_id):
    """Return the raw README.md text for *model_id*, or ``None`` on any failure."""
    # Download the README.md via raw URL for high-speed fetching
    url = f"https://huggingface.co/{model_id}/raw/main/README.md"
    try:
        response = requests.get(url, timeout=5)
    except requests.RequestException:
        # Best effort: a model whose card cannot be fetched is simply skipped.
        return None
    if response.status_code != 200:
        return None
    return response.text


def fetch_model_info(model, *, readme_text=None):
    """Fetch the README.md for a given model and extract its metrics.

    Args:
        model: Object exposing ``id`` and, optionally, ``likes`` and
            ``downloads`` attributes (missing ones default to 0).
        readme_text: Optional model card text. When given, it is parsed
            directly and no HTTP request is made.

    Returns:
        A dict with the keys "Model ID", "KL Divergence", "Initial Refusals",
        "Refusal Rate (%)", "Refusals", "Likes", "Downloads" and "URL", or
        ``None`` when the card could not be fetched or lacks either the KL
        divergence or the refusals value.
    """
    model_id = model.id

    if readme_text is None:
        readme_text = _download_readme(model_id)
    if readme_text is None:
        return None

    kl_div, refusals_str, refusal_rate, initial_refusals_str = _parse_model_card(readme_text)

    # Both headline metrics are required for a model to be listed.
    if kl_div is None or refusals_str is None:
        return None

    return {
        "Model ID": model_id,
        "KL Divergence": kl_div,
        "Initial Refusals": initial_refusals_str,
        "Refusal Rate (%)": refusal_rate,
        "Refusals": refusals_str,
        "Likes": getattr(model, 'likes', 0),
        "Downloads": getattr(model, 'downloads', 0),
        "URL": f"https://huggingface.co/{model_id}"
    }
@st.cache_data(ttl=3600, show_spinner=False)  # Cache for 1 hour
def get_heretic_models():
    """Collect metric rows for every non-quantised model tagged `heretic`.

    Lists models through the Hugging Face Hub `filter` parameter, discards
    repositories whose id matches a known quantisation pattern, then runs
    `fetch_model_info` on the remainder in a thread pool.

    Returns:
        A list of the non-``None`` results of `fetch_model_info`, in the
        order the models were listed.
    """
    hub = HfApi()

    # Repo ids matching any of these markers are treated as quantised
    # re-uploads and are left out of the listing.
    quantised_id = re.compile(r'(?i)(gguf|mlx|awq|nvfp4|gptq|exl[23]|-quant|int8|int4|oq[1-8]|mxfp[48])')
    candidates = [
        entry
        for entry in hub.list_models(filter="heretic")
        if not quantised_id.search(entry.id)
    ]

    # Fetch details concurrently for speed (10 workers to avoid API rate limiting issues)
    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as pool:
        return [
            row
            for row in pool.map(fetch_model_info, candidates)
            if row is not None
        ]
# Main execution
with st.spinner("Fetching heretic models and parsing model cards... This might take a moment on the first run."):
    models_data = get_heretic_models()

if models_data:
    df = pd.DataFrame(models_data)

    # The "Model" column holds the full URL; the LinkColumn below renders it
    # as a link whose label is the part captured by the display_text regex.
    df["Model"] = df["URL"]

    # Select and order columns for display
    column_order = [
        "Model",
        "KL Divergence",
        "Initial Refusals",
        "Refusals",
        "Refusal Rate (%)",
        "Likes",
        "Downloads",
    ]
    display_df = df[column_order]

    st.markdown(f"**Found {len(display_df)} models.**")

    # Per-column rendering rules for the interactive, sortable table.
    column_settings = {
        "Model": st.column_config.LinkColumn("Model", display_text=r"https://huggingface\.co/(.*)"),
        "KL Divergence": st.column_config.NumberColumn("KL Divergence", format="%.4f"),
        "Initial Refusals": st.column_config.TextColumn("Initial Refusals"),
        "Refusals": st.column_config.TextColumn("Refusals"),
        "Refusal Rate (%)": st.column_config.NumberColumn("Refusal Rate (%)", format="%.2f%%"),
        "Likes": st.column_config.NumberColumn("Likes"),
        "Downloads": st.column_config.NumberColumn("Downloads"),
    }
    st.dataframe(
        display_df,
        column_config=column_settings,
        hide_index=True,
        use_container_width=True,
    )
else:
    st.warning("No models found with the 'heretic' tag.")