| import streamlit as st |
| import pandas as pd |
| from huggingface_hub import HfApi |
| import requests |
| import re |
| import concurrent.futures |
|
|
| |
# Page chrome: browser-tab metadata, the visible title, and a short intro.
st.set_page_config(
    page_title="Heretic Models Explorer",
    page_icon="🔥",
    layout="wide",
)

st.title("🔥 Heretic Models Explorer")

# Markdown blurb shown directly under the title.
_INTRO_MARKDOWN = (
    "This space lists all models on Hugging Face tagged with "
    "[`heretic`](https://huggingface.co/models?other=heretic). "
    "It automatically fetches their model cards to extract **KL Divergence** and **Refusals**, "
    "allowing you to sort and compare them easily. Click on any column header to sort!"
)
st.markdown(_INTRO_MARKDOWN)
|
|
def _parse_kl_divergence(readme_text):
    """Return the first KL divergence value in *readme_text* as a float, or None.

    The label is matched case-insensitively and may be separated from the
    number by punctuation or markup on the same line, e.g.
    ``| **KL divergence** | 0.16 |``.
    """
    # Only well-formed numeric literals are captured, so a sentence-ending
    # period ("0.05.") or a stray dot can no longer break the float
    # conversion, and scientific notation ("1.5e-05") keeps its exponent.
    match = re.search(
        r"(?i)KL\s*divergence[^a-zA-Z\d\n]*?"
        r"((?:\d+(?:\.\d+)?|\.\d+)(?:[eE][-+]?\d+)?)",
        readme_text,
    )
    return float(match.group(1)) if match else None


def _parse_refusals(readme_text):
    """Extract refusal counts from *readme_text*.

    Scans line by line for lines mentioning "refusals" that contain at least
    one ``N/M`` fraction.

    Returns:
        A ``(refusals, refusal_rate, initial_refusals)`` tuple where
        ``refusals`` and ``initial_refusals`` are ``"N/M"`` strings and
        ``refusal_rate`` is a percentage. Any element is ``None`` when it
        could not be determined; ``refusal_rate`` is also ``None`` when the
        denominator is zero.
    """
    refusals_str = None
    refusal_rate = None
    initial_refusals_str = None

    for line in readme_text.split('\n'):
        line_lower = line.lower()
        if 'refusals' not in line_lower:
            continue

        fractions = re.findall(r"(\d+)\s*/\s*(\d+)", line)
        if not fractions:
            continue

        numerator, denominator = fractions[0]
        if 'initial' in line_lower:
            # A dedicated "initial refusals" line; the first one seen wins.
            if initial_refusals_str is None:
                initial_refusals_str = f"{numerator}/{denominator}"
        elif refusals_str is None:
            refusals_str = f"{numerator}/{denominator}"
            # A zero denominator leaves the rate undefined instead of
            # raising and aborting the scan of the remaining lines.
            if int(denominator) != 0:
                refusal_rate = (int(numerator) / int(denominator)) * 100

            # Table-style rows list "this model | original model", so a
            # second fraction on the same line is the initial count.
            if len(fractions) >= 2 and initial_refusals_str is None:
                initial_refusals_str = f"{fractions[1][0]}/{fractions[1][1]}"

        # Nothing further down the card can change the result.
        if refusals_str is not None and initial_refusals_str is not None:
            break

    return refusals_str, refusal_rate, initial_refusals_str


def fetch_model_info(model):
    """Fetch the README.md for a given model and extract its metrics.

    Args:
        model: Object exposing an ``id`` attribute (the Hub repo id) and,
            optionally, ``likes`` and ``downloads`` attributes.

    Returns:
        A dict describing one table row, or ``None`` when the README cannot
        be downloaded (network error or non-200 status) or when it lacks
        either a KL divergence value or a refusals fraction.
    """
    model_id = model.id
    url = f"https://huggingface.co/{model_id}/raw/main/README.md"

    # Best effort: a model whose card cannot be fetched is simply skipped.
    # Only network-level failures are swallowed so that genuine programming
    # errors are not hidden.
    try:
        response = requests.get(url, timeout=5)
    except requests.RequestException:
        return None

    if response.status_code != 200:
        return None

    readme_text = response.text
    kl_div = _parse_kl_divergence(readme_text)
    refusals_str, refusal_rate, initial_refusals_str = _parse_refusals(readme_text)

    # Both headline metrics are required for a model to be listed.
    if kl_div is None or refusals_str is None:
        return None

    return {
        "Model ID": model_id,
        "KL Divergence": kl_div,
        "Initial Refusals": initial_refusals_str,
        "Refusal Rate (%)": refusal_rate,
        "Refusals": refusals_str,
        "Likes": getattr(model, 'likes', 0),
        "Downloads": getattr(model, 'downloads', 0),
        "URL": f"https://huggingface.co/{model_id}"
    }
|
|
@st.cache_data(ttl=3600, show_spinner=False)
def get_heretic_models():
    """Collect metric rows for every non-quantized model matching "heretic".

    Lists models from the Hub, drops those whose id looks like a quantized
    or converted variant, then fetches each remaining model card on a thread
    pool. Models for which ``fetch_model_info`` returns ``None`` are omitted.

    Returns:
        A list of row dicts as produced by ``fetch_model_info``.
    """
    # Ids containing any of these markers are treated as quantized variants.
    quantized_marker = re.compile(r'(?i)(gguf|mlx|awq|nvfp4|gptq|exl[23]|-quant|int8|int4|oq[1-8]|mxfp[48])')

    hub = HfApi()
    candidates = [
        entry
        for entry in hub.list_models(filter="heretic")
        if not quantized_marker.search(entry.id)
    ]

    # The per-model work is an HTTP request, so threads overlap the waiting.
    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as pool:
        return [
            row
            for row in pool.map(fetch_model_info, candidates)
            if row is not None
        ]
|
|
| |
# Load the (cached) model rows, showing a spinner while the first run fetches.
with st.spinner("Fetching heretic models and parsing model cards... This might take a moment on the first run."):
    models_data = get_heretic_models()

if models_data:
    df = pd.DataFrame(models_data)

    # The link column is fed the full URL; its display text is derived from
    # the URL by the regex given in the column config below.
    df["Model"] = df["URL"]

    visible_columns = [
        "Model",
        "KL Divergence",
        "Initial Refusals",
        "Refusals",
        "Refusal Rate (%)",
        "Likes",
        "Downloads",
    ]
    display_df = df[visible_columns]

    st.markdown(f"**Found {len(display_df)} models.**")

    # Per-column rendering rules for the table.
    table_columns = {
        "Model": st.column_config.LinkColumn("Model", display_text=r"https://huggingface\.co/(.*)"),
        "KL Divergence": st.column_config.NumberColumn("KL Divergence", format="%.4f"),
        "Initial Refusals": st.column_config.TextColumn("Initial Refusals"),
        "Refusals": st.column_config.TextColumn("Refusals"),
        "Refusal Rate (%)": st.column_config.NumberColumn("Refusal Rate (%)", format="%.2f%%"),
        "Likes": st.column_config.NumberColumn("Likes"),
        "Downloads": st.column_config.NumberColumn("Downloads"),
    }

    st.dataframe(
        display_df,
        column_config=table_columns,
        hide_index=True,
        use_container_width=True,
    )
else:
    st.warning("No models found with the 'heretic' tag.")
|
|