"""Streamlit app that lists Hugging Face models tagged ``heretic``.

For every matching model the README (model card) is downloaded and scanned for
a KL-divergence value and refusal counts, which are then shown in a sortable
table.
"""

import concurrent.futures
import logging
import re

import pandas as pd
import requests
import streamlit as st
from huggingface_hub import HfApi

logger = logging.getLogger(__name__)

# "KL divergence" followed by any non-alphanumeric separator (table pipes,
# asterisks, colons, ...) and then a well-formed number.  The number part
# accepts an optional fraction and exponent so that "0.0033." does not capture
# the trailing period and "1.2e-05" is not truncated to 1.2.
_KL_PATTERN = re.compile(
    r"(?i)KL\s*divergence[^a-zA-Z\d\n]*?"
    r"((?:\d+(?:\.\d+)?|\.\d+)(?:[eE][-+]?\d+)?)"
)

# Fractions such as "15/100" or "15 / 100".
_FRACTION_PATTERN = re.compile(r"(\d+)\s*/\s*(\d+)")

# Substrings of a model id that mark quantised / converted re-uploads.
_QUANT_PATTERN = re.compile(
    r'(?i)(gguf|mlx|awq|nvfp4|gptq|exl[23]|-quant|int8|int4|oq[1-8]|mxfp[48])'
)

# Configure the Streamlit page
st.set_page_config(page_title="Heretic Models Explorer", page_icon="🔥", layout="wide")

st.title("🔥 Heretic Models Explorer")
st.markdown(
    "This space lists all models on Hugging Face tagged with "
    "[`heretic`](https://huggingface.co/models?other=heretic). "
    "It automatically fetches their model cards to extract **KL Divergence** and **Refusals**, "
    "allowing you to sort and compare them easily. Click on any column header to sort!"
)


def _refusal_percentage(numerator, denominator):
    """Return ``numerator / denominator`` as a percentage, or ``None``.

    Both arguments are digit strings taken from the model card.  A zero
    denominator (or digits Python cannot convert) yields ``None`` instead of
    raising, so one malformed fraction cannot abort parsing of the card.
    """
    try:
        return (int(numerator) / int(denominator)) * 100
    except (ValueError, ZeroDivisionError, OverflowError):
        return None


def _parse_model_card(readme_text):
    """Extract metrics from the text of a model card.

    Returns:
        A tuple ``(kl_div, refusals_str, refusal_rate, initial_refusals_str)``.
        Each element is ``None`` when the corresponding value was not found.
    """
    kl_div = None
    kl_match = _KL_PATTERN.search(readme_text)
    if kl_match:
        kl_div = float(kl_match.group(1))

    refusals_str = None
    refusal_rate = None
    initial_refusals_str = None

    for line in readme_text.split("\n"):
        line_lower = line.lower()
        if "refusals" not in line_lower:
            continue

        fractions = _FRACTION_PATTERN.findall(line)
        if not fractions:
            continue

        first_num, first_den = fractions[0]
        if "initial" in line_lower:
            # A dedicated "initial refusals" line; only the first one counts.
            if initial_refusals_str is None:
                initial_refusals_str = f"{first_num}/{first_den}"
        elif refusals_str is None:
            refusals_str = f"{first_num}/{first_den}"
            refusal_rate = _refusal_percentage(first_num, first_den)
            # A second fraction on the same row is taken as the initial
            # (pre-modification) refusal count.
            if len(fractions) >= 2 and initial_refusals_str is None:
                initial_refusals_str = f"{fractions[1][0]}/{fractions[1][1]}"

        if refusals_str is not None and initial_refusals_str is not None:
            # Both values are fixed once set; nothing further can change.
            break

    return kl_div, refusals_str, refusal_rate, initial_refusals_str


def fetch_model_info(model):
    """Fetch the README.md for ``model`` and extract its metrics.

    Args:
        model: An object with an ``id`` attribute and, optionally, ``likes``
            and ``downloads`` attributes (as yielded by ``HfApi.list_models``).

    Returns:
        A dict describing one table row, or ``None`` when the model card could
        not be fetched or lacks either the KL divergence or the refusal count.
        This function is best-effort and never raises for a failed download or
        an unparsable card, so a single bad model cannot break the listing.
    """
    model_id = model.id

    # Download the README.md via raw URL for high-speed fetching
    url = f"https://huggingface.co/{model_id}/raw/main/README.md"

    try:
        response = requests.get(url, timeout=5)
        if response.status_code != 200:
            return None
        kl_div, refusals_str, refusal_rate, initial_refusals_str = _parse_model_card(
            response.text
        )
    except requests.RequestException as err:
        logger.warning("Could not fetch model card for %s: %s", model_id, err)
        return None
    except Exception:
        # Worker boundary: log and skip rather than propagate through
        # executor.map and take down the whole page.
        logger.exception("Unexpected error while processing %s", model_id)
        return None

    if kl_div is None or refusals_str is None:
        return None

    return {
        "Model ID": model_id,
        "KL Divergence": kl_div,
        "Initial Refusals": initial_refusals_str,
        "Refusal Rate (%)": refusal_rate,
        "Refusals": refusals_str,
        "Likes": getattr(model, 'likes', 0),
        "Downloads": getattr(model, 'downloads', 0),
        "URL": f"https://huggingface.co/{model_id}"
    }


@st.cache_data(ttl=3600, show_spinner=False)  # Cache for 1 hour
def get_heretic_models():
    """Fetch all heretic models and their metrics concurrently.

    Returns:
        A list of row dicts as produced by ``fetch_model_info``; models whose
        id looks like a quantised re-upload, or whose card lacks the metrics,
        are omitted.
    """
    api = HfApi()

    # Query all models using the Hugging Face Hub `filter` parameter and drop
    # quantised / converted variants based on their id.
    models = [
        m for m in api.list_models(filter="heretic")
        if not _QUANT_PATTERN.search(m.id)
    ]

    # Fetch details concurrently for speed (10 workers to avoid API rate
    # limiting issues).
    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        return [
            row for row in executor.map(fetch_model_info, models)
            if row is not None
        ]


# Main execution
with st.spinner(
    "Fetching heretic models and parsing model cards... "
    "This might take a moment on the first run."
):
    models_data = get_heretic_models()

if not models_data:
    st.warning("No models found with the 'heretic' tag.")
else:
    df = pd.DataFrame(models_data)

    # The "Model" column holds the full URL; the LinkColumn below renders it
    # as a clickable link whose label is the part after the domain.
    df["Model"] = df["URL"]

    # Select and order columns for display
    display_df = df[
        [
            "Model",
            "KL Divergence",
            "Initial Refusals",
            "Refusals",
            "Refusal Rate (%)",
            "Likes",
            "Downloads",
        ]
    ]

    st.markdown(f"**Found {len(display_df)} models.**")

    # Display as an interactive, sortable dataframe
    st.dataframe(
        display_df,
        column_config={
            "Model": st.column_config.LinkColumn("Model", display_text=r"https://huggingface\.co/(.*)"),
            "KL Divergence": st.column_config.NumberColumn("KL Divergence", format="%.4f"),
            "Initial Refusals": st.column_config.TextColumn("Initial Refusals"),
            "Refusals": st.column_config.TextColumn("Refusals"),
            "Refusal Rate (%)": st.column_config.NumberColumn("Refusal Rate (%)", format="%.2f%%"),
            "Likes": st.column_config.NumberColumn("Likes"),
            "Downloads": st.column_config.NumberColumn("Downloads")
        },
        hide_index=True,
        use_container_width=True
    )