Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import torch | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| import difflib | |
| import requests | |
| import os | |
| import json | |
# Base URL of the Firebase store used to cache model structures.
# Read from the environment; None when the variable is not set.
FIREBASE_URL = os.environ.get("FIREBASE_URL")
def fetch_from_firebase(model_id):
    """Return the cached structure for *model_id* from Firebase, or None.

    The cache is best-effort: an unset ``FIREBASE_URL``, a network failure,
    a non-200 response or an unparsable body are all reported as a cache
    miss (``None``) so the caller can fall back to loading the model.

    Args:
        model_id: Model ID used as the key under ``model_structures``.

    Returns:
        The decoded JSON payload on HTTP 200, otherwise ``None``.
    """
    if not FIREBASE_URL:
        # Without a configured base URL the request would target "None/...".
        return None

    url = f"{FIREBASE_URL}/model_structures/{model_id}.json"
    try:
        # A timeout keeps an unreachable cache from hanging the app.
        response = requests.get(url, timeout=10)
    except requests.RequestException:
        return None

    if response.status_code != 200:
        return None
    try:
        return response.json()
    except ValueError:
        # Body was not valid JSON; treat it as a cache miss.
        return None
def save_to_firebase(model_id, structure):
    """Store *structure* for *model_id* in Firebase.

    Writing to the cache is best-effort: an unset ``FIREBASE_URL`` or a
    network failure yields ``False`` instead of raising, so a broken cache
    never prevents a comparison from completing.

    Args:
        model_id: Model ID used as the key under ``model_structures``.
        structure: JSON-serialisable mapping to store.

    Returns:
        ``True`` when the server answered HTTP 200, otherwise ``False``.
    """
    if not FIREBASE_URL:
        # Without a configured base URL the request would target "None/...".
        return False

    url = f"{FIREBASE_URL}/model_structures/{model_id}.json"
    try:
        # A timeout keeps an unreachable cache from hanging the app.
        response = requests.put(url, data=json.dumps(structure), timeout=10)
    except requests.RequestException:
        return False
    return response.status_code == 200
def get_model_structure(model_id):
    """Return a mapping of state-dict entry name to shape string.

    A cached copy from Firebase is used when one exists and is non-empty.
    Otherwise the model is loaded on CPU in bfloat16, each state-dict
    entry's shape is rendered with ``str()``, and the resulting mapping is
    written back to the cache before being returned.
    """
    cached = fetch_from_firebase(model_id)
    if not cached:
        loaded_model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.bfloat16,
            device_map="cpu",
        )
        shapes = {}
        for name, tensor in loaded_model.state_dict().items():
            shapes[name] = str(tensor.shape)
        # The result of the cache write is not checked.
        save_to_firebase(model_id, shapes)
        return shapes
    return cached
def compare_structures(struct1, struct2):
    """Diff two structure mappings line by line.

    Each mapping is rendered as ``"key: value"`` lines in iteration order
    and the two line lists are passed to ``difflib.ndiff``.

    Returns:
        The iterator of diff lines produced by ``difflib.ndiff``.
    """

    def as_lines(structure):
        # One "name: shape" line per entry, in the mapping's own order.
        return [f"{name}: {shape}" for name, shape in structure.items()]

    return difflib.ndiff(as_lines(struct1), as_lines(struct2))
def display_diff(diff):
    """Render ndiff-style lines as two aligned HTML columns.

    Lines prefixed with ``"- "`` go to the left column on a red background,
    lines prefixed with ``"+ "`` go to the right column on a green
    background, and unchanged lines (leading space) appear in both. The
    opposite column receives an empty row for every one-sided line so the
    two columns stay aligned. Any other line (such as ndiff's ``"? "``
    hints) is ignored.

    The line text is HTML-escaped because the result is meant to be
    rendered as raw HTML and the text may come from an external cache or
    an arbitrary model repository.

    Args:
        diff: Iterable of diff lines carrying a two-character prefix.

    Returns:
        ``(left_html, right_html, diff_found)``: the two columns joined
        with ``<br>``, and whether any added or removed line was seen.
    """
    import html  # local import: only this function needs it

    left_lines = []
    right_lines = []
    diff_found = False
    for line in diff:
        # Escape only &, < and >; the text is placed in element content.
        text = html.escape(line[2:], quote=False)
        if line.startswith('- '):
            left_lines.append(
                f'<span style="background-color: #ffdddd;">{text}</span>'
            )
            right_lines.append('')
            diff_found = True
        elif line.startswith('+ '):
            right_lines.append(
                f'<span style="background-color: #ddffdd;">{text}</span>'
            )
            left_lines.append('')
            diff_found = True
        elif line.startswith(' '):
            left_lines.append(text)
            right_lines.append(text)
        # Anything else (e.g. "? " hint lines) is deliberately skipped.
    left_html = "<br>".join(left_lines)
    right_html = "<br>".join(right_lines)
    return left_html, right_html, diff_found
# Switch the page to wide mode so the two comparison columns have room.
st.set_page_config(layout="wide")

# Extra CSS injected into the page: removes the container's max-width cap,
# adds 10% side padding, and preserves whitespace in rendered markdown.
# NOTE(review): the `.reportview-container` selector may not match newer
# Streamlit releases - confirm against the deployed version.
_WIDE_LAYOUT_CSS = """
<style>
.reportview-container .main .block-container {
max-width: 100%;
padding-left: 10%;
padding-right: 10%;
}
.stMarkdown {
white-space: pre-wrap;
}
</style>
"""
st.markdown(_WIDE_LAYOUT_CSS, unsafe_allow_html=True)
st.title("Model Structure Comparison Tool")
model_id1 = st.text_input("Enter the first HuggingFace Model ID")
model_id2 = st.text_input("Enter the second HuggingFace Model ID")

if "compare_button_clicked" not in st.session_state:
    st.session_state.compare_button_clicked = False

# The button is rendered before the flag is checked so that a click takes
# effect in the same script run. Checking the flag first meant a click only
# set the flag and nothing was shown until another interaction.
if st.button("Compare Models"):
    st.session_state.compare_button_clicked = True

if st.session_state.compare_button_clicked:
    # Reset up front: if anything below raises, the flag must not stay True,
    # otherwise every later rerun would retry the failing comparison.
    st.session_state.compare_button_clicked = False

    if not (model_id1 and model_id2):
        st.error("Please enter both model IDs.")
    else:
        with st.spinner('Comparing models and loading tokenizers...'):
            try:
                struct1 = get_model_structure(model_id1)
                struct2 = get_model_structure(model_id2)
            except Exception as e:
                # Top-level UI boundary: report the failure (for example an
                # unknown model ID) the same way tokenizer errors are shown.
                st.error(f"Error loading model structures: {e}")
            else:
                diff = compare_structures(struct1, struct2)
                left_html, right_html, diff_found = display_diff(diff)

                st.write("### Comparison Result")
                if not diff_found:
                    st.success("The model structures are identical.")

                # Two equally weighted columns, one per model.
                col1, col2 = st.columns([1.5, 1.5])
                with col1:
                    st.write("### Model 1")
                    st.markdown(left_html, unsafe_allow_html=True)
                with col2:
                    st.write("### Model 2")
                    st.markdown(right_html, unsafe_allow_html=True)

                # Tokenizer verification
                try:
                    tokenizer1 = AutoTokenizer.from_pretrained(model_id1)
                    tokenizer2 = AutoTokenizer.from_pretrained(model_id2)
                    st.write(f"**{model_id1} Tokenizer Vocab Size**: {tokenizer1.vocab_size}")
                    st.write(f"**{model_id2} Tokenizer Vocab Size**: {tokenizer2.vocab_size}")
                except Exception as e:
                    st.error(f"Error loading tokenizers: {e}")