import math
import os

import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Model checkpoint; overridable via environment for easy swapping.
MODEL_ID = os.getenv("MODEL_ID", "Qwen/Qwen3-4B")
# MODEL_ID = os.getenv("MODEL_ID", "bigcode/starcoder2-3b")


def load_model():
    """Load the tokenizer and causal LM named by MODEL_ID.

    Returns:
        (tokenizer, model): the model is in eval mode and already placed on
        device(s) by ``device_map="auto"`` — do not move it afterwards.
    """
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
    # Ensure a pad token exists for safe batching; use eos if needed
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype="auto",
        device_map="auto",
        trust_remote_code=True,
    )
    model.eval()
    return tokenizer, model


TOKENIZER, MODEL = load_model()


@spaces.GPU
def compute_entropy(code: str):
    """Compute per-token next-token negative log-likelihood for *code*.

    Args:
        code: Source code text to score under the model.

    Returns:
        (summary, rows) where ``summary`` is a human-readable stats string and
        ``rows`` is a list of ``[token, nll_nats, prob]`` entries (one per
        evaluated token, i.e. excluding the first token). On empty or
        too-short input, returns an explanatory message and ``None``.
    """
    if not code or not code.strip():
        return "Please paste some source code.", None

    with torch.no_grad():
        # BUG FIX: the original compared torch.device("cuda") to the model's
        # parameter device. Since torch.device("cuda") != torch.device("cuda:0"),
        # the check always mismatched and MODEL.to(device) ran on every call —
        # and calling .to() on a model dispatched by device_map="auto" can
        # raise. Instead, trust the existing placement and move only the
        # inputs onto the model's device.
        device = next(MODEL.parameters()).device

        enc = TOKENIZER(code, return_tensors="pt")
        input_ids = enc["input_ids"].to(device)
        attention_mask = enc.get("attention_mask")
        if attention_mask is not None:
            attention_mask = attention_mask.to(device)

        # Need at least 2 tokens to compute next-token NLL
        if input_ids.shape[1] < 2:
            return "Input is too short to compute token-level entropy.", None

        outputs = MODEL(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits

        # Shift for next-token prediction: position i predicts token i+1.
        shift_logits = logits[:, :-1, :]
        shift_labels = input_ids[:, 1:]

        log_probs = torch.log_softmax(shift_logits, dim=-1)
        # Gather log prob of the true next token
        true_log_probs = log_probs.gather(2, shift_labels.unsqueeze(-1)).squeeze(-1)
        nll = -true_log_probs  # negative log-likelihood (nats)

        nll_list = nll.squeeze(0).detach().cpu().tolist()
        label_ids = shift_labels.squeeze(0).detach().cpu().tolist()
        tokens = TOKENIZER.convert_ids_to_tokens(label_ids)

        # One row per evaluated token: [token, nll (nats), probability].
        rows = [
            [tok, float(nll_val), float(math.exp(-nll_val))]
            for tok, nll_val in zip(tokens, nll_list)
        ]

        # Total entropy is simply the sum of per-token NLL; bits = nats / ln 2.
        total_nats = sum(nll_list)
        total_bits = total_nats / math.log(2)
        avg_nll = total_nats / len(nll_list)
        avg_bits = avg_nll / math.log(2)

        summary = (
            f"Tokens evaluated: {len(nll_list)}\n"
            f"Average NLL (nats): {avg_nll:.4f}\n"
            f"Average NLL (bits): {avg_bits:.4f}\n"
            f"Total entropy (nats): {total_nats:.4f}\n"
            f"Total entropy (bits): {total_bits:.4f}"
        )
        return summary, rows


def build_app():
    """Assemble and return the Gradio Blocks UI for the entropy demo."""
    with gr.Blocks(title="Entropy for Source Code") as demo:
        gr.Markdown(
            f"""
            # Source Code Entropy ({MODEL_ID})
            Paste code below to compute token-level negative log-likelihood (NLL).
            The table shows each token's NLL and probability under the model.
            """
        )
        code = gr.Textbox(
            label="Source Code",
            lines=16,
            placeholder="Paste your source code here...",
        )
        btn = gr.Button("Compute Entropy")
        summary = gr.Textbox(label="Summary", lines=4)
        table = gr.Dataframe(
            headers=["token", "nll_nats", "prob"],
            datatype=["str", "number", "number"],
            label="Token-level NLL",
        )
        btn.click(fn=compute_entropy, inputs=[code], outputs=[summary, table])
        gr.Markdown(
            """
            Notes:
            - NLL is computed for next-token prediction and excludes the first token.
            - Large inputs may take time to process depending on hardware.
            """
        )
    return demo


app = build_app()

if __name__ == "__main__":
    app.launch()