Spaces:
Sleeping
Sleeping
Commit ·
dc52bd0
1
Parent(s): 82ada8a
Compute total entropy too.
Browse files
app.py
CHANGED
|
@@ -5,8 +5,8 @@ import torch
|
|
| 5 |
import gradio as gr
|
| 6 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 7 |
|
| 8 |
-
|
| 9 |
-
MODEL_ID = os.getenv("MODEL_ID", "bigcode/starcoder2-3b")
|
| 10 |
|
| 11 |
|
| 12 |
def load_model():
|
|
@@ -73,10 +73,16 @@ def compute_entropy(code: str):
|
|
| 73 |
|
| 74 |
avg_nll = sum(nll_list) / len(nll_list)
|
| 75 |
avg_bits = avg_nll / math.log(2)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
summary = (
|
| 77 |
f"Tokens evaluated: {len(nll_list)}\n"
|
| 78 |
f"Average NLL (nats): {avg_nll:.4f}\n"
|
| 79 |
-
f"Average NLL (bits): {avg_bits:.4f}"
|
|
|
|
|
|
|
| 80 |
)
|
| 81 |
|
| 82 |
return summary, rows
|
|
|
|
| 5 |
import gradio as gr
|
| 6 |
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 7 |
|
| 8 |
+
MODEL_ID = os.getenv("MODEL_ID", "Qwen/Qwen2.5-Coder-1.5B")
|
| 9 |
+
#MODEL_ID = os.getenv("MODEL_ID", "bigcode/starcoder2-3b")
|
| 10 |
|
| 11 |
|
| 12 |
def load_model():
|
|
|
|
| 73 |
|
| 74 |
avg_nll = sum(nll_list) / len(nll_list)
|
| 75 |
avg_bits = avg_nll / math.log(2)
|
| 76 |
+
# total entropy in bits: the summed per-token NLL (nats) converted to bits by dividing by ln(2)
|
| 77 |
+
total_bits = sum(nll_list) / math.log(2)
|
| 78 |
+
# total entropy in nats is simply the sum of per-token NLL (nats)
|
| 79 |
+
total_nats = sum(nll_list)
|
| 80 |
summary = (
|
| 81 |
f"Tokens evaluated: {len(nll_list)}\n"
|
| 82 |
f"Average NLL (nats): {avg_nll:.4f}\n"
|
| 83 |
+
f"Average NLL (bits): {avg_bits:.4f}\n"
|
| 84 |
+
f"Total entropy (nats): {total_nats:.4f}\n"
|
| 85 |
+
f"Total entropy (bits): {total_bits:.4f}"
|
| 86 |
)
|
| 87 |
|
| 88 |
return summary, rows
|