WithTravis committed on
Commit
387f325
·
1 Parent(s): 3937a09

Test next word v0.1.0(Build 1)

Browse files
Files changed (4) hide show
  1. Dockerfile +8 -4
  2. README.md +4 -3
  3. app.py +54 -0
  4. requirements.txt +3 -2
Dockerfile CHANGED
@@ -5,12 +5,16 @@ FROM python:3.12
5
 
6
  RUN useradd -m -u 1000 user
7
  USER user
8
- ENV PATH="/home/user/.local/bin:$PATH"
 
9
 
10
- WORKDIR /app
 
 
11
 
12
  COPY --chown=user ./requirements.txt requirements.txt
13
  RUN pip install --no-cache-dir --upgrade -r requirements.txt
14
 
15
- COPY --chown=user . /app
16
- CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
 
 
5
 
6
  RUN useradd -m -u 1000 user
7
  USER user
8
+ ENV HOME=/home/user \
9
+ PATH="/home/user/.local/bin:$PATH"
10
 
11
+ WORKDIR $HOME/app
12
+
13
+ RUN pip install --no-cache-dir --upgrade pip
14
 
15
  COPY --chown=user ./requirements.txt requirements.txt
16
  RUN pip install --no-cache-dir --upgrade -r requirements.txt
17
 
18
+ COPY --chown=user app.py app.py
19
+
20
+ ENTRYPOINT ["solara", "run", "app.py", "--host=0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,12 +1,13 @@
1
  ---
2
  title: NextTokenPrediction
3
- emoji: 📊
4
  colorFrom: purple
5
- colorTo: indigo
6
  sdk: docker
7
- pinned: false
8
  license: gemma
9
  short_description: Visualization of probabilities of the next token Gemma3:1b
 
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
  title: NextTokenPrediction
3
+ emoji: 🔀
4
  colorFrom: purple
5
+ colorTo: blue
6
  sdk: docker
7
+ pinned: true
8
  license: gemma
9
  short_description: Visualization of probabilities of the next token Gemma3:1b
10
+ app_port: 7860
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -1,7 +1,61 @@
1
  from fastapi import FastAPI
 
 
 
 
 
 
2
 
3
  app = FastAPI()
4
 
5
  @app.get("/")
6
  def greet_json():
7
  return {"Hello": "World!"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI
# NOTE(review): this commit drops `fastapi` from requirements.txt while this
# import remains — re-add fastapi to requirements (or remove this stub)
# before deploying, otherwise the import fails at startup.
import solara
import random
import torch
import torch.nn.functional as F
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM

app = FastAPI()

@app.get("/")
def greet_json():
    """Health-check endpoint kept from the original FastAPI stub.

    Unused when the Space is launched via `solara run app.py` (see Dockerfile
    ENTRYPOINT) — presumably kept for local testing; confirm before removing.
    """
    return {"Hello": "World!"}

# BUG FIX: the original passed the bare repo name 'gemma-3-1b-it-qat-q4_0-gguf',
# which is not a valid Hub repo id — the model lives under the 'google/'
# namespace (the UI text below refers to google/gemma-3-1b-it-qat-q4_0-gguf).
MODEL_ID = "google/gemma-3-1b-it-qat-q4_0-gguf"
# NOTE(review): this repo ships GGUF-quantized weights; transformers may need
# an explicit gguf_file=... argument to load it — verify against the repo's
# file listing.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID)

# Reactive input text shared between the InputText widget and the table's
# cell action (clicking a candidate appends it to the prompt).
text1 = solara.reactive("Never gonna give you up, never gonna let you")

@solara.component
def Page():
    """Visualize the model's top-10 next-token candidates for the input text.

    Renders the tokenized prompt as colored spans (token ids above, decoded
    text below), runs a single-step generation, and shows the softmax
    probabilities of the ten most likely next tokens in a selectable table.
    """
    with solara.Column(margin=10):
        # BUG FIX: CommonMark requires a space after '#' for a heading;
        # "#Next..." rendered as literal text instead of an <h1>.
        solara.Markdown("# Next token prediction visualization")
        solara.Markdown("I built this tool to help me understand autoregressive language models. For any given text, it gives the top 10 candidates to be the next token with their respective probabilities. The language model I'm using is the smallest version of google/gemma-3-1b-it-qat-q4_0-gguf, with 1B parameters.")

        def on_action_cell(column, row_index):
            # Append the clicked candidate token to the prompt; `top_10` is
            # bound at click time from the enclosing render below.
            text1.value += tokenizer.decode(top_10.indices[0][row_index])

        cell_actions = [solara.CellAction(icon="mdi-thumb-up", name="Select", on_click=on_action_cell)]
        solara.InputText("Enter text:", value=text1, continuous_update=True)

        if text1.value != "":
            tokens = tokenizer.encode(text1.value, return_tensors="pt")
            spans1 = ""
            spans2 = ""
            for i, token in enumerate(tokens[0]):
                # Seed per token index so each token keeps a stable color
                # across re-renders.
                random.seed(i)
                random_color = ''.join([random.choice('0123456789ABCDEF') for k in range(6)])
                spans1 += " " + f"<span style='font-family: helvetica; color: #{random_color}'>{token}</span>"
                spans2 += " " + f"""<span style="
                    padding: 6px;
                    border-right: 3px solid white;
                    line-height: 3em;
                    font-family: courier;
                    background-color: #{random_color};
                    color: white;
                    position: relative;
                    "><span style="
                        position: absolute;
                        top: 5.5ch;
                        line-height: 1em;
                        left: -0.5px;
                        font-size: 0.45em"> {token}</span>{tokenizer.decode([token])}</span>"""
            solara.Markdown(f'{spans2}')
            solara.Markdown(f'{spans1}')

            # Single-step generation; output_scores gives the pre-softmax
            # logits for the one generated position.
            outputs = model.generate(tokens, max_new_tokens=1, output_scores=True, return_dict_in_generate=True, pad_token_id=tokenizer.eos_token_id)
            scores = F.softmax(outputs.scores[0], dim=-1)
            top_10 = torch.topk(scores, 10)

            df = pd.DataFrame()
            df["probs"] = top_10.values[0]
            df["probs"] = [f"{value:.2%}" for value in df["probs"].values]
            df["next token ID"] = [top_10.indices[0][i].numpy() for i in range(10)]
            df["predicted next token"] = [tokenizer.decode(top_10.indices[0][i]) for i in range(10)]
            # BUG FIX: "###Prediction" needs a space after '###' to render
            # as a heading.
            solara.Markdown("### Prediction")
            solara.DataFrame(df, items_per_page=10, cell_actions=cell_actions)

Page()
requirements.txt CHANGED
@@ -1,2 +1,3 @@
1
- fastapi
2
- uvicorn[standard]
 
 
1
+ solara
2
+ pandas
3
+ transformers[torch]