Spaces:

shreyask
/

microembeddings

Sleeping

shreyask committed on Mar 3

Commit

ac7577a

verified ·

1 Parent(s): d118fa3

fix: LR schedule + analogy tab defaults

Files changed (2) hide show

app.py CHANGED Viewed

@@ -209,13 +209,13 @@ with gr.Blocks(title="microembeddings", theme=gr.themes.Soft()) as demo:
                 "Computed as: `B - A + C ≈ ?`"
             )
             with gr.Row():
-                a_input = gr.Textbox(label="A", placeholder="king", value="king")
-                b_input = gr.Textbox(label="B", placeholder="man", value="man")
                 c_input = gr.Textbox(label="C", placeholder="woman", value="woman")
             analogy_btn = gr.Button("Solve", variant="primary")
             gr.Examples(
-                [["king", "man", "woman"], ["paris", "france", "germany"],
-                 ["big", "bigger", "small"]],
                 inputs=[a_input, b_input, c_input]
             )
             analogy_text = gr.Textbox(label="Results", interactive=False, lines=6)

                 "Computed as: `B - A + C ≈ ?`"
             )
             with gr.Row():
+                a_input = gr.Textbox(label="A", placeholder="man", value="man")
+                b_input = gr.Textbox(label="B", placeholder="king", value="king")
                 c_input = gr.Textbox(label="C", placeholder="woman", value="woman")
             analogy_btn = gr.Button("Solve", variant="primary")
             gr.Examples(
+                [["man", "king", "woman"], ["france", "paris", "germany"],
+                 ["bigger", "big", "small"]],
                 inputs=[a_input, b_input, c_input]
             )
             analogy_text = gr.Textbox(label="Results", interactive=False, lines=6)

microembeddings.py CHANGED Viewed

@@ -80,7 +80,9 @@ def train(corpus, vocab_size, neg_dist, epochs=EPOCHS, embed_dim=EMBED_DIM,
     W = (np.random.randn(vocab_size, embed_dim) * scale).astype(np.float32)
     C = np.zeros((vocab_size, embed_dim), dtype=np.float32)
-    total_steps = epochs * len(corpus)
     step = 0
     losses = []

     W = (np.random.randn(vocab_size, embed_dim) * scale).astype(np.float32)
     C = np.zeros((vocab_size, embed_dim), dtype=np.float32)
+    # Each corpus position generates about window + 1 context pairs on average
+    # (random window from 1..window, mean = (window+1)/2, times 2 sides);
+    # `window` is used below as an approximation of that per-position count.
+    total_steps = epochs * len(corpus) * window
     step = 0
     losses = []