Philipp Normann committed on
Commit
d9bafc3
·
1 Parent(s): a0c5ceb

Sample words based on frequency

Browse files
Files changed (1) hide show
  1. app.py +17 -11
app.py CHANGED
@@ -47,17 +47,25 @@ def load_vocabulary():
47
  token=os.getenv("HF_TOKEN"),
48
  filename="vocabulary.csv",
49
  local_dir="assets")
50
- return pl.read_csv("assets/vocabulary.csv").to_dicts()
 
 
 
 
 
 
 
 
 
 
51
 
52
 
53
  vocabulary = load_vocabulary()
54
- idx2vocab = {row["word_idx"]: row for row in vocabulary}
55
- vocab_list = [row["word"] for row in vocabulary]
56
 
57
 
58
- # Select a random word
59
  def get_random_word():
60
- return random.choice(vocab_list)
61
 
62
 
63
  # Process the image drawn on canvas
@@ -73,7 +81,7 @@ def process_image(image, current_word):
73
 
74
  predictions = []
75
  for pred, idx in zip(preds_i, indices_i):
76
- vocab = idx2vocab[idx]
77
  predictions.append({
78
  "word": vocab["word"],
79
  "category": vocab["category_name"],
@@ -110,9 +118,6 @@ def create_initial_image():
110
  return Image.fromarray(data)
111
 
112
 
113
- # Create a white image with the dimensions for the ImageEditor
114
- initial_image = create_initial_image
115
-
116
  # Interface definition
117
  with gr.Blocks(theme=gr.themes.Soft(),
118
  css="input {font-size: 24px; font-weight: 600;}") as demo_app:
@@ -121,7 +126,8 @@ with gr.Blocks(theme=gr.themes.Soft(),
121
 
122
  with gr.Row():
123
  word_output = gr.Textbox(label="Your word to draw:",
124
- value=get_random_word(),
 
125
  scale=1,
126
  max_lines=1)
127
  new_word_button = gr.Button("New Word", scale=0, variant="primary")
@@ -133,7 +139,7 @@ with gr.Blocks(theme=gr.themes.Soft(),
133
  sources=[],
134
  transforms=[],
135
  layers=False,
136
- value=initial_image,
137
  brush=gr.Brush(colors=["#000000", "#FF0000", "#00FF00", "#0000FF"],
138
  default_size=10))
139
  plot_output = gr.Plot(label="Model Guesses")
 
47
  token=os.getenv("HF_TOKEN"),
48
  filename="vocabulary.csv",
49
  local_dir="assets")
50
+ return pl.read_csv("assets/vocabulary.csv").sort("label_idx")
51
+
52
+
53
+ def compute_word_weights(vocabulary):
54
+ train_counts = vocabulary.group_by("word").agg(pl.col("train_count").sum())
55
+ total_train_count = train_counts["train_count"].sum()
56
+ word_weights = [(vocab["word"], vocab["train_count"] / total_train_count)
57
+ for vocab in vocabulary.rows(named=True)]
58
+ words = [word for word, _ in word_weights]
59
+ weights = [weight for _, weight in word_weights]
60
+ return words, weights
61
 
62
 
63
  vocabulary = load_vocabulary()
64
+ words, weights = compute_word_weights(vocabulary)
 
65
 
66
 
 
67
  def get_random_word():
68
+ return random.choices(words, weights=weights)[0]
69
 
70
 
71
  # Process the image drawn on canvas
 
81
 
82
  predictions = []
83
  for pred, idx in zip(preds_i, indices_i):
84
+ vocab = vocabulary.row(idx, named=True)
85
  predictions.append({
86
  "word": vocab["word"],
87
  "category": vocab["category_name"],
 
118
  return Image.fromarray(data)
119
 
120
 
 
 
 
121
  # Interface definition
122
  with gr.Blocks(theme=gr.themes.Soft(),
123
  css="input {font-size: 24px; font-weight: 600;}") as demo_app:
 
126
 
127
  with gr.Row():
128
  word_output = gr.Textbox(label="Your word to draw:",
129
+ value=get_random_word,
130
+ interactive=False,
131
  scale=1,
132
  max_lines=1)
133
  new_word_button = gr.Button("New Word", scale=0, variant="primary")
 
139
  sources=[],
140
  transforms=[],
141
  layers=False,
142
+ value=create_initial_image,
143
  brush=gr.Brush(colors=["#000000", "#FF0000", "#00FF00", "#0000FF"],
144
  default_size=10))
145
  plot_output = gr.Plot(label="Model Guesses")