HLasse committed on
Commit
afbf56e
·
1 Parent(s): ba443b2

update to gradio

Browse files
Files changed (9) hide show
  1. .gitattributes +0 -34
  2. README.md +24 -9
  3. app.py +252 -194
  4. data_viewer.py +0 -26
  5. options.py +14 -2
  6. process_text.py +0 -2
  7. pyproject.toml +24 -0
  8. requirements.txt +5 -6
  9. uv.lock +0 -0
.gitattributes DELETED
@@ -1,34 +0,0 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tflite filter=lfs diff=lfs merge=lfs -text
29
- *.tgz filter=lfs diff=lfs merge=lfs -text
30
- *.wasm filter=lfs diff=lfs merge=lfs -text
31
- *.xz filter=lfs diff=lfs merge=lfs -text
32
- *.zip filter=lfs diff=lfs merge=lfs -text
33
- *.zst filter=lfs diff=lfs merge=lfs -text
34
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md CHANGED
@@ -1,14 +1,29 @@
1
  ---
2
- title: Textdescriptives
3
- emoji: 📈
4
- colorFrom: green
5
- colorTo: red
6
- sdk: streamlit
7
- sdk_version: 1.19.0
8
  app_file: app.py
 
9
  pinned: false
10
- license: apache-2.0
11
- tags: [NLP, feature extraction]
12
  ---
13
 
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: TextDescriptives
3
+ emoji: 📊
4
+ colorFrom: blue
5
+ colorTo: green
6
+ sdk: gradio
7
+ sdk_version: "5.12.0"
8
  app_file: app.py
9
+ python_version: "3.10"
10
  pinned: false
 
 
11
  ---
12
 
13
+ # TextDescriptives Demo
14
+
15
+ A Gradio dashboard for extracting text metrics with TextDescriptives. Live at https://huggingface.co/spaces/HLasse/textdescriptives
16
+
17
+
18
+ ## TODO
19
+
20
+ - [ ] Add license
21
+
22
+
23
+ ## Installation
24
+
25
+ ```shell
26
+ uv venv && source .venv/bin/activate
27
+ uv pip install -e ".[models]"
28
+ python app.py
29
+ ```
app.py CHANGED
@@ -1,191 +1,32 @@
1
  """
2
  Dashboard for showcasing extraction of text metrics with textdescriptives.
3
-
4
  """
5
 
6
- from io import StringIO
7
 
 
8
  import pandas as pd
9
- import streamlit as st
10
  import textdescriptives as td
11
 
12
- from data_viewer import DataViewer
13
- from process_text import text_to_metrics
14
  from options import (
15
  all_model_size_options_pretty_to_short,
16
  available_model_size_options,
17
  language_options,
18
  metrics_options,
19
  )
 
20
 
21
- ################
22
- # Introduction #
23
- ################
24
-
25
-
26
- col1, col2 = st.columns([9, 2])
27
- with col1:
28
- st.title("Extract Text Statistics")
29
- with col2:
30
- st.image(
31
- "https://github.com/HLasse/TextDescriptives/raw/main/docs/_static/icon.png",
32
- width=125,
33
- )
34
-
35
- st.write(
36
- "Calculate a large variety of statistics from text via the "
37
- "[**TextDescriptives**](https://github.com/HLasse/TextDescriptives) python package "
38
- f"(v/{td.__version__}) and download the results as a .csv file. "
39
- "Includes descriptive statistics and metrics related to readability, "
40
- "information theory, text coherence and text quality."
41
- )
42
-
43
- st.write(
44
- "The source code for this application can be found on [**GitHub**](https://github.com/HLasse/TextDescriptives_app). "
45
- "If you have feedback, please open an [issue](https://github.com/HLasse/textdescriptives_app/issues)."
46
- )
47
-
48
- st.caption(
49
- "Hansen, L., Olsen, L. R., & Enevoldsen, K. (2023). TextDescriptives: A Python package for "
50
- "calculating a large variety of metrics from text. [Journal of Open Source Software, 8(84), "
51
- "5153, https://doi.org/10.21105/joss.05153](https://doi.org/10.21105/joss.05153)"
52
- )
53
-
54
-
55
- ############
56
- # Settings #
57
- ############
58
-
59
-
60
- input_choice = st.radio(
61
- label="Input", options=["Enter text", "Upload file(s)"], index=0, horizontal=True
62
- )
63
-
64
- with st.form(key="settings_form"):
65
- split_by_line = st.checkbox(label="Split by newline", value=True)
66
-
67
- file_name_to_text_string = {}
68
-
69
- if input_choice == "Upload file(s)":
70
- uploaded_files = st.file_uploader(
71
- label="Choose a .txt file", type=["txt"], accept_multiple_files=True
72
- )
73
-
74
- if uploaded_files is not None and len(uploaded_files) > 0:
75
- # To convert to a string based IO:
76
- file_name_to_text_string = {
77
- file.name: StringIO(file.getvalue().decode("utf-8")).read()
78
- for file in uploaded_files
79
- }
80
-
81
- else:
82
- default_text = """Hello, morning dew. The grass whispers low.
83
  I'm here to dance. The gentle breeze does show.
84
  Good morning, world. The birds sing in delight.
85
  Let's spread our wings. The butterflies take flight.
86
  Nature's chorus sings, a symphony of light."""
87
 
88
- file_name_to_text_string = {
89
- "input": st.text_area(
90
- label="Enter text", value=default_text, height=145, max_chars=None
91
- )
92
- }
93
-
94
- # Row of selectors
95
- col1, col2 = st.columns([1, 1])
96
-
97
- with col1:
98
- # Selection of language
99
- language_pretty = st.selectbox(
100
- label="Language",
101
- options=list(language_options().keys()),
102
- index=5,
103
- key="language_selector",
104
- )
105
-
106
- language_short = language_options()[language_pretty]
107
-
108
- with col2:
109
- # Selection of model size
110
- model_size_pretty = st.selectbox(
111
- label="Model Size",
112
- options=available_model_size_options(lang="all"),
113
- index=0,
114
- key="size_selector",
115
- )
116
-
117
- model_size_short = all_model_size_options_pretty_to_short()[model_size_pretty]
118
-
119
- # Multiselection of metrics
120
- metrics = st.multiselect(
121
- label="Metrics", options=metrics_options(), default=metrics_options()
122
- )
123
-
124
- st.write(
125
- "See the [**documentation**](https://hlasse.github.io/TextDescriptives/) for "
126
- "information on the available metrics."
127
- )
128
-
129
- # This shouldn't happen but better safe than sorry
130
- if isinstance(metrics, list) and not metrics:
131
- metrics = None
132
-
133
- apply_settings_button = st.form_submit_button(label="Apply")
134
-
135
-
136
- #############
137
- # Apply NLP #
138
- #############
139
-
140
-
141
- if apply_settings_button and len(file_name_to_text_string) > 0:
142
- if model_size_pretty not in available_model_size_options(lang=language_short):
143
- st.write(
144
- "**Sorry!** The chosen *model size* is not available in this language. Please try another."
145
- )
146
- else:
147
- # Extract metrics for each text
148
- output_df = pd.concat(
149
- [
150
- text_to_metrics(
151
- string=string,
152
- language_short=language_short,
153
- model_size_short=model_size_short,
154
- metrics=metrics,
155
- split_by_line=split_by_line,
156
- filename=filename if "Upload" in input_choice else None,
157
- )
158
- for filename, string in file_name_to_text_string.items()
159
- ],
160
- ignore_index=True,
161
- )
162
-
163
- ###################
164
- # Present Results #
165
- ###################
166
-
167
- # Create 2 columns with 1) the output header
168
- # and 2) a download button
169
- DataViewer()._header_and_download(
170
- header="The calculated metrics",
171
- data=output_df,
172
- file_name="text_metrics.csv",
173
- )
174
-
175
- st.write("**Note**: This data frame has been transposed for readability.")
176
- output_df = output_df.transpose().reset_index()
177
- output_df.columns = ["Metric"] + [str(c) for c in list(output_df.columns)[1:]]
178
- st.dataframe(data=output_df, use_container_width=True)
179
-
180
 
181
- ############################
182
- # Code For Reproducibility #
183
- ############################
184
-
185
-
186
- with st.expander("See python code"):
187
- st.code(
188
- """
189
  # Note: This is the code for a single text file
190
  # The actual code is slightly more complex
191
  # to allow processing multiple files at once
@@ -219,39 +60,256 @@ extracted_metrics = td.extract_metrics(
219
  spacy_model_size=model_size,
220
  metrics=metrics
221
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
 
223
- """,
224
- language="python",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
  )
226
 
227
- #######
228
- # FAQ #
229
- #######
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
 
231
- st.subheader("Frequently Asked Questions (FAQ)")
232
 
233
- with st.expander("What does the 'Split by newline' option do?"):
234
- st.write(
235
- """
236
- When the `Split by newline` option is `enabled`, the metrics calculation is
237
- performed separately for each paragraph. I.e. whenever there's a line break,
238
- we split the text.
239
 
240
- When this option is `disabled`, the entire text is processed at once.
241
- """
 
 
 
 
242
  )
243
 
244
- with st.expander(
245
- "Why do I get a warning/error message for certain languages or model sizes?"
246
- ):
247
- st.write(
248
- """
249
- Some combinations of languages, model sizes, and metrics are not currently supported in the app.
250
- While we *are* working on this, you may currently see a red box
251
- with an error message after clicking `Apply`.
252
-
253
- If you need this language and/or model size to work for your project,
254
- please open an [issue](https://github.com/HLasse/textdescriptives_app/issues).
255
- This may cause us to prioritize supporting your use case.
256
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """
2
  Dashboard for showcasing extraction of text metrics with textdescriptives.
 
3
  """
4
 
5
+ import tempfile
6
 
7
+ import gradio as gr
8
  import pandas as pd
 
9
  import textdescriptives as td
10
 
 
 
11
  from options import (
12
  all_model_size_options_pretty_to_short,
13
  available_model_size_options,
14
  language_options,
15
  metrics_options,
16
  )
17
+ from process_text import text_to_metrics
18
 
19
+ DEFAULT_TEXT = """Hello, morning dew. The grass whispers low.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  I'm here to dance. The gentle breeze does show.
21
  Good morning, world. The birds sing in delight.
22
  Let's spread our wings. The butterflies take flight.
23
  Nature's chorus sings, a symphony of light."""
24
 
25
+ LANG_OPTIONS = language_options()
26
+ LANG_NAMES = list(LANG_OPTIONS.keys())
27
+ DEFAULT_LANG_INDEX = LANG_NAMES.index("English")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
+ CODE_SNIPPET = """\
 
 
 
 
 
 
 
30
  # Note: This is the code for a single text file
31
  # The actual code is slightly more complex
32
  # to allow processing multiple files at once
 
60
  spacy_model_size=model_size,
61
  metrics=metrics
62
  )
63
+ """
64
+
65
+ CSS = """
66
+ .citation {
67
+ font-size: 0.85em;
68
+ color: #666;
69
+ }
70
+ """
71
+
72
+
73
+ def toggle_input(choice):
74
+ if choice == "Upload file(s)":
75
+ return gr.update(visible=False), gr.update(visible=True)
76
+ return gr.update(visible=True), gr.update(visible=False)
77
 
78
+
79
+ def process_and_display(
80
+ input_choice,
81
+ text_input,
82
+ files,
83
+ split_by_line,
84
+ language_pretty,
85
+ model_size_pretty,
86
+ metrics,
87
+ ):
88
+ if not metrics:
89
+ return (
90
+ gr.update(value="**Please select at least one metric.**", visible=True),
91
+ gr.update(visible=False),
92
+ gr.update(visible=False),
93
+ None,
94
+ )
95
+
96
+ language_short = LANG_OPTIONS[language_pretty]
97
+ model_size_short = all_model_size_options_pretty_to_short()[model_size_pretty]
98
+
99
+ if model_size_pretty not in available_model_size_options(lang=language_short):
100
+ return (
101
+ gr.update(
102
+ value="**Sorry!** The chosen *model size* is not available in this language. Please try another.",
103
+ visible=True,
104
+ ),
105
+ gr.update(visible=False),
106
+ gr.update(visible=False),
107
+ None,
108
+ )
109
+
110
+ # Build mapping of filename -> text
111
+ file_name_to_text = {}
112
+ if input_choice == "Upload file(s)":
113
+ if not files:
114
+ return (
115
+ gr.update(value="**Please upload at least one file.**", visible=True),
116
+ gr.update(visible=False),
117
+ gr.update(visible=False),
118
+ None,
119
+ )
120
+ for f in files:
121
+ with open(f, "r", encoding="utf-8") as fh:
122
+ file_name_to_text[f.rsplit("/", 1)[-1]] = fh.read()
123
+ else:
124
+ if not text_input or not text_input.strip():
125
+ return (
126
+ gr.update(value="**Please enter some text.**", visible=True),
127
+ gr.update(visible=False),
128
+ gr.update(visible=False),
129
+ None,
130
+ )
131
+ file_name_to_text["input"] = text_input
132
+
133
+ # Extract metrics for each text
134
+ output_df = pd.concat(
135
+ [
136
+ text_to_metrics(
137
+ string=string,
138
+ language_short=language_short,
139
+ model_size_short=model_size_short,
140
+ metrics=metrics,
141
+ split_by_line=split_by_line,
142
+ filename=filename if input_choice == "Upload file(s)" else None,
143
+ )
144
+ for filename, string in file_name_to_text.items()
145
+ ],
146
+ ignore_index=True,
147
  )
148
 
149
+ # Transpose for readability
150
+ transposed = output_df.transpose().reset_index()
151
+ transposed.columns = ["Metric"] + [str(c) for c in list(transposed.columns)[1:]]
152
+
153
+ # Write CSV to a temp file for download
154
+ csv_path = tempfile.NamedTemporaryFile(
155
+ suffix=".csv", delete=False, prefix="text_metrics_"
156
+ ).name
157
+ output_df.to_csv(csv_path, index=False)
158
+
159
+ return (
160
+ gr.update(
161
+ value="**Note**: This data frame has been transposed for readability.",
162
+ visible=True,
163
+ ),
164
+ gr.update(value=transposed, visible=True),
165
+ gr.update(value=csv_path, visible=True),
166
+ csv_path,
167
+ )
168
 
 
169
 
170
+ with gr.Blocks(title="TextDescriptives", css=CSS) as demo:
171
+ ################
172
+ # Introduction #
173
+ ################
 
 
174
 
175
+ gr.HTML(
176
+ '<div style="display:flex;align-items:center;gap:12px;">'
177
+ '<img src="https://github.com/HLasse/TextDescriptives/raw/main/docs/_static/icon.png" '
178
+ 'style="height:56px;width:auto;border-radius:8px;" />'
179
+ '<h1 style="margin:0;font-size:2em;">Extract Text Statistics</h1>'
180
+ '</div>'
181
  )
182
 
183
+ gr.Markdown(
184
+ f"Calculate a large variety of statistics from text via the "
185
+ f"[**TextDescriptives**](https://github.com/HLasse/TextDescriptives) python package "
186
+ f"(v/{td.__version__}) and download the results as a .csv file. "
187
+ f"Includes descriptive statistics and metrics related to readability, "
188
+ f"information theory, text coherence and text quality. "
189
+ f"Source on [**GitHub**](https://github.com/HLasse/TextDescriptives_app) "
190
+ f"— [open an issue](https://github.com/HLasse/textdescriptives_app/issues) for feedback."
191
+ )
192
+
193
+ gr.Markdown(
194
+ "Hansen, L., Olsen, L. R., & Enevoldsen, K. (2023). *TextDescriptives: A Python package for "
195
+ "calculating a large variety of metrics from text.* "
196
+ "[JOSS, 8(84), 5153](https://doi.org/10.21105/joss.05153)",
197
+ elem_classes="citation",
198
+ )
199
+
200
+ ############
201
+ # Settings #
202
+ ############
203
+
204
+ with gr.Group():
205
+ input_choice = gr.Radio(
206
+ choices=["Enter text", "Upload file(s)"],
207
+ value="Enter text",
208
+ label="Input",
209
+ )
210
+
211
+ text_input = gr.Textbox(
212
+ label="Enter text",
213
+ value=DEFAULT_TEXT,
214
+ lines=7,
215
+ visible=True,
216
+ )
217
+
218
+ file_upload = gr.File(
219
+ label="Choose .txt file(s)",
220
+ file_types=[".txt"],
221
+ file_count="multiple",
222
+ visible=False,
223
+ )
224
+
225
+ split_by_line = gr.Checkbox(label="Split by newline", value=True)
226
+
227
+ input_choice.change(
228
+ fn=toggle_input,
229
+ inputs=input_choice,
230
+ outputs=[text_input, file_upload],
231
+ )
232
+
233
+ with gr.Row():
234
+ language_dropdown = gr.Dropdown(
235
+ label="Language",
236
+ choices=LANG_NAMES,
237
+ value=LANG_NAMES[DEFAULT_LANG_INDEX],
238
+ )
239
+ model_size_dropdown = gr.Dropdown(
240
+ label="Model Size",
241
+ choices=available_model_size_options(lang="all"),
242
+ value=available_model_size_options(lang="all")[0],
243
+ )
244
+
245
+ metrics_select = gr.CheckboxGroup(
246
+ label="Metrics",
247
+ choices=metrics_options(),
248
+ value=metrics_options(),
249
  )
250
+
251
+ gr.Markdown(
252
+ "See the [**documentation**](https://hlasse.github.io/TextDescriptives/) "
253
+ "for information on the available metrics."
254
+ )
255
+
256
+ apply_btn = gr.Button("Apply", variant="primary")
257
+
258
+ #############
259
+ # Results #
260
+ #############
261
+
262
+ status_msg = gr.Markdown(visible=False)
263
+ results_table = gr.DataFrame(visible=False, label="Results")
264
+ csv_state = gr.State(value=None)
265
+ download_btn = gr.DownloadButton("Download CSV", visible=False, variant="primary")
266
+
267
+ apply_btn.click(
268
+ fn=process_and_display,
269
+ inputs=[
270
+ input_choice,
271
+ text_input,
272
+ file_upload,
273
+ split_by_line,
274
+ language_dropdown,
275
+ model_size_dropdown,
276
+ metrics_select,
277
+ ],
278
+ outputs=[status_msg, results_table, download_btn, csv_state],
279
+ )
280
+
281
+ ############################
282
+ # Code For Reproducibility #
283
+ ############################
284
+
285
+ with gr.Accordion("See python code", open=False):
286
+ gr.Code(value=CODE_SNIPPET, language="python", interactive=False)
287
+
288
+ #######
289
+ # FAQ #
290
+ #######
291
+
292
+ gr.Markdown("## FAQ")
293
+
294
+ with gr.Accordion("What does the 'Split by newline' option do?", open=False):
295
+ gr.Markdown(
296
+ "When the `Split by newline` option is `enabled`, the metrics calculation is "
297
+ "performed separately for each paragraph. I.e. whenever there's a line break, "
298
+ "we split the text.\n\n"
299
+ "When this option is `disabled`, the entire text is processed at once."
300
+ )
301
+
302
+ with gr.Accordion(
303
+ "Why do I get a warning/error message for certain languages or model sizes?",
304
+ open=False,
305
+ ):
306
+ gr.Markdown(
307
+ "Some combinations of languages, model sizes, and metrics are not currently supported in the app. "
308
+ "While we *are* working on this, you may currently see an error message after clicking `Apply`.\n\n"
309
+ "If you need this language and/or model size to work for your project, "
310
+ "please open an [issue](https://github.com/HLasse/textdescriptives_app/issues). "
311
+ "This may cause us to prioritize supporting your use case."
312
+ )
313
+
314
+ if __name__ == "__main__":
315
+ demo.launch()
data_viewer.py DELETED
@@ -1,26 +0,0 @@
1
- """
2
- Class for showing header and download button in the same row.
3
- """
4
-
5
- import streamlit as st
6
-
7
-
8
- class DataViewer:
9
- def _convert_df_to_csv(self, data, **kwargs):
10
- return data.to_csv(**kwargs).encode("utf-8")
11
-
12
- def _header_and_download(
13
- self, header, data, file_name, key=None, label="Download", help="Download data"
14
- ):
15
- col1, col2 = st.columns([9, 2])
16
- with col1:
17
- st.subheader(header)
18
- with col2:
19
- st.write("")
20
- st.download_button(
21
- label=label,
22
- data=self._convert_df_to_csv(data, index=False),
23
- file_name=file_name,
24
- key=key,
25
- help=help,
26
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
options.py CHANGED
@@ -80,9 +80,18 @@ def available_model_size_options(lang) -> List[str]:
80
 
81
 
82
  class ModelAvailabilityChecker:
 
 
83
  @staticmethod
84
  def available_models() -> List[str]:
85
- return list(get_compatibility().keys())
 
 
 
 
 
 
 
86
 
87
  @staticmethod
88
  def extract_language_and_size() -> List[List[str]]:
@@ -106,8 +115,11 @@ class ModelAvailabilityChecker:
106
 
107
  @staticmethod
108
  def available_model_sizes_for_language(lang: str) -> Set[str]:
109
- return set([
110
  size
111
  for (lang_, size) in ModelAvailabilityChecker.extract_language_and_size()
112
  if lang_ == lang and size in all_model_size_options_pretty_to_short().values()
113
  ])
 
 
 
 
80
 
81
 
82
  class ModelAvailabilityChecker:
83
+ _compatibility_cache = None
84
+
85
  @staticmethod
86
  def available_models() -> List[str]:
87
+ if ModelAvailabilityChecker._compatibility_cache is None:
88
+ try:
89
+ ModelAvailabilityChecker._compatibility_cache = list(
90
+ get_compatibility().keys()
91
+ )
92
+ except Exception:
93
+ ModelAvailabilityChecker._compatibility_cache = []
94
+ return ModelAvailabilityChecker._compatibility_cache
95
 
96
  @staticmethod
97
  def extract_language_and_size() -> List[List[str]]:
 
115
 
116
  @staticmethod
117
  def available_model_sizes_for_language(lang: str) -> Set[str]:
118
+ sizes = set([
119
  size
120
  for (lang_, size) in ModelAvailabilityChecker.extract_language_and_size()
121
  if lang_ == lang and size in all_model_size_options_pretty_to_short().values()
122
  ])
123
+ if not sizes:
124
+ return set(all_model_size_options_pretty_to_short().values())
125
+ return sizes
process_text.py CHANGED
@@ -3,12 +3,10 @@ The text processing functionality.
3
  """
4
 
5
  from typing import List, Optional
6
- import streamlit as st
7
  import pandas as pd
8
  import textdescriptives as td
9
 
10
 
11
- @st.cache_data
12
  def text_to_metrics(
13
  string: str,
14
  language_short: str,
 
3
  """
4
 
5
  from typing import List, Optional
 
6
  import pandas as pd
7
  import textdescriptives as td
8
 
9
 
 
10
  def text_to_metrics(
11
  string: str,
12
  language_short: str,
pyproject.toml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "textdescriptives-app"
3
+ version = "0.1.0"
4
+ description = "Dashboard for extracting text metrics with TextDescriptives"
5
+ readme = "README.md"
6
+ requires-python = ">=3.10"
7
+ dependencies = [
8
+ "textdescriptives>=2.8.2",
9
+ "gradio>=5.0,<6.0",
10
+ "pandas",
11
+ "pip",
12
+ "en-core-web-lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0.tar.gz",
13
+ "da-core-news-lg @ https://github.com/explosion/spacy-models/releases/download/da_core_news_lg-3.8.0/da_core_news_lg-3.8.0.tar.gz",
14
+ ]
15
+
16
+ [tool.hatch.metadata]
17
+ allow-direct-references = true
18
+
19
+ [tool.hatch.build.targets.wheel]
20
+ packages = ["."]
21
+
22
+ [build-system]
23
+ requires = ["hatchling"]
24
+ build-backend = "hatchling.build"
requirements.txt CHANGED
@@ -1,6 +1,5 @@
1
- textdescriptives==2.8.2
2
- streamlit
3
- watchdog
4
- altair<5.0.0
5
- https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.7.0/en_core_web_lg-3.7.0.tar.gz
6
- https://github.com/explosion/spacy-models/releases/download/da_core_news_lg-3.7.0/da_core_news_lg-3.7.0.tar.gz
 
1
+ textdescriptives>=2.8.2
2
+ gradio>=5.0,<6.0
3
+ pandas
4
+ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0.tar.gz
5
+ https://github.com/explosion/spacy-models/releases/download/da_core_news_lg-3.8.0/da_core_news_lg-3.8.0.tar.gz
 
uv.lock ADDED
The diff for this file is too large to render. See raw diff