cstr committed on
Commit
398cf4f
·
verified ·
1 Parent(s): c31489f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +215 -1
app.py CHANGED
@@ -1 +1,215 @@
1
- Geben Sie einen Text ein, um die morphologischen Details für jedes Wort zu erhalten. Um dies als API zu verwenden, klicken Sie auf den "View API"-Link unten.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import spacy
3
+ import os
4
+ from spacy import displacy
5
+
6
# --- 1. UI Translations ---
# Every user-visible string, grouped per language. Both language tables
# expose the same set of keys, so the UI code can switch between them freely.
_UI_TEXT_DE = {
    "title": "# Deutscher NLP-Analysator (mit spaCy)",
    "subtitle": "Geben Sie einen Text ein, um die morphologischen Details für jedes Wort zu erhalten.\n**Um dies als API zu verwenden, klicken Sie auf den \"View API\"-Link unten.**",
    "input_label": "Deutscher Text",
    "input_placeholder": "Die schnellen braunen Füchse...",
    "button_text": "Analysieren",
    "tab_graphic": "Syntaktische Analyse (Grafik)",
    "tab_table": "Visuelle Tabelle (Tokens)",
    "tab_json": "Roh-JSON (für API)",
    "html_label": "Abhängigkeits-Parse",
    "table_label": "Analyse-Ergebnisse (Tabelle)",
    "table_headers": ["Wort", "Lemma", "POS", "Tag (detailliert)", "Morphologie", "Abhängigkeit"],
    "json_label": "Analyse-Ergebnisse (JSON)",
}

_UI_TEXT_EN = {
    "title": "# English NLP Analyzer (with spaCy)",
    "subtitle": "Enter any text to get the morphological details for each word.\n**To use this as an API, click the \"View API\" link at the bottom.**",
    "input_label": "English Text",
    "input_placeholder": "The quick brown foxes...",
    "button_text": "Analyze",
    "tab_graphic": "Syntactic Analysis (Graphic)",
    "tab_table": "Visual Table (Tokens)",
    "tab_json": "Raw JSON (for API)",
    "html_label": "Dependency Parse",
    "table_label": "Analysis Results (Table)",
    "table_headers": ["Word", "Lemma", "POS", "Tag (detailed)", "Morphology", "Dependency"],
    "json_label": "Analysis Results (JSON)",
}

# Lookup table used by the UI: lower-case language code -> string table.
UI_TEXT = {
    "de": _UI_TEXT_DE,
    "en": _UI_TEXT_EN,
}
38
+
39
# --- 2. Model Loading ---
# spaCy pipeline package to load for each supported language code.
MODEL_NAMES = dict(
    de="de_core_news_sm",
    en="en_core_web_sm",
)
44
+
45
def load_model(model_name):
    """Load a spaCy pipeline, downloading it first if it is not installed.

    Args:
        model_name: Name of the spaCy pipeline package,
            e.g. ``"de_core_news_sm"``.

    Returns:
        The pipeline object returned by ``spacy.load``.

    Raises:
        OSError: If the download command fails, or if the pipeline still
            cannot be loaded after a download that reported success.
    """
    # Function-scope imports keep this block self-contained.
    import importlib
    import subprocess
    import sys

    try:
        nlp = spacy.load(model_name)
    except OSError:
        print(f"{model_name} not found. Downloading...")
        try:
            # Use the interpreter that is running this app (a bare "python"
            # on PATH may belong to a different environment), pass the
            # arguments as a list so no shell is involved, and fail loudly
            # on a non-zero exit status instead of ignoring it.
            subprocess.run(
                [sys.executable, "-m", "spacy", "download", model_name],
                check=True,
            )
        except subprocess.CalledProcessError as err:
            raise OSError(
                f"Could not download spaCy model {model_name!r}"
            ) from err
        # The package was installed while this process was running; drop
        # cached import-finder state so the retry below can see it.
        importlib.invalidate_caches()
        nlp = spacy.load(model_name)
        print(f"{model_name} downloaded and loaded.")
    else:
        print(f"{model_name} loaded successfully.")
    return nlp
56
+
57
# Eagerly load every pipeline once at startup so requests never pay the
# loading cost; the result is keyed by lower-case language code.
print("Loading models...")
MODELS = {code: load_model(MODEL_NAMES[code]) for code in ("de", "en")}
print("All models loaded.")
64
+
65
+ # --- 3. The Core Processing Function ---
66
def get_analysis(lang, text):
    """Analyze ``text`` with the pre-loaded pipeline for ``lang``.

    ``lang`` is lower-cased and used as the key into ``MODELS``.

    Returns a 3-tuple, in this order:
      1. one list per token (rows for the visual DataFrame),
      2. one dict per token (payload for the JSON API),
      3. an HTML string wrapping the displaCy dependency rendering.

    A falsy ``text`` short-circuits to ``([], [], "")`` before any model
    is looked up.
    """
    if not text:
        return [], [], ""

    # Pick the pipeline that was loaded at startup and run it.
    pipeline = MODELS[lang.lower()]
    parsed = pipeline(text)

    # JSON view: one record per token.
    records = [
        {
            "word": tok.text,
            "lemma": tok.lemma_,
            "pos": tok.pos_,
            "tag": tok.tag_,
            "morphology": str(tok.morph),
            "dependency": tok.dep_,
            "is_stopword": tok.is_stop,
        }
        for tok in parsed
    ]

    # Table view: the same values minus the stop-word flag, as plain rows.
    columns = ("word", "lemma", "pos", "tag", "morphology", "dependency")
    rows = [[record[column] for column in columns] for record in records]

    # Dependency-parse rendering, wrapped in a scrollable bordered box.
    render_options = {
        "compact": True,
        "bg": "#ffffff",
        "color": "#000000",
        "font": "Source Sans Pro",
    }
    markup = displacy.render(
        parsed,
        style="dep",
        jupyter=False,
        options=render_options,
    )
    box_open = '<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; line-height: 2.5;">'
    framed = f"\n{box_open}\n{markup}\n</div>\n"

    return rows, records, framed
127
+
128
+ # --- 4. UI Update Function ---
129
def update_ui(lang):
    """Build the component updates that relabel the UI for ``lang``.

    ``lang`` is lower-cased and used as the key into ``UI_TEXT``. The
    result maps each module-level UI component to a new component
    instance that carries the translated text for that component.
    """
    texts = UI_TEXT[lang.lower()]

    updates = {}

    # Heading and description
    updates[markdown_title] = gr.Markdown(value=texts["title"])
    updates[markdown_subtitle] = gr.Markdown(value=texts["subtitle"])

    # Input area
    updates[text_input] = gr.Textbox(
        label=texts["input_label"],
        placeholder=texts["input_placeholder"],
    )
    updates[analyze_button] = gr.Button(value=texts["button_text"])

    # Tab captions
    updates[tab_graphic] = gr.Tab(label=texts["tab_graphic"])
    updates[tab_table] = gr.Tab(label=texts["tab_table"])
    updates[tab_json] = gr.Tab(label=texts["tab_json"])

    # Output widgets
    updates[html_out] = gr.HTML(label=texts["html_label"])
    updates[df_out] = gr.DataFrame(
        label=texts["table_label"],
        headers=texts["table_headers"],
        interactive=False,
    )
    updates[json_out] = gr.JSON(label=texts["json_label"])

    return updates
154
+
155
# --- 5. Gradio Interface ---
# Builds the whole page inside one Blocks context. The component variables
# created here are module-level names; update_ui() refers to them directly
# when it builds its return mapping.
with gr.Blocks() as demo:
    # Set default UI to German ("de")
    default_config = UI_TEXT["de"]

    # Language selector. The choices are upper-case; get_analysis() and
    # update_ui() lower-case the value before using it as a dictionary key.
    lang_radio = gr.Radio(
        ["DE", "EN"],
        label="Sprache / Language",
        value="DE"
    )

    # Heading and description, initially in the default language
    markdown_title = gr.Markdown(default_config["title"])
    markdown_subtitle = gr.Markdown(default_config["subtitle"])

    # Free-text input for the text to analyze
    text_input = gr.Textbox(
        label=default_config["input_label"],
        placeholder=default_config["input_placeholder"],
        lines=5
    )

    analyze_button = gr.Button(default_config["button_text"])

    # One tab per output format returned by get_analysis()
    with gr.Tabs() as tabs:
        with gr.Tab(default_config["tab_graphic"]) as tab_graphic:
            html_out = gr.HTML(label=default_config["html_label"])

        with gr.Tab(default_config["tab_table"]) as tab_table:
            df_out = gr.DataFrame(
                label=default_config["table_label"],
                headers=default_config["table_headers"],
                interactive=False
            )

        with gr.Tab(default_config["tab_json"]) as tab_json:
            json_out = gr.JSON(label=default_config["json_label"])

    # --- 6. Event Listeners ---

    # When the Analyze button is clicked.
    # The outputs are listed in the same order get_analysis() returns them:
    # table rows, JSON records, HTML.
    analyze_button.click(
        fn=get_analysis,
        inputs=[lang_radio, text_input],
        outputs=[df_out, json_out, html_out],
        api_name="get_morphology"  # This API will now require 'lang' as the first input
    )

    # When the Language radio button is changed.
    # update_ui() returns a mapping keyed by component, covering every
    # component listed in outputs below.
    lang_radio.change(
        fn=update_ui,
        inputs=lang_radio,
        outputs=[
            markdown_title, markdown_subtitle, text_input, analyze_button,
            tab_graphic, tab_table, tab_json,
            html_out, df_out, json_out
        ]
    )

# Launch the app
demo.launch()
215
+