fastuz committed on
Commit
dd71b94
·
verified ·
1 Parent(s): 5c6553f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +798 -227
app.py CHANGED
@@ -3,249 +3,820 @@ import json
3
  from gliner2 import GLiNER2
4
  from huggingface_hub import login
5
  import os
 
 
6
 
7
  # Authenticate with Hugging Face
8
  hf_token = os.getenv("HF_TOKEN")
9
  login(hf_token)
10
 
11
- # Load model once
12
- model = GLiNER2.from_pretrained("fastino/gliner2-base-0207")
13
-
14
- def run_ner(text, types_csv, descs):
15
- types = [t.strip() for t in types_csv.split(",") if t.strip()]
16
- desc_map = {k: v for line in descs.split("\n") if ":" in line for k,v in [line.split(":",1)]}
17
- inp = desc_map if desc_map else types
18
- res = model.extract_entities(text=text, entity_types=inp, include_confidence=True)
19
- return model.pretty_print_results(res, include_confidence=True)
20
-
21
-
22
- def run_class(text, task, labels_csv, descs, multi):
23
- labels = [l.strip() for l in labels_csv.split(",") if l.strip()]
24
- desc_map = {k: v for line in descs.split("\n") if ":" in line for k,v in [line.split(":",1)]}
25
- inp = desc_map if desc_map else labels
26
- tasks = {
27
- task: {
28
- "labels": list(inp.keys()) if isinstance(inp,dict) else inp,
29
- "multi_label": multi,
30
- **({"label_descriptions": inp} if isinstance(inp,dict) else {})
31
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  }
33
- res = model.classify_text(text=text, tasks=tasks, include_confidence=True)
34
- return model.pretty_print_results(res, include_confidence=True)
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
- def run_struct(text, struct_json):
38
  try:
39
- cfg = json.loads(struct_json)
40
- except json.JSONDecodeError as e:
41
- return f"❌ Invalid JSON: {e}"
42
- res = model.extract_json(text=text, structures=cfg, include_confidence=True)
43
- return model.pretty_print_results(res, include_confidence=True)
44
-
45
- # Simplified CSS - uses default backgrounds
46
- custom_css = """
47
- @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&display=swap');
48
-
49
- body {
50
- font-family: 'Inter', sans-serif;
51
- }
52
 
53
- header.brand {
54
- padding: 2rem 0;
55
- text-align: center;
56
- }
 
 
57
 
58
- header.brand .logo {
59
- font-size: 2rem;
60
- font-weight: 700;
61
- color: #4f46e5;
62
- }
63
 
64
- header.brand .subtitle {
65
- margin-top: 0.2rem;
66
- font-size: 0.9rem;
67
- color: #6b7280;
68
- }
69
 
70
- .gr-button.primary {
71
- background: #4f46e5 !important;
72
- color: #fff !important;
73
- border-radius: 0.5rem;
74
- padding: 0.6rem 1.2rem;
75
- }
76
- """
77
-
78
- # Pre-made examples for each task (5 per tab)
79
- ner_examples = [
80
- [
81
- "Barack Obama visited Berlin in July 2013.",
82
- "person,location,date",
83
- "person: Full name\nlocation: City\ndate: Month and year"
84
- ],
85
- [
86
- "Apple released the iPhone 13 on September 14, 2021.",
87
- "organization,product,date",
88
- "organization: Company name\nproduct: Device name\ndate: Full date"
89
- ],
90
- [
91
- "Elon Musk announced Tesla's new Roadster at the LA Auto Show.",
92
- "person,organization,event,location",
93
- "person: Full name\norganization: Company name\nevent: Conference or show\nlocation: Venue"
94
- ],
95
- [
96
- "The UEFA Champions League Final takes place in Istanbul this year.",
97
- "event,location,date",
98
- "event: Sports event\nlocation: City\ndate: Year"
99
- ],
100
- [
101
- "Microsoft acquired GitHub in 2018 for $7.5 billion.",
102
- "organization,organization,date,price",
103
- "organization: Company name\ndate: Year\nprice: Acquisition value"
104
- ]
105
- ]
106
-
107
- class_examples = [
108
- [
109
- "The movie was a thrilling experience with stunning visuals.",
110
- "sentiment",
111
- "positive,negative,neutral",
112
- "positive: Positive sentiment\nnegative: Negative sentiment\nneutral: Mixed or neutral",
113
- False
114
- ],
115
- [
116
- "Our Q1 results were disappointing, with sales down 10%.",
117
- "financial_sentiment",
118
- "positive,negative,neutral",
119
- "positive: Gains\nnegative: Losses\nneutral: Flat",
120
- False
121
- ],
122
- [
123
- "I love the new interface but dislike the slow loading time.",
124
- "feedback",
125
- "praise,complaint,suggestion",
126
- "praise: Positive feedback\ncomplaint: Negative feedback\nsuggestion: Improvement ideas",
127
- True
128
- ],
129
- [
130
- "The product meets expectations but could use more features.",
131
- "review",
132
- "positive,negative",
133
- "positive: Meets expectations\nnegative: Lacking",
134
- False
135
- ],
136
- [
137
- "Customer support was helpful, though response times were slow.",
138
- "support_sentiment",
139
- "positive,negative,neutral",
140
- "positive: Helpful support\nnegative: Unhelpful support\nneutral: Mixed experiences",
141
- True
142
- ]
143
- ]
144
-
145
- struct_examples = [
146
- [
147
- "The iPad Pro comes with an M1 chip, 8GB RAM, 256GB storage, and a 12.9-inch display.",
148
- json.dumps({
149
- "device": [
150
- "name::str::Model name",
151
- "specs::list::Hardware specifications",
152
- "price::str::Device cost"
153
- ]
154
- }, indent=2)
155
- ],
156
- [
157
- "Plan: Write report (Due: May 10), Review code (Due: May 15), Deploy (Due: May 20)",
158
- json.dumps({
159
- "tasks": [
160
- "title::str::Task title",
161
- "due_date::str::Due date"
162
- ]
163
- }, indent=2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  ],
165
- [
166
- "Product: Coffee Mug; Price: $12; Features: ceramic, dishwasher-safe, 12oz capacity.",
167
- json.dumps({
168
- "product": [
169
- "name::str::Product name",
170
- "price::str::Product price",
171
- "features::list::Product features"
172
- ]
173
- }, indent=2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  ],
175
- [
176
- "Event: AI Conference; Date: August 22, 2025; Location: Paris; Topics: ML, Ethics, Robotics.",
177
- json.dumps({
178
- "event": [
179
- "name::str::Event name",
180
- "date::str::Event date",
181
- "location::str::Event location",
182
- "topics::list::Covered topics"
183
- ]
184
- }, indent=2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
  ],
186
- [
187
- "Recipe: Pancakes; Ingredients: flour, eggs, milk; Steps: mix, cook, serve.",
188
- json.dumps({
189
- "recipe": [
190
- "title::str::Recipe title",
191
- "ingredients::list::List of ingredients",
192
- "steps::list::Preparation steps"
193
- ]
194
- }, indent=2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
  ]
196
- ]
197
 
198
- with gr.Blocks(theme=gr.themes.Soft(primary_hue="purple", secondary_hue="blue"), css=custom_css) as demo:
199
- # Header
200
- gr.HTML(
201
- """
202
- <header class=\"brand\">
203
- <div class=\"logo\">✨ GLiNER2</div>
204
- </header>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
  """
206
- )
207
-
208
- with gr.Tabs():
209
- # NER Tab
210
- with gr.TabItem("🔍 Named Entity Recognition"):
211
- with gr.Row(elem_classes="card"):
212
- with gr.Column(scale=2):
213
- txt1 = gr.Textbox(label="Input Text", lines=5)
214
- types1 = gr.Textbox(label="Entity Types (CSV)")
215
- with gr.Accordion("Optional Descriptions", open=False):
216
- desc1 = gr.Textbox(lines=3)
217
- btn1 = gr.Button("Extract Entities", variant="primary")
218
- gr.Examples(examples=ner_examples, inputs=[txt1, types1, desc1], outputs=None, fn=lambda *args: None, cache_examples=False)
219
- with gr.Column(scale=1):
220
- out1 = gr.Code(language="json", label="Results", lines=8)
221
- btn1.click(run_ner, inputs=[txt1, types1, desc1], outputs=out1)
222
-
223
- # Classification Tab
224
- with gr.TabItem("๐Ÿ“ Text Classification"):
225
- with gr.Row(elem_classes="card"):
226
- with gr.Column(scale=2):
227
- txt2 = gr.Textbox(label="Input Text", lines=5)
228
- task2 = gr.Textbox(label="Task Name")
229
- labs2 = gr.Textbox(label="Labels (CSV)")
230
- with gr.Accordion("Optional Label Descriptions", open=False):
231
- desc2 = gr.Textbox(lines=3)
232
- multi2 = gr.Checkbox(label="Multi-label?")
233
- btn2 = gr.Button("Classify Text", variant="primary")
234
- gr.Examples(examples=class_examples, inputs=[txt2, task2, labs2, desc2, multi2], outputs=None, fn=lambda *args: None, cache_examples=False)
235
- with gr.Column(scale=1):
236
- out2 = gr.Code(language="json", label="Results", lines=8)
237
- btn2.click(run_class, inputs=[txt2, task2, labs2, desc2, multi2], outputs=out2)
238
-
239
- # Structure Extraction Tab
240
- with gr.TabItem("๐Ÿ“ Structure Extraction"):
241
- with gr.Row(elem_classes="card"):
242
- with gr.Column(scale=2):
243
- txt3 = gr.Textbox(label="Input Text", lines=5)
244
- struct3 = gr.Code(language="json", label="Schema (JSON)", lines=8)
245
- btn3 = gr.Button("Extract Structure", variant="primary")
246
- gr.Examples(examples=struct_examples, inputs=[txt3, struct3], outputs=None, fn=lambda *args: None, cache_examples=False)
247
- with gr.Column(scale=1):
248
- out3 = gr.Code(language="json", label="Results", lines=8)
249
- btn3.click(run_struct, inputs=[txt3, struct3], outputs=out3)
250
-
251
- demo.launch(share=False, width=800)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  from gliner2 import GLiNER2
4
  from huggingface_hub import login
5
  import os
6
+ from typing import Dict, Any, List
7
+ import torch
8
 
9
  # Authenticate with Hugging Face
10
  hf_token = os.getenv("HF_TOKEN")
11
  login(hf_token)
12
 
13
+
14
+ # ============================================================================
15
+ # Pre-load Model
16
+ # ============================================================================
17
+
18
+ print("🚀 Loading GLiNER2 model...")
19
+ print("This may take a minute on first run (downloading model)...")
20
+
21
+ DEFAULT_MODEL = "fastino/gliner2-large-2907"
22
+ EXTRACTOR = None
23
+
24
+ if MODEL_AVAILABLE:
25
+ try:
26
+ EXTRACTOR = GLiNER2.from_pretrained(DEFAULT_MODEL)
27
+ print(f"✅ Model loaded successfully: {DEFAULT_MODEL}")
28
+ except Exception as e:
29
+ print(f"❌ Failed to load model: {e}")
30
+ print("Demo will run in UI-only mode.")
31
+ else:
32
+ print("⚠️ GLiNER2 not available. Demo will run in UI-only mode.")
33
+
34
+
35
+ # ============================================================================
36
+ # Helper Functions
37
+ # ============================================================================
38
+
39
+ def parse_classification_tasks(tasks_text: str, threshold: float):
40
+ """Parse multi-line classification task definitions.
41
+
42
+ Format:
43
+ task_name: label1, label2, label3
44
+ another_task (multi): label1, label2
45
+ """
46
+ tasks = {}
47
+
48
+ for line in tasks_text.strip().split("\n"):
49
+ line = line.strip()
50
+ if not line:
51
+ continue
52
+
53
+ # Check for multi-label indicator
54
+ multi_label = False
55
+ if "(multi)" in line or "(multi-label)" in line:
56
+ multi_label = True
57
+ line = line.replace("(multi)", "").replace("(multi-label)", "")
58
+
59
+ # Parse task_name: label1, label2, label3
60
+ if ":" not in line:
61
+ continue
62
+
63
+ parts = line.split(":", 1)
64
+ task_name = parts[0].strip()
65
+ labels_str = parts[1].strip()
66
+
67
+ if not task_name or not labels_str:
68
+ continue
69
+
70
+ # Parse labels
71
+ labels = [l.strip() for l in labels_str.split(",") if l.strip()]
72
+
73
+ # Build task config
74
+ if labels:
75
+ tasks[task_name] = {
76
+ "labels": labels,
77
+ "multi_label": multi_label,
78
+ "cls_threshold": threshold
79
+ }
80
+
81
+ return tasks
82
+
83
+
84
+ def parse_json_structures(structures_text: str):
85
+ """Parse multi-structure JSON definitions.
86
+
87
+ Format:
88
+ [structure_name]
89
+ field1::str::description
90
+ field2::list
91
+
92
+ [another_structure]
93
+ field3::str
94
+ """
95
+ structures = {}
96
+ current_structure = None
97
+ current_fields = []
98
+
99
+ for line in structures_text.strip().split("\n"):
100
+ line = line.strip()
101
+ if not line:
102
+ continue
103
+
104
+ # Check for structure header: [structure_name]
105
+ if line.startswith("[") and line.endswith("]"):
106
+ # Save previous structure
107
+ if current_structure and current_fields:
108
+ structures[current_structure] = current_fields
109
+ # Start new structure
110
+ current_structure = line[1:-1].strip()
111
+ current_fields = []
112
+ else:
113
+ # Add field to current structure
114
+ if current_structure:
115
+ current_fields.append(line)
116
+
117
+ # Save last structure
118
+ if current_structure and current_fields:
119
+ structures[current_structure] = current_fields
120
+
121
+ return structures
122
+
123
+
124
+ def parse_combined_schema(schema_text: str, threshold: float):
125
+ """Parse combined schema with multiple task types.
126
+
127
+ Format:
128
+ <entities>
129
+ person, company, location
130
+
131
+ <classification>
132
+ sentiment: positive, negative, neutral
133
+
134
+ <structures>
135
+ [contact]
136
+ name::str
137
+ email::str
138
+ """
139
+ result = {
140
+ "entities": None,
141
+ "classification": None,
142
+ "structures": None
143
  }
 
 
144
 
145
+ current_section = None
146
+ section_content = []
147
+
148
+ for line in schema_text.strip().split("\n"):
149
+ stripped = line.strip()
150
+
151
+ # Check for section headers
152
+ if stripped in ["<entities>", "<classification>", "<structures>"]:
153
+ # Save previous section
154
+ if current_section and section_content:
155
+ content = "\n".join(section_content)
156
+ if current_section == "entities":
157
+ # Parse comma-separated entities
158
+ result["entities"] = [e.strip() for e in content.split(",") if e.strip()]
159
+ elif current_section == "classification":
160
+ result["classification"] = parse_classification_tasks(content, threshold)
161
+ elif current_section == "structures":
162
+ result["structures"] = parse_json_structures(content)
163
+
164
+ # Start new section
165
+ current_section = stripped[1:-1] # Remove < >
166
+ section_content = []
167
+ else:
168
+ # Add line to current section
169
+ if current_section and stripped:
170
+ section_content.append(line)
171
+
172
+ # Save last section
173
+ if current_section and section_content:
174
+ content = "\n".join(section_content)
175
+ if current_section == "entities":
176
+ result["entities"] = [e.strip() for e in content.split(",") if e.strip()]
177
+ elif current_section == "classification":
178
+ result["classification"] = parse_classification_tasks(content, threshold)
179
+ elif current_section == "structures":
180
+ result["structures"] = parse_json_structures(content)
181
+
182
+ return result
183
+
184
+
185
+ # ============================================================================
186
+ # Demo Functions
187
+ # ============================================================================
188
+
189
+ def extract_entities_demo(text: str, entity_types: str, threshold: float):
190
+ """Demo for entity extraction."""
191
+ if EXTRACTOR is None:
192
+ return json.dumps({"error": "Model not loaded. Please check the console for errors."}, indent=2)
193
+
194
+ if not text.strip():
195
+ return json.dumps({"error": "Please enter some text to analyze."}, indent=2)
196
+
197
+ if not entity_types.strip():
198
+ return json.dumps({"error": "Please specify entity types (comma-separated)."}, indent=2)
199
 
 
200
  try:
201
+ # Parse entity types
202
+ entities = [e.strip() for e in entity_types.split(",") if e.strip()]
 
 
 
 
 
 
 
 
 
 
 
203
 
204
+ # Extract
205
+ results = EXTRACTOR.extract_entities(
206
+ text,
207
+ entities,
208
+ threshold=threshold
209
+ )
210
 
211
+ # JSON output
212
+ return json.dumps(results, indent=2)
 
 
 
213
 
214
+ except Exception as e:
215
+ return json.dumps({"error": str(e)}, indent=2)
 
 
 
216
 
217
+
218
+ def classify_text_demo(text: str, tasks_text: str, threshold: float):
219
+ """Demo for text classification with support for multiple tasks."""
220
+ if EXTRACTOR is None:
221
+ return json.dumps({"error": "Model not loaded. Please check the console for errors."}, indent=2)
222
+
223
+ if not text.strip():
224
+ return json.dumps({"error": "Please enter some text to classify."}, indent=2)
225
+
226
+ if not tasks_text.strip():
227
+ return json.dumps({"error": "Please specify classification tasks (one per line)."}, indent=2)
228
+
229
+ try:
230
+ # Parse tasks
231
+ tasks = parse_classification_tasks(tasks_text, threshold)
232
+
233
+ if not tasks:
234
+ return json.dumps({"error": "No valid tasks found. Use format: task_name: label1, label2, label3"},
235
+ indent=2)
236
+
237
+ # Classify
238
+ results = EXTRACTOR.classify_text(text, tasks)
239
+
240
+ # JSON output
241
+ return json.dumps(results, indent=2)
242
+
243
+ except Exception as e:
244
+ return json.dumps({"error": str(e)}, indent=2)
245
+
246
+
247
+ def extract_json_demo(text: str, structures_text: str, threshold: float):
248
+ """Demo for structured JSON extraction with support for multiple structures."""
249
+ if EXTRACTOR is None:
250
+ return json.dumps({"error": "Model not loaded. Please check the console for errors."}, indent=2)
251
+
252
+ if not text.strip():
253
+ return json.dumps({"error": "Please enter some text to analyze."}, indent=2)
254
+
255
+ if not structures_text.strip():
256
+ return json.dumps({"error": "Please specify structure definitions."}, indent=2)
257
+
258
+ try:
259
+ # Parse structures
260
+ structures = parse_json_structures(structures_text)
261
+
262
+ if not structures:
263
+ return json.dumps({"error": "No valid structures found. Use format: [structure_name] followed by fields."},
264
+ indent=2)
265
+
266
+ # Extract
267
+ results = EXTRACTOR.extract_json(text, structures, threshold=threshold)
268
+
269
+ # JSON output
270
+ return json.dumps(results, indent=2)
271
+
272
+ except Exception as e:
273
+ return json.dumps({"error": str(e)}, indent=2)
274
+
275
+
276
+ def combined_demo(text: str, schema_text: str, threshold: float):
277
+ """Combined extraction with entities, classification, and structures."""
278
+ if EXTRACTOR is None:
279
+ return json.dumps({"error": "Model not loaded. Please check the console for errors."}, indent=2)
280
+
281
+ if not text.strip():
282
+ return json.dumps({"error": "Please enter some text to analyze."}, indent=2)
283
+
284
+ if not schema_text.strip():
285
+ return json.dumps({"error": "Please define at least one task section."}, indent=2)
286
+
287
+ try:
288
+ # Parse schema
289
+ parsed = parse_combined_schema(schema_text, threshold)
290
+
291
+ # Check if at least one section is defined
292
+ if not any([parsed["entities"], parsed["classification"], parsed["structures"]]):
293
+ return json.dumps(
294
+ {"error": "No valid tasks found. Use <entities>, <classification>, or <structures> sections."},
295
+ indent=2)
296
+
297
+ # Build schema using GLiNER2's create_schema API
298
+ schema = EXTRACTOR.create_schema()
299
+
300
+ # Add entities if defined
301
+ if parsed["entities"]:
302
+ schema = schema.entities(parsed["entities"])
303
+
304
+ # Add classifications if defined
305
+ if parsed["classification"]:
306
+ for task_name, task_config in parsed["classification"].items():
307
+ schema = schema.classification(
308
+ task_name,
309
+ task_config["labels"],
310
+ multi_label=task_config["multi_label"],
311
+ cls_threshold=task_config["cls_threshold"]
312
+ )
313
+
314
+ # Add structures if defined
315
+ if parsed["structures"]:
316
+ for struct_name, fields in parsed["structures"].items():
317
+ struct_schema = schema.structure(struct_name)
318
+ for field_spec in fields:
319
+ # Parse field specification: field_name::type::description
320
+ parts = field_spec.split("::")
321
+ field_name = parts[0].strip()
322
+
323
+ # Default values
324
+ dtype = "list"
325
+ description = None
326
+ choices = None
327
+
328
+ # Parse type and description if provided
329
+ if len(parts) > 1:
330
+ second_part = parts[1].strip()
331
+ # Check if it's a choice field: [option1|option2|option3]
332
+ if second_part.startswith("[") and second_part.endswith("]"):
333
+ choices_str = second_part[1:-1]
334
+ choices = [c.strip() for c in choices_str.split("|") if c.strip()]
335
+ if len(parts) > 2:
336
+ third_part = parts[2].strip()
337
+ if third_part in ["str", "list"]:
338
+ dtype = third_part
339
+ else:
340
+ description = third_part
341
+ if len(parts) > 3:
342
+ description = parts[3].strip()
343
+ elif second_part in ["str", "list"]:
344
+ dtype = second_part
345
+ if len(parts) > 2:
346
+ description = parts[2].strip()
347
+ else:
348
+ description = second_part
349
+
350
+ # Add field to structure
351
+ if choices:
352
+ struct_schema = struct_schema.field(
353
+ field_name,
354
+ dtype=dtype,
355
+ choices=choices,
356
+ description=description if description else None
357
+ )
358
+ elif description:
359
+ struct_schema = struct_schema.field(
360
+ field_name,
361
+ dtype=dtype,
362
+ description=description
363
+ )
364
+ else:
365
+ struct_schema = struct_schema.field(field_name, dtype=dtype)
366
+
367
+ schema = struct_schema
368
+
369
+ # Extract with combined schema
370
+ results = EXTRACTOR.extract(text, schema, threshold=threshold)
371
+
372
+ # JSON output
373
+ return json.dumps(results, indent=2)
374
+
375
+ except Exception as e:
376
+ return json.dumps({"error": str(e)}, indent=2)
377
+
378
+
379
+ # ============================================================================
380
+ # Example Data
381
+ # ============================================================================
382
+
383
+ EXAMPLES = {
384
+ "entities": [
385
+ [
386
+ "Apple Inc. CEO Tim Cook announced the new iPhone 15 in Cupertino, California on September 12, 2023.",
387
+ "company, person, product, location, date",
388
+ 0.5
389
+ ],
390
+ [
391
+ "Dr. Sarah Johnson from MIT published groundbreaking research on quantum computing.",
392
+ "person, organization, research_topic",
393
+ 0.4
394
+ ],
395
+ [
396
+ "Tesla Model 3 starts at $40,000 and features autopilot, 358-mile range, and 5-star safety rating.",
397
+ "product, company, price, feature, metric",
398
+ 0.4
399
+ ],
400
+ [
401
+ "The Eiffel Tower in Paris, France attracts millions of tourists annually. Built in 1889, it stands 330 meters tall.",
402
+ "landmark, location, country, date, measurement",
403
+ 0.5
404
+ ],
405
+ [
406
+ "Amazon acquired Whole Foods for $13.7 billion in 2017, marking their entry into grocery retail.",
407
+ "company, amount, date, industry",
408
+ 0.5
409
+ ],
410
+ [
411
+ "NASA's James Webb Space Telescope discovered exoplanet TRAPPIST-1e orbiting a red dwarf star 40 light-years away.",
412
+ "organization, technology, celestial_body, distance",
413
+ 0.4
414
+ ],
415
  ],
416
+ "classification": [
417
+ [
418
+ "This product exceeded my expectations! The quality is outstanding and delivery was super fast.",
419
+ "sentiment: positive, negative, neutral",
420
+ 0.5
421
+ ],
422
+ [
423
+ "Breaking: Major tech company announces layoffs affecting thousands of employees.",
424
+ "sentiment: positive, negative, neutral\nurgency: high, medium, low\ntopic (multi): technology, business, politics, sports, health",
425
+ 0.3
426
+ ],
427
+ [
428
+ "Your order #12345 has been shipped and will arrive by Friday. Track your package using the link below.",
429
+ "message_type: notification, marketing, support, alert\nsentiment: positive, negative, neutral",
430
+ 0.5
431
+ ],
432
+ [
433
+ "URGENT: Your account shows suspicious activity. Click here immediately to verify your identity.",
434
+ "intent: spam, phishing, legitimate, promotional\nurgency: critical, high, normal, low\nsafety (multi): safe, suspicious, malicious",
435
+ 0.4
436
+ ],
437
+ [
438
+ "Learn Python programming in just 30 days! Limited time offer: 50% off all courses. Don't miss out!",
439
+ "category: education, marketing, news, entertainment\ntone: professional, casual, urgent, friendly\naction_required: yes, no",
440
+ 0.5
441
+ ],
442
+ [
443
+ "The new climate report shows alarming trends in global temperatures. Scientists urge immediate action to reduce emissions.",
444
+ "topic (multi): climate, science, politics, environment\nemotion (multi): concern, urgency, hope, fear\ncredibility: high, medium, low",
445
+ 0.4
446
+ ],
447
+ [
448
+ "Subject: Re: Q4 Budget Proposal - Urgent Review Needed. Hi team, I've reviewed the budget proposal and have some concerns about the marketing allocation. We need to discuss this before Friday's board meeting. Please confirm your availability for a call tomorrow at 2 PM. Thanks, Sarah",
449
+ "email_type: internal, external, automated, newsletter\nsentiment: positive, negative, neutral\npriority: critical, high, medium, low\nintent: request, inform, complaint, inquiry, follow_up\ntone: professional, casual, urgent, friendly, formal\naction_required: yes, no\ndepartment (multi): finance, marketing, hr, engineering, sales\nurgency: immediate, soon, flexible\nresponse_expected: yes, no",
450
+ 0.4
451
+ ],
452
  ],
453
+ "json": [
454
+ [
455
+ "Contact John Smith at john.smith@email.com or call (555) 123-4567.",
456
+ "[contact]\nname::str\nemail::str\nphone::str",
457
+ 0.4
458
+ ],
459
+ [
460
+ "Patient: Sarah Johnson, 34, presented with chest pain. Prescribed: Lisinopril 10mg daily, Metoprolol 25mg twice daily.",
461
+ "[patient]\nname::str\nage::str\nsymptoms::list\n\n[prescription]\nmedication::str\ndosage::str\nfrequency::str",
462
+ 0.4
463
+ ],
464
+ [
465
+ "Order #ORD-2024-001: MacBook Pro 16 inch (Qty: 1, $2499), Magic Mouse (Qty: 2, $79). Subtotal: $2657, Tax: $212, Total: $2869",
466
+ "[order]\norder_id::str\nitems::list\nquantities::list\nunit_prices::list\nsubtotal::str\ntax::str\ntotal::str",
467
+ 0.4
468
+ ],
469
+ [
470
+ "Flight UA123 departing San Francisco (SFO) at 8:30 AM, arriving New York (JFK) at 5:15 PM. Gate B12, Seat 14A. Economy class.",
471
+ "[flight]\nflight_number::str\ndeparture_city::str\ndeparture_code::str\ndeparture_time::str\narrival_city::str\narrival_code::str\narrival_time::str\ngate::str\nseat::str\nclass::[economy|business|first]::str",
472
+ 0.4
473
+ ],
474
+ [
475
+ "Meeting scheduled for March 15, 2024 at 2:30 PM PST. Attendees: John Doe, Jane Smith, Bob Wilson. Topic: Q1 Budget Review. Location: Conference Room A (or Zoom link: zoom.us/j/123456).",
476
+ "[meeting]\ndate::str\ntime::str\ntimezone::str\nattendees::list\ntopic::str\nlocation::str\nvirtual_link::str",
477
+ 0.4
478
+ ],
479
+ [
480
+ "Job posting: Senior Software Engineer at Google, Mountain View CA. Salary: $150k-$200k. Requirements: 5+ years Python, React, AWS. Benefits include health insurance, 401k matching, unlimited PTO.",
481
+ "[job]\ntitle::str\ncompany::str\nlocation::str\nsalary_range::str\nrequired_skills::list\nyears_experience::str\nbenefits::list",
482
+ 0.4
483
+ ],
484
+ [
485
+ "Expense Report: Paid $85.50 at Whole Foods for groceries, $45 for Uber rides to office, $120 at Target for office supplies, and $156.80 for electricity bill.",
486
+ "[expense]\nvendor::str\namount::str\ncategory::[food|transport|shopping|utilities]::str\ndescription::str",
487
+ 0.4
488
+ ],
489
+ [
490
+ "Business expense: $67.25 at Starbucks for client meeting refreshments on March 15, 2024. Category: Food & Beverage. Payment method: Corporate card.",
491
+ "[expense]\ndate::str\nvendor::str\namount::str\ncategory::[food|transport|shopping|utilities]::str\npurpose::str\npayment_method::str",
492
+ 0.4
493
+ ],
494
  ],
495
+ "combined": [
496
+ [
497
+ "Apple CEO Tim Cook announced the new iPhone 15 in Cupertino for $999. This is exciting news!",
498
+ "<entities>\ncompany, person, product, location\n\n<classification>\nsentiment: positive, negative, neutral",
499
+ 0.5
500
+ ],
501
+ [
502
+ "Breaking: Tech startup raises $50M Series B. CEO Sarah Chen says 'We're hiring 100 engineers.' Contact: press@startup.com",
503
+ "<entities>\ncompany, person, amount\n\n<classification>\nsentiment: positive, negative, neutral\nurgency: high, medium, low\ntopic (multi): technology, business, finance\n\n<structures>\n[contact]\nemail::str\nrole::str",
504
+ 0.4
505
+ ],
506
+ [
507
+ "Dr. Emily Watson from Stanford University published research on AI safety. The paper discusses risks and proposes new frameworks. Contact: e.watson@stanford.edu for collaboration.",
508
+ "<entities>\nperson, organization, research_topic\n\n<classification>\ncategory: research, news, opinion\ncredibility: high, medium, low\n\n<structures>\n[researcher]\nname::str\nemail::str\naffiliation::str\nresearch_area::str",
509
+ 0.4
510
+ ],
511
+ [
512
+ "URGENT: Security breach at MegaCorp Inc. exposed 2 million user records including names, emails, and passwords. CEO John Davis apologized. Support: help@megacorp.com",
513
+ "<entities>\ncompany, person, data_type, amount\n\n<classification>\nurgency: critical, high, medium, low\nsentiment: positive, negative, neutral\ntopic (multi): security, technology, business, legal\n\n<structures>\n[incident]\ncompany::str\naffected_records::str\ndata_types::list\ncontact_email::str",
514
+ 0.3
515
+ ],
516
+ [
517
+ "New restaurant 'Le Bernardin' opens in NYC. Chef Eric Ripert serves French cuisine. Reservations: 555-1234 or reservations@bernardin.com. Price range: $$$. Menu includes Dover Sole, Wagyu Beef, and Chocolate Soufflé.",
518
+ "<entities>\nrestaurant, location, person, cuisine, dish\n\n<classification>\nprice_range: budget, moderate, expensive, luxury\ncuisine_type: french, italian, american, asian, fusion\n\n<structures>\n[restaurant]\nname::str\nchef::str\nphone::str\nemail::str\nmenu_items::list\nlocation::str",
519
+ 0.4
520
+ ],
521
+ [
522
+ "Expense: John Smith spent $125.40 at Whole Foods Market in Seattle for groceries. Payment approved. High priority for reimbursement.",
523
+ "<entities>\nperson, merchant, location, amount\n\n<classification>\npriority: high, medium, low\napproval_status: approved, pending, rejected\n\n<structures>\n[expense]\nemployee::str\nvendor::str\namount::str\ncategory::[food|transport|shopping|utilities]::str\nlocation::str",
524
+ 0.4
525
+ ],
526
+ [
527
+ "Monthly expenses report: Sarah paid $78 at Shell Gas Station, $234.50 for internet/phone bill from AT&T, $89.99 at Amazon for office supplies, and $145 at Chipotle for team lunch. All expenses are pending approval with medium priority.",
528
+ "<entities>\nperson, merchant, amount\n\n<classification>\napproval_status (multi): approved, pending, rejected\npriority: high, medium, low\nexpense_type (multi): business, personal, travel\n\n<structures>\n[expense]\nemployee::str\nvendor::str\namount::str\ncategory::[food|transport|shopping|utilities]::str",
529
+ 0.4
530
+ ],
531
  ]
532
+ }
533
 
534
+
535
+ # ============================================================================
536
+ # UI Creation
537
+ # ============================================================================
538
+
539
def _threshold_slider(value: float, label: str):
    """Return the 0.0-1.0 (step 0.05) threshold slider shared by every tab."""
    return gr.Slider(
        minimum=0.0,
        maximum=1.0,
        value=value,
        step=0.05,
        label=label,
    )


def create_demo():
    """Create the Gradio demo interface.

    Builds a ``gr.Blocks`` app with a styled header, four task tabs (entity
    extraction, text classification, JSON extraction, combined tasks) and a
    footer. Every tab has the same shape: an input column (text, schema
    definition, threshold slider, run button), an output column showing the
    JSON result, a ``gr.Examples`` gallery fed from ``EXAMPLES`` and a click
    handler that calls the matching ``*_demo`` function with
    ``[text, schema, threshold]``.

    Returns:
        The constructed ``gr.Blocks`` object. It is not launched here.

    NOTE(review): the nesting below was reconstructed from a listing whose
    indentation had been stripped. The examples gallery and click wiring are
    assumed to sit at tab level, below the input/output row; confirm against
    the deployed layout.
    """
    examples_label = "💡 Try These Examples"

    with gr.Blocks(
        title="GLiNER2 by Fastino",
        theme=gr.themes.Soft(
            primary_hue="slate",
            secondary_hue="zinc",
        ),
        css="""
        .gradio-container {
            max-width: 1200px !important;
        }
        .header {
            text-align: center;
            padding: 2rem;
            background: linear-gradient(135deg, #334155 0%, #1e293b 100%);
            color: white;
            border-radius: 10px;
            margin-bottom: 2rem;
        }
        .header h1 {
            margin: 0;
            font-size: 2.5rem;
            font-weight: bold;
        }
        .header p {
            margin: 0.5rem 0 0 0;
            font-size: 1.1rem;
            opacity: 0.9;
        }
        .header a {
            color: white;
            text-decoration: none;
            border-bottom: 2px solid rgba(255, 255, 255, 0.5);
            transition: border-color 0.3s;
        }
        .header a:hover {
            border-bottom-color: white;
        }
        .fastino-badge {
            display: inline-block;
            padding: 0.5rem 1rem;
            background: rgba(255, 255, 255, 0.2);
            color: white;
            border-radius: 20px;
            font-weight: bold;
            margin-top: 1rem;
            backdrop-filter: blur(10px);
        }
        .powered-by {
            text-align: center;
            padding: 1rem;
            color: #64748b;
            font-size: 0.9rem;
            margin-top: 2rem;
        }
        """
    ) as demo:
        # Header (plain string: there is nothing to interpolate, so no f-prefix)
        gr.HTML("""
            <div class="header">
                <h1>🤖 GLiNER2 by <a href="https://fastino.ai" target="_blank">Fastino</a></h1>
                <p>Advanced Information Extraction with Schema-Based Modeling</p>
                <div class="fastino-badge">Powered by Fastino AI</div>
            </div>
        """)

        # Tabs for different functionalities
        with gr.Tabs():
            # ==================== Entity Extraction Tab ====================
            with gr.Tab("🎯 Entity Extraction"):
                gr.Markdown("""
                Extract named entities like people, organizations, locations, products, and more.
                """)

                with gr.Row():
                    with gr.Column(scale=2):
                        ner_text = gr.Textbox(
                            label="Input Text",
                            placeholder="Enter text to extract entities from...",
                            lines=5
                        )
                        ner_entities = gr.Textbox(
                            label="Entity Types (comma-separated)",
                            placeholder="e.g., person, company, location, date",
                            value="person, company, location"
                        )
                        ner_threshold = _threshold_slider(0.5, "Confidence Threshold")
                        ner_button = gr.Button("Extract Entities", variant="primary", size="lg")

                    with gr.Column(scale=2):
                        ner_json = gr.Code(label="Results (JSON)", language="json", lines=15)

                gr.Examples(
                    examples=EXAMPLES["entities"],
                    inputs=[ner_text, ner_entities, ner_threshold],
                    label=examples_label
                )

                ner_button.click(
                    fn=extract_entities_demo,
                    inputs=[ner_text, ner_entities, ner_threshold],
                    outputs=ner_json
                )

            # ==================== Classification Tab ====================
            with gr.Tab("🏷️ Text Classification"):
                gr.Markdown("""
                Classify text into predefined categories. Supports multiple classification tasks at once!

                **Format:** `task_name: label1, label2, label3`
                **Multi-label:** Add `(multi)` after task name: `task_name (multi): label1, label2`
                """)

                with gr.Row():
                    with gr.Column(scale=2):
                        cls_text = gr.Textbox(
                            label="Input Text",
                            placeholder="Enter text to classify...",
                            lines=5
                        )
                        cls_tasks = gr.Textbox(
                            label="Classification Tasks (one per line)",
                            placeholder="sentiment: positive, negative, neutral\ntopic (multi): technology, business, sports",
                            value="sentiment: positive, negative, neutral",
                            lines=6
                        )
                        cls_threshold = _threshold_slider(0.5, "Confidence Threshold")
                        cls_button = gr.Button("Classify", variant="primary", size="lg")

                    with gr.Column(scale=2):
                        cls_json = gr.Code(label="Results (JSON)", language="json", lines=15)

                gr.Examples(
                    examples=EXAMPLES["classification"],
                    inputs=[cls_text, cls_tasks, cls_threshold],
                    label=examples_label
                )

                cls_button.click(
                    fn=classify_text_demo,
                    inputs=[cls_text, cls_tasks, cls_threshold],
                    outputs=cls_json
                )

            # ==================== JSON Extraction Tab ====================
            with gr.Tab("📋 JSON Extraction"):
                gr.Markdown("""
                Extract structured data from unstructured text. Supports multiple structures at once!

                **Format:** Use `[structure_name]` headers followed by field specifications
                **Fields:** `field_name::type::description` (type: str or list)
                """)

                with gr.Row():
                    with gr.Column(scale=2):
                        json_text = gr.Textbox(
                            label="Input Text",
                            placeholder="Enter text with structured information...",
                            lines=5
                        )
                        json_structures = gr.Textbox(
                            label="Structure Definitions (use [structure_name] headers)",
                            placeholder="[contact]\nname::str\nemail::str\nphone::str\n\n[product]\nname::str\nprice::str",
                            value="[contact]\nname::str\nemail::str\nphone::str",
                            lines=10
                        )
                        # This tab starts at 0.4 rather than the 0.5 used by
                        # the other tabs, exactly as in the original.
                        json_threshold = _threshold_slider(0.4, "Threshold")
                        json_button = gr.Button("Extract Data", variant="primary", size="lg")

                    with gr.Column(scale=2):
                        json_json = gr.Code(label="Results (JSON)", language="json", lines=20)

                gr.Examples(
                    examples=EXAMPLES["json"],
                    inputs=[json_text, json_structures, json_threshold],
                    label=examples_label
                )

                json_button.click(
                    fn=extract_json_demo,
                    inputs=[json_text, json_structures, json_threshold],
                    outputs=json_json
                )

            # ==================== Combined Tasks Tab ====================
            with gr.Tab("🔮 Combined Tasks"):
                gr.Markdown("""
                **Combine multiple extraction types in a single call!**

                Use section headers to define any combination of tasks:
                - `<entities>` - Named entity extraction (comma-separated)
                - `<classification>` - Text classification tasks (one per line)
                - `<structures>` - JSON structure extraction (use [name] headers)

                **All sections are optional** - include only what you need!
                """)

                with gr.Row():
                    with gr.Column(scale=2):
                        combined_text = gr.Textbox(
                            label="Input Text",
                            placeholder="Enter text to analyze...",
                            lines=5
                        )
                        combined_schema = gr.Textbox(
                            label="Combined Schema Definition",
                            placeholder="<entities>\ncompany, person, location\n\n<classification>\nsentiment: positive, negative, neutral\n\n<structures>\n[contact]\nemail::str",
                            value="<entities>\ncompany, person, location\n\n<classification>\nsentiment: positive, negative, neutral",
                            lines=15
                        )
                        combined_threshold = _threshold_slider(0.5, "Threshold")
                        combined_button = gr.Button("Extract All", variant="primary", size="lg")

                    with gr.Column(scale=2):
                        combined_json = gr.Code(label="Results (JSON)", language="json", lines=25)

                gr.Examples(
                    examples=EXAMPLES["combined"],
                    inputs=[combined_text, combined_schema, combined_threshold],
                    label=examples_label
                )

                combined_button.click(
                    fn=combined_demo,
                    inputs=[combined_text, combined_schema, combined_threshold],
                    outputs=combined_json
                )

        # Footer
        # NOTE(review): the model id below is display text only; confirm it
        # matches the checkpoint actually loaded elsewhere in this file.
        gr.Markdown("""
        ---
        ### 📚 About GLiNER2

        GLiNER2 is an advanced information extraction framework featuring:
        - **Zero-shot entity recognition** with custom entity types
        - **Flexible text classification** (single/multi-label)
        - **Structured data extraction** from unstructured text
        - **High performance** with state-of-the-art accuracy

        **Model:** `fastino/gliner2-large-2907` | Built with ❤️ by [Fastino AI](https://fastino.ai)
        """)

        gr.HTML("""
            <div class="powered-by">
                <strong>Powered by Fastino AI</strong> — Task-specific Language Models (TLMs) for production workloads
            </div>
        """)

    return demo
814
+
815
+
816
+ # ============================================================================
817
+ # Main
818
+ # ============================================================================
819
+
820
if __name__ == "__main__":
    # Script entry point: build the Blocks UI and start the Gradio server with
    # launch()'s default settings. Nothing here runs when the module is imported.
    demo = create_demo()
    demo.launch()