Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
| import gradio as gr | |
| import json | |
| from gliner2 import GLiNER2 | |
| from huggingface_hub import login | |
| import os | |
| from typing import Dict, Any, List | |
| import torch | |
# Authenticate with Hugging Face.
# HF_TOKEN is optional: when it is missing we must NOT call login(None), because
# huggingface_hub then falls back to an interactive token prompt, which cannot be
# answered in a headless deployment. Public models still load anonymously.
hf_token = os.getenv("HF_TOKEN")
if hf_token:
    login(hf_token)
else:
    print("HF_TOKEN is not set; skipping Hugging Face login.")
# ============================================================================
# Pre-load Model
# ============================================================================
print("๐ Loading GLiNER2 model...")
print("This may take a minute on first run (downloading model)...")

# Hub identifier of the checkpoint served by this demo.
DEFAULT_MODEL = "fastino/gliner2-large-2907"

# Shared model instance used by every demo callback; stays None when loading
# fails so the UI can still start and report a clear error per request.
EXTRACTOR = None

# The flag below was previously read without ever being assigned, which raised
# NameError at import time. Derive it from whether the GLiNER2 class is actually
# present in this module's namespace.
MODEL_AVAILABLE = globals().get("GLiNER2") is not None

if MODEL_AVAILABLE:
    try:
        EXTRACTOR = GLiNER2.from_pretrained(DEFAULT_MODEL)
        print(f"โ Model loaded successfully: {DEFAULT_MODEL}")
    except Exception as e:
        # Best effort: keep the app alive in UI-only mode and surface the reason.
        print(f"โ Failed to load model: {e}")
        print("Demo will run in UI-only mode.")
else:
    print("โ ๏ธ GLiNER2 not available. Demo will run in UI-only mode.")
| # ============================================================================ | |
| # Helper Functions | |
| # ============================================================================ | |
def parse_classification_tasks(tasks_text: str, threshold: float):
    """Turn a multi-line task description into a dict of task configs.

    Each non-empty line has the form::

        task_name: label1, label2, label3
        another_task (multi): label1, label2

    A ``(multi)`` or ``(multi-label)`` marker anywhere on the line flags the
    task as multi-label and is removed before the line is split. Lines without
    a colon, without a task name, or without any label are ignored.

    Returns a dict mapping each task name to
    ``{"labels": [...], "multi_label": bool, "cls_threshold": threshold}``.
    """
    multi_markers = ("(multi)", "(multi-label)")
    parsed = {}

    for raw in tasks_text.strip().split("\n"):
        entry = raw.strip()

        # Detect the multi-label marker first, then drop it from the line.
        is_multi = any(marker in entry for marker in multi_markers)
        if is_multi:
            for marker in multi_markers:
                entry = entry.replace(marker, "")

        # Only the first colon separates the task name from its labels.
        name_part, colon, labels_part = entry.partition(":")
        if not colon:
            continue

        name = name_part.strip()
        label_list = [piece.strip() for piece in labels_part.split(",") if piece.strip()]
        if name and label_list:
            parsed[name] = {
                "labels": label_list,
                "multi_label": is_multi,
                "cls_threshold": threshold,
            }

    return parsed
def parse_json_structures(structures_text: str):
    """Group field lines under their ``[structure_name]`` headers.

    Expected layout::

        [structure_name]
        field1::str::description
        field2::list
        [another_structure]
        field3::str

    Blank lines are skipped, field lines that appear before the first header
    are dropped, and a header with no fields produces no entry. Field lines are
    kept as stripped raw strings.

    Returns a dict mapping each structure name to its list of field lines.
    """
    result = {}
    active_name = None
    active_fields = []

    for raw in structures_text.strip().split("\n"):
        entry = raw.strip()
        if not entry:
            continue

        is_header = entry.startswith("[") and entry.endswith("]")
        if not is_header:
            # A field only counts once some named structure is open.
            if active_name:
                active_fields.append(entry)
            continue

        # New header: commit whatever was collected for the previous one.
        if active_name and active_fields:
            result[active_name] = active_fields
        active_name, active_fields = entry[1:-1].strip(), []

    # Commit the trailing structure, which no later header flushed.
    if active_name and active_fields:
        result[active_name] = active_fields

    return result
def _store_combined_section(result, section, lines, threshold):
    """Parse the buffered lines of one section and record them in ``result``."""
    if not section or not lines:
        return

    content = "\n".join(lines)
    if section == "entities":
        # Entity labels may be separated by commas and/or line breaks; treating
        # a newline like a comma keeps "one label per line" input from being
        # fused into a single label that contains a newline.
        result["entities"] = [
            label.strip()
            for label in content.replace("\n", ",").split(",")
            if label.strip()
        ]
    elif section == "classification":
        result["classification"] = parse_classification_tasks(content, threshold)
    elif section == "structures":
        result["structures"] = parse_json_structures(content)


def parse_combined_schema(schema_text: str, threshold: float):
    """Parse a combined schema made of optional tagged sections.

    Format::

        <entities>
        person, company, location
        <classification>
        sentiment: positive, negative, neutral
        <structures>
        [contact]
        name::str
        email::str

    Args:
        schema_text: Full schema text. Lines before the first section header
            are ignored, and blank lines are dropped.
        threshold: Forwarded to ``parse_classification_tasks`` as the
            per-task classification threshold.

    Returns:
        A dict with keys ``"entities"`` (list of labels), ``"classification"``
        (output of ``parse_classification_tasks``) and ``"structures"`` (output
        of ``parse_json_structures``). A key stays ``None`` when its section is
        absent or empty. If a section header is repeated, the later non-empty
        occurrence replaces the earlier one.
    """
    section_headers = ("<entities>", "<classification>", "<structures>")
    result = {
        "entities": None,
        "classification": None,
        "structures": None,
    }
    current_section = None
    section_content = []

    for line in schema_text.strip().split("\n"):
        stripped = line.strip()
        if stripped in section_headers:
            # A new header closes the section collected so far.
            _store_combined_section(result, current_section, section_content, threshold)
            current_section = stripped[1:-1]  # Remove < >
            section_content = []
        elif current_section and stripped:
            section_content.append(line)

    # The last section has no following header to close it.
    _store_combined_section(result, current_section, section_content, threshold)
    return result
| # ============================================================================ | |
| # Demo Functions | |
| # ============================================================================ | |
def extract_entities_demo(text: str, entity_types: str, threshold: float):
    """Run entity extraction for the UI and return the outcome as a JSON string.

    ``entity_types`` is a comma-separated list of labels. Any problem (model
    not loaded, blank input, or an exception during extraction) is reported as
    ``{"error": "..."}`` instead of being raised.
    """
    # Pick the first failing precondition, if any.
    if EXTRACTOR is None:
        failure = "Model not loaded. Please check the console for errors."
    elif not text.strip():
        failure = "Please enter some text to analyze."
    elif not entity_types.strip():
        failure = "Please specify entity types (comma-separated)."
    else:
        failure = None

    if failure is not None:
        return json.dumps({"error": failure}, indent=2)

    try:
        # Split the label list, discarding blanks left by stray commas.
        labels = [piece.strip() for piece in entity_types.split(",") if piece.strip()]
        payload = EXTRACTOR.extract_entities(text, labels, threshold=threshold)
        # Serialisation stays inside the try so encoding errors are reported too.
        return json.dumps(payload, indent=2)
    except Exception as exc:
        return json.dumps({"error": str(exc)}, indent=2)
def classify_text_demo(text: str, tasks_text: str, threshold: float):
    """Classify ``text`` against one or more tasks and return a JSON string.

    ``tasks_text`` holds one task per line in the format understood by
    ``parse_classification_tasks``; ``threshold`` is passed to that parser.
    Failures of any kind come back as ``{"error": "..."}``.
    """

    def as_error(message):
        # Every failure path shares the same JSON envelope.
        return json.dumps({"error": message}, indent=2)

    if EXTRACTOR is None:
        return as_error("Model not loaded. Please check the console for errors.")
    if not text.strip():
        return as_error("Please enter some text to classify.")
    if not tasks_text.strip():
        return as_error("Please specify classification tasks (one per line).")

    try:
        task_specs = parse_classification_tasks(tasks_text, threshold)
        if task_specs:
            return json.dumps(EXTRACTOR.classify_text(text, task_specs), indent=2)
        return as_error("No valid tasks found. Use format: task_name: label1, label2, label3")
    except Exception as exc:
        return as_error(str(exc))
def extract_json_demo(text: str, structures_text: str, threshold: float):
    """Extract structured records from ``text`` and return them as a JSON string.

    ``structures_text`` uses ``[structure_name]`` headers followed by field
    lines, as parsed by ``parse_json_structures``. Validation problems and
    exceptions are returned as ``{"error": "..."}``.
    """
    # Ordered precondition checks: (failed?, message shown to the user).
    preconditions = (
        (EXTRACTOR is None, "Model not loaded. Please check the console for errors."),
        (not text.strip(), "Please enter some text to analyze."),
        (not structures_text.strip(), "Please specify structure definitions."),
    )
    for failed, message in preconditions:
        if failed:
            return json.dumps({"error": message}, indent=2)

    try:
        layout = parse_json_structures(structures_text)
        if not layout:
            message = "No valid structures found. Use format: [structure_name] followed by fields."
            return json.dumps({"error": message}, indent=2)

        extracted = EXTRACTOR.extract_json(text, layout, threshold=threshold)
        return json.dumps(extracted, indent=2)
    except Exception as exc:
        return json.dumps({"error": str(exc)}, indent=2)
def _parse_field_spec(field_spec):
    """Split one ``::``-separated field line into (name, dtype, choices, description).

    Accepted shapes (every part is stripped of surrounding whitespace):

        name
        name::str            name::list
        name::str::description
        name::description
        name::[a|b|c]
        name::[a|b|c]::str
        name::[a|b|c]::description
        name::[a|b|c]::str::description

    ``dtype`` defaults to ``"list"``; ``choices`` and ``description`` default
    to ``None``.
    """
    parts = [part.strip() for part in field_spec.split("::")]
    name = parts[0]
    dtype = "list"
    choices = None
    description = None

    if len(parts) > 1:
        second = parts[1]
        if second.startswith("[") and second.endswith("]"):
            # Choice field: [option1|option2|option3]
            choices = [c.strip() for c in second[1:-1].split("|") if c.strip()]
            if len(parts) > 2:
                third = parts[2]
                if third in ("str", "list"):
                    dtype = third
                else:
                    description = third
                # An explicit fourth part always wins as the description.
                if len(parts) > 3:
                    description = parts[3]
        elif second in ("str", "list"):
            dtype = second
            if len(parts) > 2:
                description = parts[2]
        else:
            # Anything else in second position is free-text description.
            description = second

    return name, dtype, choices, description


def combined_demo(text: str, schema_text: str, threshold: float):
    """Combined extraction with entities, classification, and structures.

    Args:
        text: Input text to analyze.
        schema_text: Schema in the sectioned format understood by
            ``parse_combined_schema`` (``<entities>``, ``<classification>``,
            ``<structures>``).
        threshold: Used both as the per-task classification threshold and as
            the ``threshold`` argument of the final ``EXTRACTOR.extract`` call.

    Returns:
        A JSON string with the extraction results, or ``{"error": "..."}`` when
        the model is not loaded, an input is blank, no section yields a task,
        or any exception is raised while building the schema or extracting.
    """
    if EXTRACTOR is None:
        return json.dumps({"error": "Model not loaded. Please check the console for errors."}, indent=2)
    if not text.strip():
        return json.dumps({"error": "Please enter some text to analyze."}, indent=2)
    if not schema_text.strip():
        return json.dumps({"error": "Please define at least one task section."}, indent=2)

    try:
        parsed = parse_combined_schema(schema_text, threshold)

        # At least one section must have produced something usable.
        if not any([parsed["entities"], parsed["classification"], parsed["structures"]]):
            return json.dumps(
                {"error": "No valid tasks found. Use <entities>, <classification>, or <structures> sections."},
                indent=2)

        # Build schema using GLiNER2's create_schema API
        schema = EXTRACTOR.create_schema()

        if parsed["entities"]:
            schema = schema.entities(parsed["entities"])

        if parsed["classification"]:
            for task_name, task_config in parsed["classification"].items():
                schema = schema.classification(
                    task_name,
                    task_config["labels"],
                    multi_label=task_config["multi_label"],
                    cls_threshold=task_config["cls_threshold"],
                )

        if parsed["structures"]:
            for struct_name, fields in parsed["structures"].items():
                struct_schema = schema.structure(struct_name)
                for field_spec in fields:
                    field_name, dtype, choices, description = _parse_field_spec(field_spec)

                    # Only pass the optional keywords that were actually given.
                    field_kwargs = {"dtype": dtype}
                    if choices:
                        field_kwargs["choices"] = choices
                        field_kwargs["description"] = description or None
                    elif description:
                        field_kwargs["description"] = description
                    struct_schema = struct_schema.field(field_name, **field_kwargs)

                # Keep chaining from the structure builder for later structures
                # and the final extract call.
                # NOTE(review): presumably the builder forwards schema-level
                # calls — confirm against the GLiNER2 API.
                schema = struct_schema

        # Extract with combined schema
        results = EXTRACTOR.extract(text, schema, threshold=threshold)
        return json.dumps(results, indent=2)
    except Exception as e:
        return json.dumps({"error": str(e)}, indent=2)
| # ============================================================================ | |
| # Example Data | |
| # ============================================================================ | |
# Preset inputs for the gr.Examples widgets, keyed by tab. Every row is
# [input_text, schema_or_label_text, threshold], in the same order as the
# `inputs=[...]` list of the corresponding tab.
EXAMPLES = {
    # Rows: [text, comma-separated entity types, threshold]
    "entities": [
        [
            "Apple Inc. CEO Tim Cook announced the new iPhone 15 in Cupertino, California on September 12, 2023.",
            "company, person, product, location, date",
            0.5
        ],
        [
            "Dr. Sarah Johnson from MIT published groundbreaking research on quantum computing.",
            "person, organization, research_topic",
            0.4
        ],
        [
            "Tesla Model 3 starts at $40,000 and features autopilot, 358-mile range, and 5-star safety rating.",
            "product, company, price, feature, metric",
            0.4
        ],
        [
            "The Eiffel Tower in Paris, France attracts millions of tourists annually. Built in 1889, it stands 330 meters tall.",
            "landmark, location, country, date, measurement",
            0.5
        ],
        [
            "Amazon acquired Whole Foods for $13.7 billion in 2017, marking their entry into grocery retail.",
            "company, amount, date, industry",
            0.5
        ],
        [
            "NASA's James Webb Space Telescope discovered exoplanet TRAPPIST-1e orbiting a red dwarf star 40 light-years away.",
            "organization, technology, celestial_body, distance",
            0.4
        ],
    ],
    # Rows: [text, one classification task per line, threshold]
    "classification": [
        [
            "This product exceeded my expectations! The quality is outstanding and delivery was super fast.",
            "sentiment: positive, negative, neutral",
            0.5
        ],
        [
            "Breaking: Major tech company announces layoffs affecting thousands of employees.",
            "sentiment: positive, negative, neutral\nurgency: high, medium, low\ntopic (multi): technology, business, politics, sports, health",
            0.3
        ],
        [
            "Your order #12345 has been shipped and will arrive by Friday. Track your package using the link below.",
            "message_type: notification, marketing, support, alert\nsentiment: positive, negative, neutral",
            0.5
        ],
        [
            "URGENT: Your account shows suspicious activity. Click here immediately to verify your identity.",
            "intent: spam, phishing, legitimate, promotional\nurgency: critical, high, normal, low\nsafety (multi): safe, suspicious, malicious",
            0.4
        ],
        [
            "Learn Python programming in just 30 days! Limited time offer: 50% off all courses. Don't miss out!",
            "category: education, marketing, news, entertainment\ntone: professional, casual, urgent, friendly\naction_required: yes, no",
            0.5
        ],
        [
            "The new climate report shows alarming trends in global temperatures. Scientists urge immediate action to reduce emissions.",
            "topic (multi): climate, science, politics, environment\nemotion (multi): concern, urgency, hope, fear\ncredibility: high, medium, low",
            0.4
        ],
        [
            "Subject: Re: Q4 Budget Proposal - Urgent Review Needed. Hi team, I've reviewed the budget proposal and have some concerns about the marketing allocation. We need to discuss this before Friday's board meeting. Please confirm your availability for a call tomorrow at 2 PM. Thanks, Sarah",
            "email_type: internal, external, automated, newsletter\nsentiment: positive, negative, neutral\npriority: critical, high, medium, low\nintent: request, inform, complaint, inquiry, follow_up\ntone: professional, casual, urgent, friendly, formal\naction_required: yes, no\ndepartment (multi): finance, marketing, hr, engineering, sales\nurgency: immediate, soon, flexible\nresponse_expected: yes, no",
            0.4
        ],
    ],
    # Rows: [text, [structure] headers followed by field lines, threshold]
    "json": [
        [
            "Contact John Smith at john.smith@email.com or call (555) 123-4567.",
            "[contact]\nname::str\nemail::str\nphone::str",
            0.4
        ],
        [
            "Patient: Sarah Johnson, 34, presented with chest pain. Prescribed: Lisinopril 10mg daily, Metoprolol 25mg twice daily.",
            "[patient]\nname::str\nage::str\nsymptoms::list\n\n[prescription]\nmedication::str\ndosage::str\nfrequency::str",
            0.4
        ],
        [
            "Order #ORD-2024-001: MacBook Pro 16 inch (Qty: 1, $2499), Magic Mouse (Qty: 2, $79). Subtotal: $2657, Tax: $212, Total: $2869",
            "[order]\norder_id::str\nitems::list\nquantities::list\nunit_prices::list\nsubtotal::str\ntax::str\ntotal::str",
            0.4
        ],
        [
            "Flight UA123 departing San Francisco (SFO) at 8:30 AM, arriving New York (JFK) at 5:15 PM. Gate B12, Seat 14A. Economy class.",
            "[flight]\nflight_number::str\ndeparture_city::str\ndeparture_code::str\ndeparture_time::str\narrival_city::str\narrival_code::str\narrival_time::str\ngate::str\nseat::str\nclass::[economy|business|first]::str",
            0.4
        ],
        [
            "Meeting scheduled for March 15, 2024 at 2:30 PM PST. Attendees: John Doe, Jane Smith, Bob Wilson. Topic: Q1 Budget Review. Location: Conference Room A (or Zoom link: zoom.us/j/123456).",
            "[meeting]\ndate::str\ntime::str\ntimezone::str\nattendees::list\ntopic::str\nlocation::str\nvirtual_link::str",
            0.4
        ],
        [
            "Job posting: Senior Software Engineer at Google, Mountain View CA. Salary: $150k-$200k. Requirements: 5+ years Python, React, AWS. Benefits include health insurance, 401k matching, unlimited PTO.",
            "[job]\ntitle::str\ncompany::str\nlocation::str\nsalary_range::str\nrequired_skills::list\nyears_experience::str\nbenefits::list",
            0.4
        ],
        [
            "Expense Report: Paid $85.50 at Whole Foods for groceries, $45 for Uber rides to office, $120 at Target for office supplies, and $156.80 for electricity bill.",
            "[expense]\nvendor::str\namount::str\ncategory::[food|transport|shopping|utilities]::str\ndescription::str",
            0.4
        ],
        [
            "Business expense: $67.25 at Starbucks for client meeting refreshments on March 15, 2024. Category: Food & Beverage. Payment method: Corporate card.",
            "[expense]\ndate::str\nvendor::str\namount::str\ncategory::[food|transport|shopping|utilities]::str\npurpose::str\npayment_method::str",
            0.4
        ],
    ],
    # Rows: [text, sectioned <entities>/<classification>/<structures> schema, threshold]
    "combined": [
        [
            "Apple CEO Tim Cook announced the new iPhone 15 in Cupertino for $999. This is exciting news!",
            "<entities>\ncompany, person, product, location\n\n<classification>\nsentiment: positive, negative, neutral",
            0.5
        ],
        [
            "Breaking: Tech startup raises $50M Series B. CEO Sarah Chen says 'We're hiring 100 engineers.' Contact: press@startup.com",
            "<entities>\ncompany, person, amount\n\n<classification>\nsentiment: positive, negative, neutral\nurgency: high, medium, low\ntopic (multi): technology, business, finance\n\n<structures>\n[contact]\nemail::str\nrole::str",
            0.4
        ],
        [
            "Dr. Emily Watson from Stanford University published research on AI safety. The paper discusses risks and proposes new frameworks. Contact: e.watson@stanford.edu for collaboration.",
            "<entities>\nperson, organization, research_topic\n\n<classification>\ncategory: research, news, opinion\ncredibility: high, medium, low\n\n<structures>\n[researcher]\nname::str\nemail::str\naffiliation::str\nresearch_area::str",
            0.4
        ],
        [
            "URGENT: Security breach at MegaCorp Inc. exposed 2 million user records including names, emails, and passwords. CEO John Davis apologized. Support: help@megacorp.com",
            "<entities>\ncompany, person, data_type, amount\n\n<classification>\nurgency: critical, high, medium, low\nsentiment: positive, negative, neutral\ntopic (multi): security, technology, business, legal\n\n<structures>\n[incident]\ncompany::str\naffected_records::str\ndata_types::list\ncontact_email::str",
            0.3
        ],
        [
            # "Soufflé" was previously stored as mis-decoded UTF-8 ("Soufflรฉ").
            "New restaurant 'Le Bernardin' opens in NYC. Chef Eric Ripert serves French cuisine. Reservations: 555-1234 or reservations@bernardin.com. Price range: $$$. Menu includes Dover Sole, Wagyu Beef, and Chocolate Soufflé.",
            "<entities>\nrestaurant, location, person, cuisine, dish\n\n<classification>\nprice_range: budget, moderate, expensive, luxury\ncuisine_type: french, italian, american, asian, fusion\n\n<structures>\n[restaurant]\nname::str\nchef::str\nphone::str\nemail::str\nmenu_items::list\nlocation::str",
            0.4
        ],
        [
            "Expense: John Smith spent $125.40 at Whole Foods Market in Seattle for groceries. Payment approved. High priority for reimbursement.",
            "<entities>\nperson, merchant, location, amount\n\n<classification>\npriority: high, medium, low\napproval_status: approved, pending, rejected\n\n<structures>\n[expense]\nemployee::str\nvendor::str\namount::str\ncategory::[food|transport|shopping|utilities]::str\nlocation::str",
            0.4
        ],
        [
            "Monthly expenses report: Sarah paid $78 at Shell Gas Station, $234.50 for internet/phone bill from AT&T, $89.99 at Amazon for office supplies, and $145 at Chipotle for team lunch. All expenses are pending approval with medium priority.",
            "<entities>\nperson, merchant, amount\n\n<classification>\napproval_status (multi): approved, pending, rejected\npriority: high, medium, low\nexpense_type (multi): business, personal, travel\n\n<structures>\n[expense]\nemployee::str\nvendor::str\namount::str\ncategory::[food|transport|shopping|utilities]::str",
            0.4
        ],
    ]
}
| # ============================================================================ | |
| # UI Creation | |
| # ============================================================================ | |
def create_demo():
    """Create the Gradio demo interface.

    Builds a ``gr.Blocks`` app with a header, four tabs (entity extraction,
    text classification, JSON extraction, combined tasks) and a footer. Each
    tab wires its button to the matching ``*_demo`` callback and offers the
    preset rows from ``EXAMPLES``.

    Returns:
        The constructed ``gr.Blocks`` instance; it is not launched here.
    """
    # Page-level styling for the header banner, badge and footer line.
    custom_css = """
    .gradio-container {
    max-width: 1200px !important;
    }
    .header {
    text-align: center;
    padding: 2rem;
    background: linear-gradient(135deg, #334155 0%, #1e293b 100%);
    color: white;
    border-radius: 10px;
    margin-bottom: 2rem;
    }
    .header h1 {
    margin: 0;
    font-size: 2.5rem;
    font-weight: bold;
    }
    .header p {
    margin: 0.5rem 0 0 0;
    font-size: 1.1rem;
    opacity: 0.9;
    }
    .header a {
    color: white;
    text-decoration: none;
    border-bottom: 2px solid rgba(255, 255, 255, 0.5);
    transition: border-color 0.3s;
    }
    .header a:hover {
    border-bottom-color: white;
    }
    .fastino-badge {
    display: inline-block;
    padding: 0.5rem 1rem;
    background: rgba(255, 255, 255, 0.2);
    color: white;
    border-radius: 20px;
    font-weight: bold;
    margin-top: 1rem;
    backdrop-filter: blur(10px);
    }
    .powered-by {
    text-align: center;
    padding: 1rem;
    color: #64748b;
    font-size: 0.9rem;
    margin-top: 2rem;
    }
    """

    with gr.Blocks(
        title="GLiNER2 by Fastino",
        theme=gr.themes.Soft(
            primary_hue="slate",
            secondary_hue="zinc",
        ),
        css=custom_css,
    ) as demo:
        # Header (plain string: there is nothing to interpolate here)
        gr.HTML("""
        <div class="header">
        <h1>๐ค GLiNER2 by <a href="https://fastino.ai" target="_blank">Fastino</a></h1>
        <p>Advanced Information Extraction with Schema-Based Modeling</p>
        <div class="fastino-badge">Powered by Fastino AI</div>
        </div>
        """)

        # Tabs for different functionalities
        with gr.Tabs():
            # ==================== Entity Extraction Tab ====================
            with gr.Tab("๐ฏ Entity Extraction"):
                gr.Markdown("""
                Extract named entities like people, organizations, locations, products, and more.
                """)
                with gr.Row():
                    with gr.Column(scale=2):
                        ner_text = gr.Textbox(
                            label="Input Text",
                            placeholder="Enter text to extract entities from...",
                            lines=5
                        )
                        ner_entities = gr.Textbox(
                            label="Entity Types (comma-separated)",
                            placeholder="e.g., person, company, location, date",
                            value="person, company, location"
                        )
                        ner_threshold = gr.Slider(
                            minimum=0.0,
                            maximum=1.0,
                            value=0.5,
                            step=0.05,
                            label="Confidence Threshold"
                        )
                        ner_button = gr.Button("Extract Entities", variant="primary", size="lg")
                    with gr.Column(scale=2):
                        ner_json = gr.Code(label="Results (JSON)", language="json", lines=15)
                gr.Examples(
                    examples=EXAMPLES["entities"],
                    inputs=[ner_text, ner_entities, ner_threshold],
                    label="๐ก Try These Examples"
                )
                ner_button.click(
                    fn=extract_entities_demo,
                    inputs=[ner_text, ner_entities, ner_threshold],
                    outputs=ner_json
                )

            # ==================== Classification Tab ====================
            with gr.Tab("๐ท๏ธ Text Classification"):
                gr.Markdown("""
                Classify text into predefined categories. Supports multiple classification tasks at once!
                **Format:** `task_name: label1, label2, label3`
                **Multi-label:** Add `(multi)` after task name: `task_name (multi): label1, label2`
                """)
                with gr.Row():
                    with gr.Column(scale=2):
                        cls_text = gr.Textbox(
                            label="Input Text",
                            placeholder="Enter text to classify...",
                            lines=5
                        )
                        cls_tasks = gr.Textbox(
                            label="Classification Tasks (one per line)",
                            placeholder="sentiment: positive, negative, neutral\ntopic (multi): technology, business, sports",
                            value="sentiment: positive, negative, neutral",
                            lines=6
                        )
                        cls_threshold = gr.Slider(
                            minimum=0.0,
                            maximum=1.0,
                            value=0.5,
                            step=0.05,
                            label="Confidence Threshold"
                        )
                        cls_button = gr.Button("Classify", variant="primary", size="lg")
                    with gr.Column(scale=2):
                        cls_json = gr.Code(label="Results (JSON)", language="json", lines=15)
                gr.Examples(
                    examples=EXAMPLES["classification"],
                    inputs=[cls_text, cls_tasks, cls_threshold],
                    label="๐ก Try These Examples"
                )
                cls_button.click(
                    fn=classify_text_demo,
                    inputs=[cls_text, cls_tasks, cls_threshold],
                    outputs=cls_json
                )

            # ==================== JSON Extraction Tab ====================
            with gr.Tab("๐ JSON Extraction"):
                gr.Markdown("""
                Extract structured data from unstructured text. Supports multiple structures at once!
                **Format:** Use `[structure_name]` headers followed by field specifications
                **Fields:** `field_name::type::description` (type: str or list)
                """)
                with gr.Row():
                    with gr.Column(scale=2):
                        json_text = gr.Textbox(
                            label="Input Text",
                            placeholder="Enter text with structured information...",
                            lines=5
                        )
                        json_structures = gr.Textbox(
                            label="Structure Definitions (use [structure_name] headers)",
                            placeholder="[contact]\nname::str\nemail::str\nphone::str\n\n[product]\nname::str\nprice::str",
                            value="[contact]\nname::str\nemail::str\nphone::str",
                            lines=10
                        )
                        json_threshold = gr.Slider(
                            minimum=0.0,
                            maximum=1.0,
                            value=0.4,
                            step=0.05,
                            label="Threshold"
                        )
                        json_button = gr.Button("Extract Data", variant="primary", size="lg")
                    with gr.Column(scale=2):
                        json_json = gr.Code(label="Results (JSON)", language="json", lines=20)
                gr.Examples(
                    examples=EXAMPLES["json"],
                    inputs=[json_text, json_structures, json_threshold],
                    label="๐ก Try These Examples"
                )
                json_button.click(
                    fn=extract_json_demo,
                    inputs=[json_text, json_structures, json_threshold],
                    outputs=json_json
                )

            # ==================== Combined Tasks Tab ====================
            with gr.Tab("๐ฎ Combined Tasks"):
                gr.Markdown("""
                **Combine multiple extraction types in a single call!**
                Use section headers to define any combination of tasks:
                - `<entities>` - Named entity extraction (comma-separated)
                - `<classification>` - Text classification tasks (one per line)
                - `<structures>` - JSON structure extraction (use [name] headers)
                **All sections are optional** - include only what you need!
                """)
                with gr.Row():
                    with gr.Column(scale=2):
                        combined_text = gr.Textbox(
                            label="Input Text",
                            placeholder="Enter text to analyze...",
                            lines=5
                        )
                        combined_schema = gr.Textbox(
                            label="Combined Schema Definition",
                            placeholder="<entities>\ncompany, person, location\n\n<classification>\nsentiment: positive, negative, neutral\n\n<structures>\n[contact]\nemail::str",
                            value="<entities>\ncompany, person, location\n\n<classification>\nsentiment: positive, negative, neutral",
                            lines=15
                        )
                        combined_threshold = gr.Slider(
                            minimum=0.0,
                            maximum=1.0,
                            value=0.5,
                            step=0.05,
                            label="Threshold"
                        )
                        combined_button = gr.Button("Extract All", variant="primary", size="lg")
                    with gr.Column(scale=2):
                        combined_json = gr.Code(label="Results (JSON)", language="json", lines=25)
                gr.Examples(
                    examples=EXAMPLES["combined"],
                    inputs=[combined_text, combined_schema, combined_threshold],
                    label="๐ก Try These Examples"
                )
                combined_button.click(
                    fn=combined_demo,
                    inputs=[combined_text, combined_schema, combined_threshold],
                    outputs=combined_json
                )

        # Footer. The model id is taken from DEFAULT_MODEL so the text cannot
        # drift from the checkpoint that is actually loaded.
        gr.Markdown(f"""
        ---
        ### ๐ About GLiNER2
        GLiNER2 is an advanced information extraction framework featuring:
        - **Zero-shot entity recognition** with custom entity types
        - **Flexible text classification** (single/multi-label)
        - **Structured data extraction** from unstructured text
        - **High performance** with state-of-the-art accuracy
        **Model:** `{DEFAULT_MODEL}` | Built with โค๏ธ by [Fastino AI](https://fastino.ai)
        """)
        gr.HTML("""
        <div class="powered-by">
        <strong>Powered by Fastino AI</strong> โ Task-specific Language Models (TLMs) for production workloads
        </div>
        """)
    return demo
| # ============================================================================ | |
| # Main | |
| # ============================================================================ | |
# Script entry point: when this file is executed directly (not imported), build
# the Gradio interface and start serving it. The Blocks object is bound to the
# module-level name `demo` before launching.
if __name__ == "__main__":
    demo = create_demo()
    demo.launch()