Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import dateutil.parser | |
| import re | |
| import os | |
| import sys | |
| from datetime import datetime, timedelta | |
| from transformers import pipeline | |
# Token-classification (NER) pipeline, created once when the module is imported.
NER_MODEL_NAME = "dbmdz/bert-large-cased-finetuned-conll03-english"
ner_pipeline = pipeline("ner", model=NER_MODEL_NAME)
def normalize_date_time(text):
    """Parse a free-form date/time expression into a ``datetime``.

    Parsing is delegated to ``dateutil.parser.parse`` in fuzzy mode, so
    unrelated words surrounding the date/time are tolerated.

    Args:
        text: The expression to parse, e.g. ``"3PM"`` or ``"March 5th"``.

    Returns:
        The parsed ``datetime``, or ``None`` when ``text`` is missing, blank,
        or cannot be interpreted as a date/time.
    """
    # Nothing to parse: skip the parser call, which would only raise.
    if text is None or (isinstance(text, str) and not text.strip()):
        return None
    try:
        return dateutil.parser.parse(text, fuzzy=True)
    except (ValueError, TypeError, OverflowError):
        # ValueError/TypeError: unparseable or unsupported input.
        # OverflowError: dateutil documents raising it for numbers too large
        # to fit a date; free-form user text must not crash the app on that.
        return None
def calculate_duration(start_time, end_time):
    """Format the time elapsed between two datetimes as ``"<H>h <M>m"``.

    Args:
        start_time: Beginning of the interval.
        end_time: End of the interval.

    Returns:
        A string such as ``"1h 30m"``. When ``end_time`` precedes
        ``start_time`` the result carries a leading minus sign
        (``"-1h 30m"``). ``"Unknown"`` is returned if either argument is not
        a ``datetime``.
    """
    if not (isinstance(start_time, datetime) and isinstance(end_time, datetime)):
        return "Unknown"

    delta = end_time - start_time
    # int() truncates toward zero, so partial minutes are dropped.
    total_minutes = int(delta.total_seconds() / 60)

    # Split the magnitude and re-attach the sign afterwards: floor division
    # and modulo on a negative value would turn -90 minutes into "-2h 30m".
    sign = "-" if total_minutes < 0 else ""
    hours, minutes = divmod(abs(total_minutes), 60)
    return f"{sign}{hours}h {minutes}m"
# Substrings looked for in a model label, checked in this order, and the
# output category each one maps to.
_LABEL_CATEGORIES = (
    (("PER",), "PERSON"),
    (("ORG", "LOC", "GPE"), "GPE"),
    (("DATE",), "DATE"),
    (("TIME",), "TIME"),
    (("MISC", "EVENT"), "EVENT"),
)


def _category_for_label(label):
    """Map a model label such as ``"B-PER"`` to an output category, or None."""
    for needles, category in _LABEL_CATEGORIES:
        if any(needle in label for needle in needles):
            return category
    return None


def _merge_ner_tokens(ner_results, text):
    """Merge per-token NER results into ``(category, surface_text)`` pairs.

    Without an aggregation strategy the pipeline reports one entry per word
    piece, so a single name can arrive as several fragments, some prefixed
    with "##". Fragments that continue the previous entity (a "##" piece or
    an "I-" label of the same category, at the next token index when indices
    are reported) are folded into it.
    """
    spans = []
    for token in ner_results:
        # "entity_group" is the key used by aggregated pipeline output;
        # "entity" is the per-token key.
        label = token.get("entity_group") or token.get("entity") or ""
        category = _category_for_label(label)
        if category is None:
            continue

        word = token.get("word") or ""
        is_word_piece = word.startswith("##")
        if is_word_piece:
            word = word[2:]
        index = token.get("index")

        previous = spans[-1] if spans else None
        continues_previous = (
            previous is not None
            and previous["category"] == category
            and (is_word_piece or label.startswith("I-"))
            # Only reject the merge when indices prove the tokens are apart.
            and (
                previous["index"] is None
                or index is None
                or index == previous["index"] + 1
            )
        )
        if continues_previous:
            previous["word"] += word if is_word_piece else f" {word}"
            previous["index"] = index
            previous["end"] = token.get("end")
        else:
            spans.append(
                {
                    "category": category,
                    "word": word,
                    "index": index,
                    "start": token.get("start"),
                    "end": token.get("end"),
                }
            )

    merged = []
    for span in spans:
        start, end = span["start"], span["end"]
        # Prefer the exact text slice when character offsets are available;
        # it keeps the original spacing and punctuation of the mention.
        if (
            isinstance(start, int)
            and isinstance(end, int)
            and 0 <= start < end <= len(text)
        ):
            surface = text[start:end]
        else:
            surface = span["word"]
        if surface:
            merged.append((span["category"], surface))
    return merged


def extract_entities(text):
    """Run the NER pipeline on ``text`` and bucket the entities by category.

    Args:
        text: Free-form input text.

    Returns:
        A dict with the keys ``"PERSON"``, ``"DATE"``, ``"TIME"``, ``"GPE"``
        and ``"EVENT"``, each mapping to a list of strings in order of
        appearance. ``DATE`` entries are rewritten as ``YYYY-MM-DD`` when
        ``normalize_date_time`` can parse them and kept verbatim otherwise.
        Labels that match no category are ignored. Blank input yields empty
        lists without invoking the model.
    """
    entities = {
        "PERSON": [],
        "DATE": [],
        "TIME": [],
        "GPE": [],
        "EVENT": [],
    }
    if not text or not text.strip():
        return entities

    for category, surface in _merge_ner_tokens(ner_pipeline(text), text):
        if category == "DATE":
            parsed = normalize_date_time(surface)
            if parsed is not None:
                surface = parsed.strftime("%Y-%m-%d")
        entities[category].append(surface)
    return entities
# Clock-time ranges such as "3PM to 4PM", "9:30-11am" or "14:00 until 15:30".
# The look-arounds keep the pattern from firing inside dates and decimals
# ("2024-05-12", "3.5"), and the \b after AM/PM stops "4 amazing" from being
# read as "4 am".
_TIME_RANGE_RE = re.compile(
    r"(?<![\d:/.\-])"
    r"(?P<start>\d{1,2}(?::\d{2})?)(?:\s*(?P<start_mer>[AP]M)\b)?"
    r"\s*(?:to|until|-)\s*"
    r"(?P<end>\d{1,2}(?::\d{2})?)(?:\s*(?P<end_mer>[AP]M)\b)?"
    r"(?![\d:]|[-/.]\d)",
    re.IGNORECASE,
)


def _find_time_range(text):
    """Return the first plausible clock range in ``text`` or None.

    The result is ``(start_clock, start_meridiem, end_clock, end_meridiem)``;
    a meridiem is None when it was not written. Ranges made of two bare
    numbers ("3 to 4") are skipped because nothing marks them as times.
    """
    for match in _TIME_RANGE_RE.finditer(text):
        start_clock, end_clock = match.group("start"), match.group("end")
        start_mer, end_mer = match.group("start_mer"), match.group("end_mer")
        if start_mer or end_mer or ":" in start_clock or ":" in end_clock:
            return start_clock, start_mer, end_clock, end_mer
    return None


def _clock_string(clock, meridiem):
    """Build an unambiguous time string for ``normalize_date_time``.

    A bare hour such as "3" is ambiguous to a general date parser (it can be
    read as a day of the month), so it is always given either a meridiem
    ("3PM") or explicit minutes ("3:00").
    """
    if meridiem:
        return f"{clock}{meridiem}"
    return clock if ":" in clock else f"{clock}:00"


def process_text(text):
    """Extract event details from ``text`` and format them as a text report.

    Entities come from ``extract_entities``. Start and end times are taken
    from the first clock range found in the text ("3PM to 4PM"); when there
    is none, the first ``TIME`` entity (if any) is used as the start time.

    Args:
        text: Free-form description of an event. ``None`` or blank text
            produces a report made only of placeholder values.

    Returns:
        A newline-terminated, multi-line string with the fields Event,
        People, Date, Start Time, End Time, Duration and Location. Missing
        values are shown as ``"Unknown"`` (Event, Duration) or ``"None"``.
    """
    text = text or ""
    if text.strip():
        entities = extract_entities(text)
    else:
        # No need to run the model on blank input.
        entities = {"PERSON": [], "DATE": [], "TIME": [], "GPE": [], "EVENT": []}

    start_time = None
    end_time = None
    duration = "Unknown"

    time_range = _find_time_range(text)
    if time_range is not None:
        start_clock, start_mer, end_clock, end_mer = time_range
        end_time = normalize_date_time(_clock_string(end_clock, end_mer))
        # "3 to 4PM": a start without AM/PM borrows the one written on the end.
        start_time = normalize_date_time(
            _clock_string(start_clock, start_mer or end_mer)
        )
        if (
            start_time is not None
            and end_time is not None
            and not start_mer
            and end_mer
            and start_time > end_time
        ):
            # "11 to 1PM": the borrowed meridiem put the start after the end,
            # so the start belongs to the other half of the day.
            opposite = "AM" if end_mer.upper() == "PM" else "PM"
            flipped = normalize_date_time(_clock_string(start_clock, opposite))
            if flipped is not None:
                start_time = flipped
    elif entities["TIME"]:
        # If no range, use the first TIME entity as start time.
        start_time = normalize_date_time(entities["TIME"][0])

    if start_time is not None and end_time is not None:
        # Both values are times of day parsed without a date, so an end that
        # is earlier than the start means the range crosses midnight
        # ("11PM to 1AM"); move the end to the following day.
        if end_time < start_time:
            end_time += timedelta(days=1)
        duration = calculate_duration(start_time, end_time)
        start_time_str = start_time.strftime("%H:%M")
        end_time_str = end_time.strftime("%H:%M")
    else:
        # An end time is only reported together with a start time.
        start_time_str = (
            start_time.strftime("%H:%M") if start_time is not None else "None"
        )
        end_time_str = "None"

    # Construct structured output; key order is the order of the report lines.
    result = {
        "Event": entities["EVENT"][0] if entities["EVENT"] else "Unknown",
        "People": ", ".join(entities["PERSON"]) if entities["PERSON"] else "None",
        "Date": ", ".join(entities["DATE"]) if entities["DATE"] else "None",
        "Start Time": start_time_str,
        "End Time": end_time_str,
        "Duration": duration,
        "Location": ", ".join(entities["GPE"]) if entities["GPE"] else "None",
    }
    return "".join(f"{field}: {value}\n" for field, value in result.items())
# Gradio UI: one text input fed to process_text, one text output for its report.
APP_TITLE = "Transformer-based Text Normalization"
APP_DESCRIPTION = (
    "Extract events, dates, times, and locations from input text "
    "using a high-accuracy Transformer NER model."
)
demo = gr.Interface(
    fn=process_text,
    inputs="text",
    outputs="text",
    title=APP_TITLE,
    description=APP_DESCRIPTION,
)
# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    print("Launching Gradio application...")
    # 0.0.0.0 makes the server listen on every network interface.
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    demo.launch(**launch_options)