"""Gradio app that pulls event details (people, places, times) out of free text.

A Hugging Face token-classification pipeline supplies named entities, while
clock times and time ranges are recognised with regular expressions and
parsed with ``dateutil``.
"""

import os
import re
import sys
from datetime import datetime, timedelta
from typing import Optional, Tuple

import dateutil.parser
import gradio as gr
from transformers import pipeline

# Load the Transformer model once at import time.
# aggregation_strategy="simple" makes the pipeline merge word-piece tokens
# ("John", "##son") into whole entities ("Johnson") and report the label under
# "entity_group" without the B-/I- prefix.
ner_pipeline = pipeline(
    "ner",
    model="dbmdz/bert-large-cased-finetuned-conll03-english",
    aggregation_strategy="simple",
)

# One clock token: "3", "3PM", "3 pm", "15:30", "3:30pm".  The trailing \b
# keeps "6 amazing" from being read as "6 am".
_CLOCK_TOKEN = r"\d{1,2}(?::\d{2})?(?:\s*(?:am|pm)\b)?"

# Time range such as "3PM to 4PM", "10:30-11:45" or "9 until 5pm".  The
# look-arounds stop the pattern from matching inside hyphenated or slashed
# dates such as "2023-10-12".
_TIME_RANGE_RE = re.compile(
    rf"(?<![\d:/-])({_CLOCK_TOKEN})\s*(?:to|until|-)\s*({_CLOCK_TOKEN})(?![\d:/-])",
    re.IGNORECASE,
)

# A single unambiguous clock time: it must carry a meridiem or a colon.
_SINGLE_TIME_RE = re.compile(
    r"(?<![\d:/-])(\d{1,2}(?::\d{2})?\s*(?:am|pm)\b|\d{1,2}:\d{2})(?![\d:/-])",
    re.IGNORECASE,
)

_MERIDIEM_RE = re.compile(r"(am|pm)\s*$", re.IGNORECASE)
_LEADING_HOUR_RE = re.compile(r"\s*(\d{1,2})")


def normalize_date_time(text: str) -> Optional[datetime]:
    """Parse a date/time expression with dateutil's fuzzy parser.

    Args:
        text: Free-form date or time expression.

    Returns:
        The parsed ``datetime``, or ``None`` when the text cannot be parsed.
    """
    try:
        return dateutil.parser.parse(text, fuzzy=True)
    except (ValueError, TypeError, OverflowError):
        # dateutil raises OverflowError for numbers too large for a date.
        return None


def calculate_duration(start_time, end_time) -> str:
    """Format the time between two datetimes as ``"<hours>h <minutes>m"``.

    Args:
        start_time: Start of the interval.
        end_time: End of the interval.

    Returns:
        The formatted duration (prefixed with ``-`` when ``end_time`` precedes
        ``start_time``), or ``"Unknown"`` if either argument is not a
        ``datetime``.
    """
    if not (isinstance(start_time, datetime) and isinstance(end_time, datetime)):
        return "Unknown"

    total_minutes = int((end_time - start_time).total_seconds() / 60)
    # Split sign and magnitude: floor division on a negative value would turn
    # -90 minutes into "-2h 30m" instead of "-1h 30m".
    sign = "-" if total_minutes < 0 else ""
    hours, minutes = divmod(abs(total_minutes), 60)
    return f"{sign}{hours}h {minutes}m"


def extract_entities(text: str) -> dict:
    """Run the NER pipeline over *text* and bucket the entities by category.

    Args:
        text: Input sentence(s).

    Returns:
        A dict with the keys ``PERSON``, ``DATE``, ``TIME``, ``GPE`` and
        ``EVENT``, each mapping to a list of strings in order of appearance.
    """
    entities = {"PERSON": [], "DATE": [], "TIME": [], "GPE": [], "EVENT": []}
    if not text or not text.strip():
        return entities

    for entity in ner_pipeline(text):
        # Aggregated results use "entity_group"; raw token results use "entity".
        label = entity.get("entity_group") or entity.get("entity") or ""
        word = entity["word"]

        # Convert Hugging Face labels to meaningful categories.
        # NOTE: a CoNLL-03 model only emits PER/ORG/LOC/MISC, so the DATE and
        # TIME branches only fire if a model with those labels is swapped in.
        if "PER" in label:
            entities["PERSON"].append(word)
        elif "ORG" in label or "LOC" in label or "GPE" in label:
            entities["GPE"].append(word)
        elif "DATE" in label:
            normalized_date = normalize_date_time(word)
            if normalized_date is not None:
                entities["DATE"].append(normalized_date.strftime("%Y-%m-%d"))
            else:
                entities["DATE"].append(word)
        elif "TIME" in label:
            entities["TIME"].append(word)
        elif "MISC" in label or "EVENT" in label:
            entities["EVENT"].append(word)

    return entities


def _meridiem_of(token: str) -> Optional[str]:
    """Return the trailing AM/PM marker of a clock token, or None."""
    found = _MERIDIEM_RE.search(token)
    return found.group(1) if found else None


def _needs_meridiem(token: str) -> bool:
    """Tell whether a clock token is a 12-hour value lacking AM/PM."""
    if _meridiem_of(token) is not None:
        return False
    hour = _LEADING_HOUR_RE.match(token)
    return hour is not None and 1 <= int(hour.group(1)) <= 12


def _parse_clock_time(token: str, meridiem: Optional[str] = None) -> Optional[datetime]:
    """Parse a clock token, optionally completing it with an AM/PM marker."""
    token = token.strip()
    if _meridiem_of(token) is None:
        if ":" not in token:
            # dateutil reads a lone number as a day of the month; ":00" makes
            # it unambiguously an hour.
            token = f"{token}:00"
        if meridiem:
            token = f"{token} {meridiem}"
    return normalize_date_time(token)


def _toggle_half_day(moment: datetime) -> datetime:
    """Move a datetime to the other half of the same day (AM <-> PM)."""
    shift = timedelta(hours=12)
    return moment + shift if moment.hour < 12 else moment - shift


def _resolve_time_range(
    start_token: str, end_token: str
) -> Tuple[Optional[datetime], Optional[datetime]]:
    """Turn the two tokens of a time range into start and end datetimes.

    An endpoint without AM/PM borrows the marker of the other endpoint
    ("3 to 4PM" -> 15:00-16:00).  If that guess puts the end before the start,
    the borrowed marker is flipped ("9 to 5PM" -> 09:00-17:00).  A range that
    still ends before it starts is taken to cross midnight.
    """
    start_inherits = _needs_meridiem(start_token) and _meridiem_of(end_token) is not None
    end_inherits = _needs_meridiem(end_token) and _meridiem_of(start_token) is not None

    start_time = _parse_clock_time(
        start_token, _meridiem_of(end_token) if start_inherits else None
    )
    end_time = _parse_clock_time(
        end_token, _meridiem_of(start_token) if end_inherits else None
    )
    if start_time is None or end_time is None:
        return start_time, end_time

    if end_time < start_time:
        if start_inherits:
            start_time = _toggle_half_day(start_time)
        elif end_inherits:
            end_time = _toggle_half_day(end_time)
    if end_time < start_time:
        # e.g. "11PM to 1AM": the end belongs to the following day.
        end_time += timedelta(days=1)
    return start_time, end_time


def process_text(text: str) -> str:
    """Extract event details from *text* and render them as a text report.

    Args:
        text: Free-form description of an event.

    Returns:
        A newline-terminated, multi-line string with the fields Event, People,
        Date, Start Time, End Time, Duration and Location.
    """
    if text is None:
        text = ""
    entities = extract_entities(text)

    start_time = None
    end_time = None
    duration = "Unknown"

    # Look for time range patterns (e.g., "3PM to 4PM").
    match = _TIME_RANGE_RE.search(text)
    if match:
        start_time, end_time = _resolve_time_range(*match.groups())
    else:
        # No range: use the first TIME entity, then any standalone clock time.
        if entities["TIME"]:
            start_time = normalize_date_time(entities["TIME"][0])
        if start_time is None:
            single = _SINGLE_TIME_RE.search(text)
            if single:
                start_time = _parse_clock_time(single.group(1))

    # A duration is only available when both endpoints were parsed.
    if start_time is not None and end_time is not None:
        duration = calculate_duration(start_time, end_time)
        start_time_str = start_time.strftime("%H:%M")
        end_time_str = end_time.strftime("%H:%M")
    else:
        start_time_str = start_time.strftime("%H:%M") if start_time is not None else "None"
        end_time_str = "None"

    # Construct structured output.
    result = {
        "Event": entities["EVENT"][0] if entities["EVENT"] else "Unknown",
        "People": ", ".join(entities["PERSON"]) if entities["PERSON"] else "None",
        "Date": ", ".join(entities["DATE"]) if entities["DATE"] else "None",
        "Start Time": start_time_str,
        "End Time": end_time_str,
        "Duration": duration,
        "Location": ", ".join(entities["GPE"]) if entities["GPE"] else "None",
    }

    # Format the output.
    output = (
        f"Event: {result['Event']}\n"
        f"People: {result['People']}\n"
        f"Date: {result['Date']}\n"
        f"Start Time: {result['Start Time']}\n"
        f"End Time: {result['End Time']}\n"
        f"Duration: {result['Duration']}\n"
        f"Location: {result['Location']}\n"
    )
    return output


# Gradio interface
demo = gr.Interface(
    fn=process_text,
    inputs="text",
    outputs="text",
    title="Transformer-based Text Normalization",
    description="Extract events, dates, times, and locations from input text using a high-accuracy Transformer NER model.",
)

# Launch the app
if __name__ == "__main__":
    print("Launching Gradio application...")
    demo.launch(server_name="0.0.0.0", server_port=7860)