Zoe911's picture
Create app.py
faf842b verified
import gradio as gr
import dateutil.parser
import re
import os
import sys
from datetime import datetime, timedelta
from transformers import pipeline
# Load the token-classification (NER) pipeline once at import time so that
# every request handled by this module reuses the same model instance.
# NOTE(review): the checkpoint name suggests CoNLL-03 fine-tuning, which
# presumably yields only PER/ORG/LOC/MISC tags; if so, the DATE/TIME branches
# in extract_entities never fire — confirm against the model card.
ner_pipeline = pipeline("ner", model="dbmdz/bert-large-cased-finetuned-conll03-english")
# Helper function: Normalize date and time expressions
def normalize_date_time(text):
    """Parse a free-form date/time expression into a ``datetime``.

    Parsing is fuzzy (``fuzzy=True``), so surrounding words that are not part
    of the date/time are tolerated rather than rejected.

    Args:
        text: String that may contain a date and/or a time.

    Returns:
        The parsed ``datetime``, or ``None`` when nothing usable could be
        extracted from ``text``.
    """
    try:
        return dateutil.parser.parse(text, fuzzy=True)
    except (ValueError, TypeError, OverflowError):
        # ValueError (incl. dateutil's ParserError): no recognisable date/time.
        # TypeError: input is not a string/stream (e.g. None).
        # OverflowError: dateutil documents this for numeric fields too large
        # to represent; treat it like any other unparseable input instead of
        # letting it crash the request.
        return None
# Calculate duration between two times
def calculate_duration(start_time, end_time):
    """Format the time elapsed between two datetimes as ``"<H>h <M>m"``.

    Args:
        start_time: Beginning of the interval (``datetime``).
        end_time: End of the interval (``datetime``).

    Returns:
        A string such as ``"1h 30m"``. When ``end_time`` precedes
        ``start_time`` the magnitude is formatted and prefixed with ``-``
        (e.g. ``"-1h 30m"``). ``"Unknown"`` is returned when either argument
        is not a ``datetime`` or when the two cannot be subtracted (one is
        timezone-aware and the other naive).
    """
    if not (isinstance(start_time, datetime) and isinstance(end_time, datetime)):
        return "Unknown"

    try:
        delta = end_time - start_time
    except TypeError:
        # Subtracting an offset-naive from an offset-aware datetime raises.
        return "Unknown"

    # Work on the magnitude: floor division / modulo on a negative minute
    # count would render -90 minutes as "-2h 30m".
    total_minutes = int(abs(delta).total_seconds() // 60)
    hours, minutes = divmod(total_minutes, 60)
    # Sub-minute negative gaps round to zero and must not print as "-0h 0m".
    sign = "-" if delta < timedelta(0) and total_minutes else ""
    return f"{sign}{hours}h {minutes}m"
# Extract entities using Transformer NER model
def _category_for_label(label):
    """Map a model tag such as ``I-PER`` to an output category, or ``None``."""
    if "PER" in label:
        return "PERSON"
    if "ORG" in label or "LOC" in label or "GPE" in label:
        return "GPE"
    if "DATE" in label:
        return "DATE"
    if "TIME" in label:
        return "TIME"
    if "MISC" in label or "EVENT" in label:
        return "EVENT"
    return None


def extract_entities(text):
    """Run the NER pipeline on ``text`` and group the hits by category.

    The pipeline is used without aggregation, so it reports one item per
    word piece (continuation pieces are prefixed with ``##``). Adjacent
    items that belong to the same entity are merged here so that a name is
    returned once, in full, instead of as fragments like ``"John", "##son"``.

    Args:
        text: Input sentence(s) to analyse.

    Returns:
        A dict with the keys ``"PERSON"``, ``"DATE"``, ``"TIME"``, ``"GPE"``
        and ``"EVENT"``, each mapping to a list of strings in order of
        appearance. ``DATE`` values are normalised to ``YYYY-MM-DD`` when
        they can be parsed and kept verbatim otherwise. All lists are empty
        for empty or whitespace-only input.
    """
    entities = {
        "PERSON": [],
        "DATE": [],
        "TIME": [],
        "GPE": [],
        "EVENT": []
    }
    if not text or not text.strip():
        return entities

    # Pass 1: stitch word pieces / consecutive tokens into entity spans.
    spans = []
    for token in ner_pipeline(text):
        # Aggregated pipelines report "entity_group" instead of "entity".
        label = token.get("entity") or token.get("entity_group") or ""
        category = _category_for_label(label)
        if category is None:
            continue

        word = token["word"]
        index = token.get("index")
        is_piece = word.startswith("##")
        last = spans[-1] if spans else None
        adjacent = (
            last is not None
            and last["category"] == category
            and index is not None
            and last["index"] is not None
            and index == last["index"] + 1
        )
        # A "B-" tag opens a new entity even when it directly follows one of
        # the same type; a "##" piece always belongs to the preceding token.
        if adjacent and (is_piece or not label.startswith("B-")):
            last["word"] += word[2:] if is_piece else " " + word
            last["index"] = index
            last["end"] = token.get("end")
            continue

        spans.append({
            "category": category,
            "word": word[2:] if is_piece else word,
            "index": index,
            "start": token.get("start"),
            "end": token.get("end"),
        })

    # Pass 2: emit each span's surface text under its category.
    for span in spans:
        start, end = span["start"], span["end"]
        surface = span["word"]
        if start is not None and end is not None:
            # Prefer the original characters: this keeps punctuation and
            # spacing exactly as written (e.g. "U.S." rather than "U . S .").
            surface = text[start:end].strip() or surface

        if span["category"] == "DATE":
            normalized_date = normalize_date_time(surface)
            if normalized_date:
                surface = normalized_date.strftime("%Y-%m-%d")
        entities[span["category"]].append(surface)
    return entities
# Main function
# Clock-time range such as "3PM to 4PM", "10:30 - 11:45" or "9 until 5pm".
# The look-arounds stop the pattern from starting or ending inside a longer
# number or timestamp (e.g. the "24-01" inside "2024-01-15").
_TIME_RANGE_PATTERN = re.compile(
    r"(?<![\d:])"
    r"(?P<h1>\d{1,2})(?::(?P<m1>\d{2}))?(?:\s*(?P<ap1>am|pm)\b)?"
    r"\s*(?:to|until|-)\s*"
    r"(?P<h2>\d{1,2})(?::(?P<m2>\d{2}))?(?:\s*(?P<ap2>am|pm)\b)?"
    r"(?![\d:])",
    re.IGNORECASE,
)


def _hour_candidates(hour, own_meridiem, other_meridiem):
    """Return the plausible 24-hour readings of ``hour``, most likely first."""
    if hour > 23:
        return []
    if not 1 <= hour <= 12:
        # 0 and 13-23 are only meaningful on a 24-hour clock.
        return [hour]

    def to_24_hour(meridiem):
        return hour % 12 + (12 if meridiem.lower() == "pm" else 0)

    if own_meridiem:
        return [to_24_hour(own_meridiem)]
    if other_meridiem:
        # "3 to 4PM": borrow the other side's marker, but keep the opposite
        # half of the day as an alternative ("11 to 1PM" means 11 AM).
        guess = to_24_hour(other_meridiem)
        return [guess, (guess + 12) % 24]
    return [hour]


def _find_time_range(text):
    """Return ``(start, end)`` for the first clock-time range in ``text``.

    Both values are ``None`` when no range is found. The datetimes are
    anchored to today's date; an end earlier than the start is taken to fall
    on the following day.
    """
    midnight = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
    one_day = timedelta(days=1)

    for match in _TIME_RANGE_PATTERN.finditer(text):
        parts = match.groupdict()
        # Bare numbers ("5 to 10 people", "01-15") are not clock times:
        # require a colon or an AM/PM marker on at least one side.
        if not (parts["m1"] or parts["m2"] or parts["ap1"] or parts["ap2"]):
            continue

        start_minute = int(parts["m1"] or 0)
        end_minute = int(parts["m2"] or 0)
        if start_minute > 59 or end_minute > 59:
            continue

        start_hours = _hour_candidates(int(parts["h1"]), parts["ap1"], parts["ap2"])
        end_hours = _hour_candidates(int(parts["h2"]), parts["ap2"], parts["ap1"])

        # When an AM/PM marker had to be guessed, prefer the reading that
        # yields the shortest non-zero span.
        best = None
        for start_hour in start_hours:
            for end_hour in end_hours:
                start = midnight.replace(hour=start_hour, minute=start_minute)
                end = midnight.replace(hour=end_hour, minute=end_minute)
                if end < start:
                    end += one_day  # range crosses midnight
                rank = (end - start) or one_day
                if best is None or rank < best[0]:
                    best = (rank, start, end)
        if best is not None:
            return best[1], best[2]

    return None, None


def process_text(text):
    """Summarise the event described in ``text`` as a multi-line report.

    Entities come from ``extract_entities``. Start and end times are taken
    from the first explicit time range in the text (e.g. ``"3PM to 4PM"``);
    if there is none, the first ``TIME`` entity, when present, is used as the
    start time and the end time is left unset.

    Args:
        text: Free-form description of an event. ``None`` and empty strings
            are accepted and produce a report with placeholder values.

    Returns:
        A string with one ``"<Field>: <value>"`` line each for Event, People,
        Date, Start Time, End Time, Duration and Location. Missing values are
        shown as ``"Unknown"`` (Event, Duration) or ``"None"``.
    """
    text = text or ""
    # Skip the model entirely for blank input; every field then falls back
    # to its placeholder.
    entities = extract_entities(text) if text.strip() else {}

    start_time, end_time = _find_time_range(text)
    if start_time is None:
        # If no range, use the first TIME entity as start time
        time_mentions = entities.get("TIME")
        if time_mentions:
            start_time = normalize_date_time(time_mentions[0])

    # Calculate duration if both start and end times are available
    if start_time is not None and end_time is not None:
        duration = calculate_duration(start_time, end_time)
    else:
        duration = "Unknown"
    start_time_str = start_time.strftime("%H:%M") if start_time is not None else "None"
    end_time_str = end_time.strftime("%H:%M") if end_time is not None else "None"

    def joined(key):
        values = entities.get(key)
        return ", ".join(values) if values else "None"

    events = entities.get("EVENT")
    event = events[0] if events else "Unknown"

    # Format the output
    return (
        f"Event: {event}\n"
        f"People: {joined('PERSON')}\n"
        f"Date: {joined('DATE')}\n"
        f"Start Time: {start_time_str}\n"
        f"End Time: {end_time_str}\n"
        f"Duration: {duration}\n"
        f"Location: {joined('GPE')}\n"
    )
# Gradio interface: a single text box in, a single text box out, with
# process_text as the handler that turns the input into the report.
demo = gr.Interface(
    fn=process_text,
    inputs="text",
    outputs="text",
    title="Transformer-based Text Normalization",
    description="Extract events, dates, times, and locations from input text using a high-accuracy Transformer NER model."
)
# Start the web server only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    print("Launching Gradio application...")
    # Bind to every network interface on port 7860.
    demo.launch(server_port=7860, server_name="0.0.0.0")