Spaces:

AliasMedia
/

Schedule_Formatter_maintenance

Sleeping

File size: 5,454 Bytes

e387e2b
 
d2ed2b1
 
 
 
 
 
 
 
 
 
 
 
 
 
e387e2b
d2ed2b1
 
 
 
e387e2b
 
 
 
 
 
d2ed2b1
 
e387e2b
d2ed2b1
06b9c84
 
 
950501b
b175dc7
 
06b9c84
d2ed2b1
06b9c84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d2ed2b1
b175dc7
 
 
 
 
 
 
 
 
 
 
a341f2b
 
b175dc7
 
 
d2ed2b1
06b9c84
 
 
facc6a0
06b9c84
 
d2ed2b1
 
06b9c84
 
 
d2ed2b1
06b9c84
 
 
 
 
 
 
 
 
950501b
22d5ae9
 
 
 
5190e9a
b175dc7
5190e9a
06b9c84
facc6a0
 
06b9c84
d2ed2b1
e634182
 
 
 
06b9c84
 
 
 
ba190fc
42ad299
 
 
950501b
92f884b
156d664
42ad299
 
 
d2ed2b1
 
 
 
42ad299
 
06b9c84
ba190fc
06b9c84
e634182
 
92f884b
42ad299
06b9c84
 
e634182
92f884b
 
42ad299
 
e634182
42ad299
 
92f884b
e634182
156d664
e634182
06b9c84
 
 
e387e2b

# app.py

# ─── monkey-patch Gradio to swallow API-info errors ───
import gradio as _gr

_orig_get_api_info = _gr.Blocks.get_api_info

def _safe_get_api_info(self):
    try:
        return _orig_get_api_info(self)
    except Exception:
        # return empty spec if parsing fails
        return {}

_gr.Blocks.get_api_info = _safe_get_api_info
# ───────────────────────────────────────────────────────

# ─── optional: patch client_utils.get_type to handle boolean schemas ───
import gradio_client.utils as _client_utils

_orig_get_type = _client_utils.get_type

def safe_get_type(schema, *args, **kwargs):
    if isinstance(schema, bool):
        return "dict"
    return _orig_get_type(schema, *args, **kwargs)

_client_utils.get_type = safe_get_type
# ─────────────────────────────────────────────────────────────────────

import gradio as gr
import pandas as pd
from fuzzywuzzy import process, fuzz
import tempfile
import os
from openpyxl import load_workbook
from openpyxl.styles import Alignment


def auto_correct_names(series, threshold=90):
    """Auto-correct typos in chatter names using fuzzy matching."""
    unique_names = series.dropna().unique()
    name_mapping = {}
    
    for name in unique_names:
        matches = process.extractBests(
            name, unique_names, 
            scorer=fuzz.token_sort_ratio, 
            score_cutoff=threshold
        )
        if matches:
            best_match = max(matches, key=lambda x: (x[1], list(series).count(x[0])))
            name_mapping[name] = best_match[0]
    
    return series.replace(name_mapping)


def adjust_excel_formatting(file_path):
    """Adjust column widths and enable text wrapping in Excel file."""
    wb = load_workbook(file_path)
    ws = wb.active

    for col in ws.columns:
        max_length = 0
        col_letter = col[0].column_letter
        for cell in col:
            if cell.value:
                max_length = max(max_length, len(str(cell.value)))
            cell.alignment = Alignment(wrap_text=True)
        ws.column_dimensions[col_letter].width = max_length + 2

    wb.save(file_path)


def process_file(input_file):
    """Process uploaded Excel file and return output"""
    try:
        input_df = pd.read_excel(input_file, header=1)
        date_columns = input_df.columns[1:].tolist()
        df_long = input_df.melt(
            id_vars=[input_df.columns[0]],
            var_name='DATE',
            value_name='CHATTER'
        )
        df_long['DATE'] = pd.Categorical(
            df_long['DATE'],
            categories=date_columns,
            ordered=True
        )
        df_long['CHATTER'] = auto_correct_names(df_long['CHATTER'])
        grouped = df_long.groupby(['CHATTER', 'DATE'], observed=True)[input_df.columns[0]] \
            .apply(lambda x: ', '.join(sorted(x))).reset_index()
        pivoted = grouped.pivot(index='CHATTER', columns='DATE', values=input_df.columns[0])
        chatter_order = grouped['CHATTER'].value_counts().index.tolist()
        final_df = pivoted.reindex(chatter_order)[date_columns].fillna("OFF")
        final_df = final_df.reset_index()
        original_filename = os.path.basename(input_file)
        name_part, ext_part = os.path.splitext(original_filename)
        processed_filename = f"{name_part}_processed{ext_part}"
        temp_file_path = os.path.join(tempfile.gettempdir(), processed_filename)
        final_df.to_excel(temp_file_path, index=False, sheet_name='Schedule')
        adjust_excel_formatting(temp_file_path)
        return final_df, temp_file_path
    except Exception as e:
        error_df = pd.DataFrame({"Error": [f"⚠️ {str(e)}"]})
        return error_df, None


def download_file(out_path):
    """Return the processed file path for download"""
    return out_path

with gr.Blocks(title="Schedule Processor") as demo:
    gr.Markdown("# 📅 Schedule Processor")
    gr.Markdown("Upload your schedule Excel file and download the formatted version")
    with gr.Row():
        input_file = gr.File(label="Upload Schedule File", type="filepath")
    with gr.Row():
        process_btn = gr.Button("Process File", variant="primary")
        reset_btn = gr.Button("Upload New File")
    output_table = gr.Dataframe(label="Preview", wrap=True)
    download_button = gr.Button("Download Processed File", visible=False)
    temp_file_path = gr.State(value=None)

    def reset_components():
        return [
            None,
            pd.DataFrame(),
            None,
            gr.update(visible=False)
        ]

    def process_and_show(file):
        df, out_path = process_file(file)
        if out_path:
            return df, out_path, gr.update(visible=True)
        return df, None, gr.update(visible=False)

    process_btn.click(
        process_and_show,
        inputs=input_file,
        outputs=[output_table, temp_file_path, download_button]
    )

    reset_btn.click(
        reset_components,
        outputs=[input_file, output_table, temp_file_path, download_button]
    )

    download_button.click(
        download_file,
        inputs=temp_file_path,
        outputs=gr.File(label="Processed Schedule")
    )

if __name__ == "__main__":
    demo.launch()