import csv
import gradio as gr # type: ignore
import pandas as pd # type: ignore

with gr.Blocks(title="Conversation rebuilder", theme="gradio/monochrome") as app:
    gr.Markdown(
    """# Conversation rebuilder
    Fill in the Database Transcript and the list of matched clips from Kibana, then click the Rebuild button."""
    )
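    # The transcript box expects one "user:"/"agent:" prefixed line per turn,
    # e.g. (illustrative values, not real data):
    #   user: Hello
    #   agent: Hi, how can I help you today?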
    with gr.Row():
        txt_transcript = gr.Code(
            label="Database Transcript",
            interactive=True,
            lines=5,
        )
        txt_clips = gr.Code(
            label="Kibana clips",
            interactive=True,
            lines=5,
        )
    with gr.Row():
        gr.ClearButton(value="Clear", variant="secondary", size="sm", components=[txt_transcript, txt_clips])
        btn_build = gr.Button(value="Rebuild", variant="primary", size="sm")
    with gr.Row():
        data = gr.Dataframe(
            label="CONVERSATION",
            headers=["index", "user", "agent", "gpt", "distance"],
            datatype=["str", "str", "str", "str", "number"],
            column_widths=["8%","29%","29%","29%","5%"],
            # row_count=(1, "fixed"),
            col_count=(5, "fixed"),
            interactive=False,
            wrap=True,            
        )
    with gr.Row():
        file = gr.File(
            label="Export files",
            show_label=True,
            height=60,
            container=True,
            interactive=False,
            file_count="single",
        )        

    # Column indices in a tab-separated Kibana clip line.
    COL_TIMESTAMP = 0
    COL_CONVERSATION_ID = 1
    COL_CLIP_COLLECTION_ID = 2
    COL_REQUEST_ID = 3
    COL_SENTENCE_INDEX = 4
    COL_SENTENCE_ORIGINAL = 5
    COL_CLIP_TEXT = 6
    COL_CLIP_ID = 7
    COL_DISTANCE = 8
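    # Illustrative clip line matching the indices above (fields are
    # tab-separated; these values are assumptions, not real data):
    #   2024-01-01T00:00:00Z  conv-123  coll-1  req-9  4
    #   "original sentence"  "clip text"  clip-42  0.1234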

    def find_clips_matched(agent_text, clips):
        """Return (original sentence, sentence index, distance) for the clip
        whose text matches agent_text, or a placeholder if nothing matches."""
        for clip in clips.splitlines():
            parts = clip.strip().split('\t')
            if len(parts) > COL_DISTANCE and parts[COL_CLIP_TEXT] == agent_text:
                return (
                    parts[COL_SENTENCE_ORIGINAL],
                    int(parts[COL_SENTENCE_INDEX]),
                    round(float(parts[COL_DISTANCE]), 2),
                )
        # No match: return a placeholder so the caller's unpack never fails.
        return "", 0, None

    @btn_build.click(inputs=[txt_transcript,txt_clips], outputs=[data, file])
    def rebuild_conversation(transcript, clips):
        df = pd.DataFrame({"index": [], "user": [], "agent": [], "gpt": [], "distance": []})
        if not transcript.strip() or not clips.strip():
            msg = "EMPTY TRANSCRIPT OR LIST OF CLIPS!"
            df.loc[len(df.index)] = ["", msg, "", "", ""]
            return df, None
        user_text = ""
        conversation_line = 1
        for raw_line in transcript.splitlines():
            line = raw_line.strip()
            if not line:
                continue
            if line.startswith("user:"):
                user_text = line.replace("user:", "", 1).strip()
                conversation_line += 1
            elif line.startswith("agent:"):
                agent_text = line.replace("agent:", "", 1).strip()
                gpt, order, distance = find_clips_matched(agent_text, clips)
                index = f"{conversation_line:03d}-{order:02d}"
                df.loc[len(df.index)] = [index, user_text, agent_text, gpt, distance]
                user_text = ""
        # Get the conversation ID from the first clip line
        clip_list = clips.splitlines()
        parts = clip_list[0].strip().split('\t')
        conversation_id = parts[COL_CONVERSATION_ID]
        tsv_file_name = f'conversation-{conversation_id}.tsv'
        excel_file_name = f'conversation-{conversation_id}.xlsx'
        # Build TSV file
        df.to_csv(
            tsv_file_name,
            sep="\t",
            encoding="utf-8",
            index=False,
            header=True,
            quoting=csv.QUOTE_ALL,
        )
        # Build Excel file, formatting columns via XlsxWriter
        # (https://xlsxwriter.readthedocs.io/working_with_pandas.html)
        df_excel = df.copy(deep=True)
        df_excel.set_index('index', inplace=True)
        with pd.ExcelWriter(excel_file_name, engine='xlsxwriter') as writer:
            df_excel.to_excel(writer, sheet_name='Conversation')
            workbook = writer.book
            worksheet = writer.sheets["Conversation"]
            # https://xlsxwriter.readthedocs.io/format.html#number-formats-in-different-locales
            number_format = workbook.add_format({'num_format': '#,##0.00'})
            text_format = workbook.add_format({'text_wrap': True})
            # Wrap the text columns and format the distance column.
            worksheet.set_column("B:D", 50, text_format)
            worksheet.set_column('E:E', 8, number_format)
            worksheet.autofit()
        return df, [excel_file_name,tsv_file_name]

app.launch()