Spaces:
Runtime error
Runtime error
| import csv | |
| import gradio as gr # type: ignore | |
| import pandas as pd # type: ignore | |
# Page layout: a title, two input editors, the action buttons, the result
# table and a read-only area for the exported files.
with gr.Blocks(title="Conversation rebuilder", theme="gradio/monochrome") as app:
    gr.Markdown(
        "# Conversation rebuilder\n"
        "Please, fill the Database Transcript and the List of matched clips from Kibana, and click the Rebuild button"
    )

    # Both inputs are editable code boxes of the same height.
    _editor_options = {"interactive": True, "lines": 5}
    with gr.Row():
        txt_transcript = gr.Code(label="Database Transcript", **_editor_options)
        txt_clips = gr.Code(label="Kibana clips", **_editor_options)

    with gr.Row():
        gr.ClearButton(
            components=[txt_transcript, txt_clips],
            value="Clear",
            variant="secondary",
            size="sm",
        )
        btn_build = gr.Button(value="Rebuild", variant="primary", size="sm")

    # One (header, datatype, width) entry per result column, kept together so
    # the three parallel lists passed to gr.Dataframe cannot drift apart.
    _table_columns = [
        ("index", "str", "8%"),
        ("user", "str", "29%"),
        ("agent", "str", "29%"),
        ("gpt", "str", "29%"),
        ("distance", "number", "5%"),
    ]
    with gr.Row():
        data = gr.Dataframe(
            label="CONVERSATION",
            headers=[header for header, _, _ in _table_columns],
            datatype=[kind for _, kind, _ in _table_columns],
            column_widths=[width for _, _, width in _table_columns],
            col_count=(5, "fixed"),
            interactive=False,
            wrap=True,
        )

    with gr.Row():
        file = gr.File(
            label="Export files",
            show_label=True,
            height=60,
            container=True,
            interactive=False,
            file_count="single",
        )
# Zero-based positions of the tab-separated fields in each clip row
# (the rows pasted into the "Kibana clips" box).
COL_TIMESTAMP = 0
COL_CONVERSATION_ID = 1
COL_CLIP_COLLECTION_ID = 2
# Was 2, which duplicated COL_CLIP_COLLECTION_ID and left column 3 unnamed.
COL_REQUEST_ID = 3
COL_SENTENCE_INDEX = 4
COL_SENTENCE_ORIGINAL = 5
COL_CLIP_TEXT = 6
COL_CLIP_ID = 7
COL_DISTANCE = 8
def find_clips_matched(agent_text, clips):
    """Look up the clip row whose clip text equals *agent_text*.

    Args:
        agent_text: Text of one agent line, compared for exact equality
            against the clip-text column of each row.
        clips: Multi-line string; each line is one tab-separated clip row.

    Returns:
        A tuple ``(original_sentence, sentence_index, distance)`` taken from
        the first matching row, with the index converted to ``int`` and the
        distance to a ``float`` rounded to two decimals. Returns ``None``
        when no row matches.

    Raises:
        ValueError: If the matching row's sentence index or distance is not
            a valid number.
    """
    # A row is only usable if it reaches the right-most column read below.
    # Blank or truncated rows used to raise IndexError; they are skipped now.
    last_needed = max(
        COL_CLIP_TEXT, COL_SENTENCE_ORIGINAL, COL_SENTENCE_INDEX, COL_DISTANCE
    )
    for clip in clips.splitlines():
        parts = clip.strip().split("\t")
        if len(parts) <= last_needed:
            continue
        if parts[COL_CLIP_TEXT] == agent_text:
            return (
                parts[COL_SENTENCE_ORIGINAL],
                int(parts[COL_SENTENCE_INDEX]),
                round(float(parts[COL_DISTANCE]), 2),
            )
    return None
def _strip_speaker(line, prefix):
    """Return *line* without its leading *prefix*, trimmed of whitespace."""
    # Slice instead of str.replace so that a later occurrence of the prefix
    # inside the sentence itself is preserved.
    return line[len(prefix):].strip()


def _first_conversation_id(clips):
    """Return the conversation ID of the first clip row that has one, else "unknown"."""
    for row in clips.splitlines():
        parts = row.strip().split("\t")
        if len(parts) > COL_CONVERSATION_ID and parts[COL_CONVERSATION_ID]:
            return parts[COL_CONVERSATION_ID]
    return "unknown"


def _safe_file_stem(text):
    """Replace every character other than letters, digits, "-" and "_" with "_"."""
    # The ID comes from pasted user input and ends up in a file name, so path
    # separators and similar characters must not pass through.
    cleaned = "".join(ch if ch.isalnum() or ch in "-_" else "_" for ch in text)
    return cleaned or "unknown"


def rebuild_conversation(transcript, clips):
    """Merge a transcript with its matched clips and export the result.

    Each ``agent:`` line of *transcript* is looked up in *clips* through
    ``find_clips_matched`` and combined with the most recent ``user:`` line
    into one table row. The table is then written to a TSV file and an
    Excel file in the current working directory.

    Args:
        transcript: Multi-line text whose relevant lines start with
            ``user:`` or ``agent:``. ``None`` is treated as empty.
        clips: Multi-line text of tab-separated clip rows. ``None`` is
            treated as empty.

    Returns:
        A tuple ``(df, files)``. ``df`` is a DataFrame with the columns
        ``index``, ``user``, ``agent``, ``gpt`` and ``distance``. ``files``
        is ``[excel_file_name, tsv_file_name]``, or ``None`` when either
        input is empty (``df`` then holds a single message row).
    """
    columns = ["index", "user", "agent", "gpt", "distance"]
    transcript = transcript or ""
    clips = clips or ""

    if not transcript.strip() or not clips.strip():
        msg = "EMPTY TRANSCRIPT OR LIST OF CLIPS!"
        return pd.DataFrame([["", msg, "", "", ""]], columns=columns), None

    rows = []
    user_text = ""
    conversation_line = 1
    for raw_line in transcript.splitlines():
        line = raw_line.strip()
        if line.startswith("user:"):
            user_text = _strip_speaker(line, "user:")
            conversation_line += 1
        elif line.startswith("agent:"):
            agent_text = _strip_speaker(line, "agent:")
            match = find_clips_matched(agent_text, clips)
            if match is None:
                # No clip matched this agent line: keep the row, with an
                # empty gpt/distance and order 0, instead of crashing on
                # the tuple unpacking.
                gpt, order, distance = "", 0, ""
            else:
                gpt, order, distance = match
            index = f"{str(conversation_line).zfill(3)}-{str(order).zfill(2)}"
            rows.append([index, user_text, agent_text, gpt, distance])
            # The user text is attached only to the first agent row after it.
            user_text = ""

    df = pd.DataFrame(rows, columns=columns)

    # File names are derived from the conversation ID found in the clips.
    conversation_id = _safe_file_stem(_first_conversation_id(clips))
    tsv_file_name = f'conversation-{conversation_id}.tsv'
    excel_file_name = f'conversation-{conversation_id}.xlsx'

    # Build TSV file
    df.to_csv(
        tsv_file_name,
        sep="\t",
        encoding="utf-8",
        index=False,
        header=True,
        quoting=csv.QUOTE_ALL,
    )

    # Build Excel file, using the "index" column as the sheet's row index.
    # https://xlsxwriter.readthedocs.io/working_with_pandas.html
    df_excel = df.set_index("index")
    # The context manager saves and closes the workbook even if formatting fails.
    with pd.ExcelWriter(excel_file_name, engine="xlsxwriter") as writer:
        df_excel.to_excel(writer, sheet_name="Conversation")
        workbook = writer.book
        worksheet = writer.sheets["Conversation"]
        # https://xlsxwriter.readthedocs.io/format.html#number-formats-in-different-locales
        number_format = workbook.add_format({'num_format': '#,##0.00'})
        text_format = workbook.add_format({'text_wrap': True})
        # Set the columns widths.
        worksheet.set_column("B:D", 50, text_format)
        worksheet.set_column('E:E', 8, number_format)
        # Autofit the worksheet.
        worksheet.autofit()

    return df, [excel_file_name, tsv_file_name]
# Connect the Rebuild button to its handler. No listener was registered
# anywhere in the file, so clicking the button did nothing and the table and
# file outputs were never filled. Gradio only accepts event listeners inside
# a Blocks context, hence the re-entered `with app:`.
with app:
    btn_build.click(
        fn=rebuild_conversation,
        inputs=[txt_transcript, txt_clips],
        outputs=[data, file],
    )

app.launch()