# Listing captured from a "Spaces" file viewer; status shown at capture time: "Runtime error".
# File size: 5,171 bytes. Revisions shown: d3b2949 5df7bc2 d3b2949 289c6b4
import csv
import gradio as gr # type: ignore
import pandas as pd # type: ignore
# Zero-based column positions inside one tab-separated row of the Kibana
# clip listing pasted by the user.
COL_TIMESTAMP = 0
COL_CONVERSATION_ID = 1
COL_CLIP_COLLECTION_ID = 2
# Was 2, which collided with COL_CLIP_COLLECTION_ID; 3 is the only index the
# sequence skips.  NOTE(review): unused in this file - confirm against the
# real export layout.
COL_REQUEST_ID = 3
COL_SENTENCE_INDEX = 4
COL_SENTENCE_ORIGINAL = 5
COL_CLIP_TEXT = 6
COL_CLIP_ID = 7
COL_DISTANCE = 8


def find_clips_matched(agent_text, clips):
    """Find the clip row whose clip text equals ``agent_text``.

    Args:
        agent_text: Agent sentence taken from the transcript.
        clips: Raw clip listing, one tab-separated row per line.

    Returns:
        A tuple ``(original_sentence, sentence_index, distance)`` built from
        the first matching row, with the distance rounded to two decimals,
        or ``None`` when no row matches.

    Raises:
        ValueError: If the matching row holds a non-numeric sentence index
            or distance.
    """
    for row in clips.splitlines():
        parts = row.strip().split("\t")
        # Blank or truncated rows do not carry every column we read below,
        # so they can never be a usable match.
        if len(parts) <= COL_DISTANCE:
            continue
        if parts[COL_CLIP_TEXT] == agent_text:
            return (
                parts[COL_SENTENCE_ORIGINAL],
                int(parts[COL_SENTENCE_INDEX]),
                round(float(parts[COL_DISTANCE]), 2),
            )
    return None


with gr.Blocks(title="Conversation rebuilder", theme="gradio/monochrome") as app:
    gr.Markdown(
        "# Conversation rebuilder\n"
        "Please, fill the Database Transcript and the List of matched clips from Kibana, and click the Rebuild button"
    )

    # Inputs: the two pasted text blobs, side by side.
    with gr.Row():
        txt_transcript = gr.Code(
            label="Database Transcript",
            interactive=True,
            lines=5,
        )
        txt_clips = gr.Code(
            label="Kibana clips",
            interactive=True,
            lines=5,
        )

    # Actions.
    with gr.Row():
        gr.ClearButton(
            value="Clear",
            variant="secondary",
            size="sm",
            components=[txt_transcript, txt_clips],
        )
        btn_build = gr.Button(value="Rebuild", variant="primary", size="sm")

    # Output table with the rebuilt conversation.
    with gr.Row():
        data = gr.Dataframe(
            label="CONVERSATION",
            headers=["index", "user", "agent", "gpt", "distance"],
            datatype=["str", "str", "str", "str", "number"],
            column_widths=["8%", "29%", "29%", "29%", "5%"],
            col_count=(5, "fixed"),
            interactive=False,
            wrap=True,
        )

    # Download area for the generated export files.
    with gr.Row():
        file = gr.File(
            label="Export files",
            show_label=True,
            height=60,
            container=True,
            interactive=False,
            file_count="single",
        )

    # The click listener is registered here because it has to run while the
    # Blocks context is still open.
    @btn_build.click(inputs=[txt_transcript, txt_clips], outputs=[data, file])
    def rebuild_conversation(transcript, clips):
        """Rebuild the conversation table and export it as TSV and Excel.

        Each ``agent:`` line of the transcript becomes one table row, paired
        with the text of the most recent ``user:`` line and with the clip
        data returned by ``find_clips_matched``.

        Args:
            transcript: Transcript text with ``user:`` / ``agent:`` lines.
            clips: Clip listing, one tab-separated row per line.

        Returns:
            ``(dataframe, files)`` where ``files`` is
            ``[excel_path, tsv_path]``, or ``None`` when either input is
            empty (the dataframe then holds a single message row).
        """
        columns = ["index", "user", "agent", "gpt", "distance"]

        # A cleared component may hand over None instead of "".
        transcript = transcript or ""
        clips = clips or ""
        if not transcript.strip() or not clips.strip():
            msg = "EMPTY TRANSCRIPT OR LIST OF CLIPS!"
            return pd.DataFrame([["", msg, "", "", ""]], columns=columns), None

        rows = []
        user_text = ""
        conversation_line = 1
        for raw_line in transcript.splitlines():
            line = raw_line.strip()
            if line.startswith("user:"):
                # Drop only the leading marker; the text itself may contain
                # "user:" and must be kept intact.
                user_text = line[len("user:"):].strip()
                conversation_line += 1
            elif line.startswith("agent:"):
                agent_text = line[len("agent:"):].strip()
                match = find_clips_matched(agent_text, clips)
                if match is None:
                    # No clip corresponds to this sentence: keep the row,
                    # leave the clip-derived columns blank.
                    gpt, order_label, distance = "", "??", None
                else:
                    gpt, order, distance = match
                    order_label = str(order).zfill(2)
                index = f"{str(conversation_line).zfill(3)}-{order_label}"
                rows.append([index, user_text, agent_text, gpt, distance])
                # The user text is shown only on the first agent row of a turn.
                user_text = ""

        df = pd.DataFrame(rows, columns=columns)

        # Conversation ID comes from the first non-blank clip row (one exists,
        # because the emptiness check above passed).
        first_row = next(row for row in clips.splitlines() if row.strip())
        parts = first_row.strip().split("\t")
        conversation_id = (
            parts[COL_CONVERSATION_ID] if len(parts) > COL_CONVERSATION_ID else ""
        )
        # The ID is pasted text: keep it from injecting path separators or
        # other unsafe characters into the output file names.
        safe_id = "".join(
            ch if ch.isalnum() or ch in "-_" else "_" for ch in conversation_id
        ) or "unknown"
        tsv_file_name = f"conversation-{safe_id}.tsv"
        excel_file_name = f"conversation-{safe_id}.xlsx"

        # Build TSV file.
        df.to_csv(
            tsv_file_name,
            sep="\t",
            encoding="utf-8",
            index=False,
            header=True,
            quoting=csv.QUOTE_ALL,
        )

        # Build Excel file; "index" becomes the sheet's index column (A), so
        # user/agent/gpt land in B:D and distance in E.
        # https://xlsxwriter.readthedocs.io/working_with_pandas.html
        df_excel = df.set_index("index")
        # The context manager closes the workbook even if formatting fails.
        with pd.ExcelWriter(excel_file_name, engine="xlsxwriter") as writer:
            df_excel.to_excel(writer, sheet_name="Conversation")
            workbook = writer.book
            worksheet = writer.sheets["Conversation"]
            # https://xlsxwriter.readthedocs.io/format.html#number-formats-in-different-locales
            number_format = workbook.add_format({"num_format": "#,##0.00"})
            text_format = workbook.add_format({"text_wrap": True})
            worksheet.set_column("B:D", 50, text_format)
            worksheet.set_column("E:E", 8, number_format)
            worksheet.autofit()

        return df, [excel_file_name, tsv_file_name]
# Start serving the Blocks interface bound to `app`.
app.launch()
# (end of captured listing)