Spaces:

Quantified
/

conversation-rebuilder

Runtime error

App Files Files Community

conversation-rebuilder / app.py

sergiomar73

Update app.py

5df7bc2 verified almost 2 years ago

raw

history blame contribute delete

5.17 kB

	import csv
	import gradio as gr # type: ignore
	import pandas as pd # type: ignore

	# Declare the Gradio UI: two code editors for the pasted inputs, a
	# Clear/Rebuild button row, a non-interactive results table and a
	# non-interactive file slot for the exported files.
	# NOTE(review): indentation appears to have been flattened in this copy;
	# presumably each `with` header owns the statements that follow it (up to
	# the next `with gr.Row():`), and the `gr.Blocks` context stays open for
	# the click handler registered further down — confirm against the
	# original file.
	with gr.Blocks(title="Conversation rebuilder",theme="gradio/monochrome") as app:
	# Page heading and usage instructions.
	gr.Markdown(
	"""# Conversation rebuilder
	Please, fill the Database Transcript and the List of matched clips from Kibana, and click the Rebuild button"""
	)
	# Input row: the transcript text and the tab-separated clip lines.
	with gr.Row():
	txt_transcript = gr.Code(
	label="Database Transcript",
	interactive=True,
	lines=5,
	)
	txt_clips = gr.Code(
	label="Kibana clips",
	interactive=True,
	lines=5,
	)
	# Action row: Clear resets both inputs; Rebuild is wired to the handler
	# through `btn_build.click`.
	with gr.Row():
	gr.ClearButton(value="Clear", variant="secondary", size="sm", components=[txt_transcript, txt_clips])
	btn_build = gr.Button(value="Rebuild", variant="primary", size="sm")
	# Output row: one table row per rebuilt agent line. The five headers match
	# the column names of the DataFrame the handler returns.
	with gr.Row():
	data = gr.Dataframe(
	label="CONVERSATION",
	headers=["index", "user", "agent", "gpt", "distance"],
	datatype=["str", "str", "str", "str", "number"],
	column_widths=["8%","29%","29%","29%","5%"],
	# row_count=(1, "fixed"),
	col_count=(5, "fixed"),
	interactive=False,
	wrap=True,
	)
	# Export row: receives the file paths returned by the handler.
	# NOTE(review): the handler returns a list of two paths while
	# file_count is "single" — verify this renders as intended.
	with gr.Row():
	file = gr.File(
	label="Export files",
	show_label=True,
	height=60,
	container=True,
	interactive=False,
	file_count="single",
	)

	# Zero-based positions of the fields in one tab-separated clip line
	# (the text pasted into the "Kibana clips" box).
	COL_TIMESTAMP = 0
	COL_CONVERSATION_ID = 1
	COL_CLIP_COLLECTION_ID = 2
	# Previously 2, which collided with COL_CLIP_COLLECTION_ID; 3 is the only
	# index left unused by the neighbouring columns.
	# NOTE(review): confirm the position against a real export.
	COL_REQUEST_ID = 3
	COL_SENTENCE_INDEX = 4
	COL_SENTENCE_ORIGINAL = 5
	COL_CLIP_TEXT = 6
	COL_CLIP_ID = 7
	COL_DISTANCE = 8

	def find_clips_matched(agent_text,clips):
	    """Look up the clip line whose clip text equals ``agent_text``.

	    ``clips`` is a multi-line string. Each line is stripped, split on tabs,
	    and the ``COL_*`` constants select the fields. Lines with too few
	    columns (blank lines, truncated pastes) are skipped instead of raising
	    ``IndexError``.

	    Args:
	        agent_text: Text to compare against the ``COL_CLIP_TEXT`` field.
	        clips: Tab-separated clip lines, one clip per line.

	    Returns:
	        ``(original_sentence, sentence_index, distance)`` taken from the
	        first matching line, with the index converted to ``int`` and the
	        distance to a ``float`` rounded to two decimals; ``None`` when no
	        line matches.

	    Raises:
	        ValueError: If the matching line's sentence index or distance
	            field is not numeric.
	    """
	    # Highest column this function reads; shorter lines cannot be used.
	    required_columns = max(
	        COL_CLIP_TEXT,
	        COL_SENTENCE_ORIGINAL,
	        COL_SENTENCE_INDEX,
	        COL_DISTANCE,
	    ) + 1
	    for clip in clips.splitlines():
	        parts = clip.strip().split('\t')
	        if len(parts) < required_columns:
	            continue
	        if parts[COL_CLIP_TEXT] == agent_text:
	            return (
	                parts[COL_SENTENCE_ORIGINAL],
	                int(parts[COL_SENTENCE_INDEX]),
	                round(float(parts[COL_DISTANCE]), 2),
	            )
	    # Explicit "no match" result; callers must check for it before unpacking.
	    return None

	@btn_build.click(inputs=[txt_transcript,txt_clips], outputs=[data, file])
	def rebuild_conversation(transcript, clips):
	    """Rebuild the conversation table and export it as TSV and Excel files.

	    Transcript lines starting with ``user:`` set the pending user text and
	    advance the conversation counter. Lines starting with ``agent:`` are
	    looked up in ``clips`` through ``find_clips_matched`` and appended as a
	    row ``[index, user, agent, gpt, distance]``. Agent lines with no
	    matching clip are kept, with order ``00`` and blank ``gpt``/``distance``
	    cells, instead of aborting the rebuild.

	    Args:
	        transcript: Transcript text, one ``user:``/``agent:`` line per turn.
	        clips: Tab-separated clip lines, one clip per line.

	    Returns:
	        ``(df, files)``: the conversation DataFrame and the list
	        ``[excel_file_name, tsv_file_name]`` of files written to the
	        current directory. ``files`` is ``None`` when either input is empty.
	    """
	    df = pd.DataFrame({"index": [], "user": [], "agent": [], "gpt": [], "distance": []})
	    if not transcript.strip() or not clips.strip():
	        msg = "EMPTY TRANSCRIPT OR LIST OF CLIPS!"
	        df.loc[len(df.index)] = ["", msg, "", "", ""]
	        return df, None

	    user_prefix = "user:"
	    agent_prefix = "agent:"
	    user_text = ""
	    conversation_line = 1
	    for raw_line in transcript.splitlines():
	        line = raw_line.strip()
	        if line.startswith(user_prefix):
	            # Drop only the leading marker; later "user:" occurrences in
	            # the sentence belong to the text.
	            user_text = line[len(user_prefix):].strip()
	            conversation_line += 1
	        elif line.startswith(agent_prefix):
	            agent_text = line[len(agent_prefix):].strip()
	            match = find_clips_matched(agent_text, clips)
	            if match is None:
	                # No clip for this sentence: keep the row so the
	                # transcript stays complete.
	                gpt, order, distance = "", 0, ""
	            else:
	                gpt, order, distance = match
	            index = f"{str(conversation_line).zfill(3)}-{str(order).zfill(2)}"
	            df.loc[len(df.index)] = [index, user_text, agent_text, gpt, distance]
	            user_text = ""

	    # Get conversation ID from the first clip line that carries one.
	    conversation_id = "unknown"
	    for clip in clips.splitlines():
	        parts = clip.strip().split('\t')
	        if len(parts) > COL_CONVERSATION_ID and parts[COL_CONVERSATION_ID].strip():
	            conversation_id = parts[COL_CONVERSATION_ID].strip()
	            break
	    # The ID comes from pasted text and ends up in a file name, so replace
	    # anything that could act as a path separator or special character.
	    conversation_id = "".join(
	        ch if ch.isalnum() or ch in "-_" else "_" for ch in conversation_id
	    )
	    tsv_file_name = f'conversation-{conversation_id}.tsv'
	    excel_file_name = f'conversation-{conversation_id}.xlsx'

	    # Build TSV file
	    df.to_csv(
	        tsv_file_name,
	        sep="\t",
	        encoding="utf-8",
	        index=False,
	        header=True,
	        quoting=csv.QUOTE_ALL,
	    )

	    # Use the "index" column as the sheet's row index; set_index returns a
	    # new frame, so the DataFrame returned to the UI is left untouched.
	    df_excel = df.set_index('index')

	    # Build Excel file
	    # https://xlsxwriter.readthedocs.io/working_with_pandas.html
	    # The context manager closes (and thereby saves) the workbook even if
	    # formatting raises.
	    with pd.ExcelWriter(excel_file_name, engine='xlsxwriter') as writer:
	        df_excel.to_excel(writer, sheet_name='Conversation')
	        workbook = writer.book
	        worksheet = writer.sheets["Conversation"]
	        # https://xlsxwriter.readthedocs.io/format.html#number-formats-in-different-locales
	        number_format = workbook.add_format({'num_format': '#,##0.00'})
	        text_format = workbook.add_format({'text_wrap': True})
	        # Set the columns widths.
	        worksheet.set_column("B:D", 50, text_format)
	        worksheet.set_column('E:E', 8, number_format)
	        # Autofit the worksheet.
	        worksheet.autofit()
	    return df, [excel_file_name,tsv_file_name]

	# Start the Gradio server for the `app` Blocks object created by the
	# `with gr.Blocks(...) as app:` statement; no arguments are passed, so
	# Gradio's launch defaults apply.
	app.launch()