Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -30,14 +30,12 @@ COLOR_MAP = {
|
|
| 30 |
"Follow-up Needed": "orange",
|
| 31 |
}
|
| 32 |
|
| 33 |
-
|
| 34 |
def read_docx(path):
|
| 35 |
if not docx:
|
| 36 |
return "Error: python-docx not installed."
|
| 37 |
d = docx.Document(path)
|
| 38 |
return "\n".join([p.text for p in d.paragraphs])
|
| 39 |
|
| 40 |
-
|
| 41 |
def read_vtt(path):
|
| 42 |
with open(path, "r", encoding="utf-8") as f:
|
| 43 |
lines = f.read().split("\n")
|
|
@@ -48,13 +46,11 @@ def read_vtt(path):
|
|
| 48 |
]
|
| 49 |
return " ".join(cleaned)
|
| 50 |
|
| 51 |
-
|
| 52 |
def get_empty_df():
|
| 53 |
return pd.DataFrame(
|
| 54 |
columns=["File ID", "Coded Segment", "Code"] + list(METADATA_FIELDS.keys())
|
| 55 |
)
|
| 56 |
|
| 57 |
-
|
| 58 |
def process_file(file_obj):
|
| 59 |
if file_obj is None:
|
| 60 |
return "", "", get_empty_df()
|
|
@@ -69,7 +65,6 @@ def process_file(file_obj):
|
|
| 69 |
text = f.read()
|
| 70 |
return text, name, get_empty_df()
|
| 71 |
|
| 72 |
-
|
| 73 |
def build_transcript_html(text, df):
|
| 74 |
display_text = text
|
| 75 |
if df is not None and not df.empty:
|
|
@@ -102,22 +97,23 @@ def build_transcript_html(text, df):
|
|
| 102 |
"""
|
| 103 |
return html
|
| 104 |
|
| 105 |
-
|
| 106 |
def apply_code(df, segment, code, file_id, *metadata_values):
|
| 107 |
-
if not
|
| 108 |
-
return df, "⚠️
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
meta_dict = dict(zip(METADATA_FIELDS.keys(), metadata_values))
|
| 110 |
new_row = {"File ID": file_id, "Coded Segment": segment, "Code": code, **meta_dict}
|
| 111 |
df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
|
| 112 |
return df, f"✅ Segment coded as '{code}'"
|
| 113 |
|
| 114 |
-
|
| 115 |
def add_new_code(new_code, code_list):
|
| 116 |
if new_code and new_code not in code_list:
|
| 117 |
code_list.append(new_code)
|
| 118 |
return code_list
|
| 119 |
|
| 120 |
-
|
| 121 |
def export_excel(df):
|
| 122 |
if df.empty:
|
| 123 |
return None, "Nothing to export"
|
|
@@ -125,8 +121,8 @@ def export_excel(df):
|
|
| 125 |
df.to_excel(path, index=False)
|
| 126 |
return path, "Excel ready"
|
| 127 |
|
| 128 |
-
|
| 129 |
with gr.Blocks() as demo:
|
|
|
|
| 130 |
# States
|
| 131 |
full_text = gr.State("")
|
| 132 |
file_id = gr.State("")
|
|
@@ -140,7 +136,7 @@ with gr.Blocks() as demo:
|
|
| 140 |
for k, lbl in METADATA_FIELDS.items():
|
| 141 |
metadata_inputs.append(gr.Textbox(label=lbl))
|
| 142 |
|
| 143 |
-
#
|
| 144 |
with gr.Row():
|
| 145 |
# Left: transcript
|
| 146 |
with gr.Column(scale=3):
|
|
@@ -193,10 +189,10 @@ with gr.Blocks() as demo:
|
|
| 193 |
outputs=code_dropdown,
|
| 194 |
)
|
| 195 |
|
|
|
|
| 196 |
apply_btn.click(
|
| 197 |
apply_code,
|
| 198 |
-
inputs=[coded_df_state,
|
| 199 |
-
+ metadata_inputs,
|
| 200 |
outputs=[coded_df_state, status],
|
| 201 |
)
|
| 202 |
|
|
|
|
| 30 |
"Follow-up Needed": "orange",
|
| 31 |
}
|
| 32 |
|
|
|
|
| 33 |
def read_docx(path):
    """Return the text of the .docx file at *path*, one paragraph per line.

    Relies on the module-level name ``docx``. When that name is falsy the
    function does not raise; it returns an error string instead, which the
    caller receives in place of the document text.
    """
    if not docx:
        return "Error: python-docx not installed."
    d = docx.Document(path)
    # A generator is enough here: join consumes the paragraphs once.
    return "\n".join(p.text for p in d.paragraphs)
|
| 38 |
|
|
|
|
| 39 |
def read_vtt(path):
|
| 40 |
with open(path, "r", encoding="utf-8") as f:
|
| 41 |
lines = f.read().split("\n")
|
|
|
|
| 46 |
]
|
| 47 |
return " ".join(cleaned)
|
| 48 |
|
|
|
|
| 49 |
def get_empty_df():
    """Return an empty DataFrame with the coding-table columns.

    The columns are the three fixed ones ("File ID", "Coded Segment",
    "Code") followed by every key of the module-level ``METADATA_FIELDS``
    mapping, in that mapping's iteration order.
    """
    # Unpacking a dict yields its keys, same as list(METADATA_FIELDS.keys()).
    columns = ["File ID", "Coded Segment", "Code", *METADATA_FIELDS]
    return pd.DataFrame(columns=columns)
|
| 53 |
|
|
|
|
| 54 |
def process_file(file_obj):
|
| 55 |
if file_obj is None:
|
| 56 |
return "", "", get_empty_df()
|
|
|
|
| 65 |
text = f.read()
|
| 66 |
return text, name, get_empty_df()
|
| 67 |
|
|
|
|
| 68 |
def build_transcript_html(text, df):
|
| 69 |
display_text = text
|
| 70 |
if df is not None and not df.empty:
|
|
|
|
| 97 |
"""
|
| 98 |
return html
|
| 99 |
|
|
|
|
| 100 |
def apply_code(df, segment, code, file_id, *metadata_values):
    """Append one coded segment to *df* and report the outcome.

    Args:
        df: The DataFrame of segments coded so far.
        segment: The selected transcript text to code.
        code: The code (label) to attach to the segment.
        file_id: Identifier of the currently loaded file.
        *metadata_values: Values paired positionally with the keys of the
            module-level ``METADATA_FIELDS`` mapping.

    Returns:
        A ``(df, message)`` tuple. When validation fails, *df* is returned
        unchanged together with a warning message; otherwise a new
        DataFrame with the row appended and a confirmation message.
    """
    if not file_id:
        return df, "⚠️ Upload a file first"
    # A selection made only of whitespace carries no content to code, so it
    # is treated the same as no selection at all.
    if not segment or not str(segment).strip():
        return df, "⚠️ Select text in transcript"
    if not code:
        return df, "⚠️ Select a code"

    # zip stops at the shorter input, so surplus values or fields are dropped.
    meta_dict = dict(zip(METADATA_FIELDS.keys(), metadata_values))
    # The segment is stored exactly as selected (not stripped).
    new_row = {
        "File ID": file_id,
        "Coded Segment": segment,
        "Code": code,
        **meta_dict,
    }
    df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
    return df, f"✅ Segment coded as '{code}'"
|
| 111 |
|
|
|
|
| 112 |
def add_new_code(new_code, code_list):
    """Add *new_code* to *code_list* if it is non-blank and not already there.

    Surrounding whitespace is stripped from the name first, so a blank
    entry is ignored and a padded name cannot create a near-duplicate.

    Args:
        new_code: The code name to add; may be empty or ``None``.
        code_list: The existing list of codes. It is modified in place.

    Returns:
        The same *code_list* object, with the cleaned name appended when
        it was added.
    """
    candidate = new_code.strip() if isinstance(new_code, str) else new_code
    if candidate and candidate not in code_list:
        code_list.append(candidate)
    return code_list
|
| 116 |
|
|
|
|
| 117 |
def export_excel(df):
|
| 118 |
if df.empty:
|
| 119 |
return None, "Nothing to export"
|
|
|
|
| 121 |
df.to_excel(path, index=False)
|
| 122 |
return path, "Excel ready"
|
| 123 |
|
|
|
|
| 124 |
with gr.Blocks() as demo:
|
| 125 |
+
|
| 126 |
# States
|
| 127 |
full_text = gr.State("")
|
| 128 |
file_id = gr.State("")
|
|
|
|
| 136 |
for k, lbl in METADATA_FIELDS.items():
|
| 137 |
metadata_inputs.append(gr.Textbox(label=lbl))
|
| 138 |
|
| 139 |
+
# Main row: transcript left, coding right
|
| 140 |
with gr.Row():
|
| 141 |
# Left: transcript
|
| 142 |
with gr.Column(scale=3):
|
|
|
|
| 189 |
outputs=code_dropdown,
|
| 190 |
)
|
| 191 |
|
| 192 |
+
# Apply button uses hidden textbox as input
|
| 193 |
apply_btn.click(
|
| 194 |
apply_code,
|
| 195 |
+
inputs=[coded_df_state, hidden_segment, code_dropdown, file_id] + metadata_inputs,
|
|
|
|
| 196 |
outputs=[coded_df_state, status],
|
| 197 |
)
|
| 198 |
|