"""Gradio app for manually coding segments of interview transcripts.

A transcript (.docx, .vtt or plain text) is uploaded and shown in a text box.
The user pastes a segment, picks a code, and the coded segment is appended to
a table together with the interview metadata. The table can be exported to
an Excel file.
"""

import os

import gradio as gr
import pandas as pd

try:
    import docx
except ImportError:
    # python-docx is optional; read_docx() reports its absence to the user.
    docx = None

# --- Default codes and metadata ---
DEFAULT_CODES = [
    "Travail",      # work, employment
    "Famille",      # family
    "Formation",    # education, training
    "Association",  # community / associations
    "Santé",        # health
    "Politique",    # politics
    "Loisir",       # leisure
    "Religion",     # religion / spiritual
    "Émigration",   # migration
    "Autre",        # other / miscellaneous
]

# Maps the DataFrame column name to the label shown on the input widget.
METADATA_FIELDS = {
    "interview_id": "ID de l'entretien",
    "interview_date": "Date de l'entretien",
    "occupation": "Profession",
    "age": "Âge",
}

# One display colour per default code (not referenced elsewhere in this file).
COLOR_MAP = {
    "Travail": "lightblue",
    "Famille": "lightgreen",
    "Formation": "khaki",
    "Association": "orange",
    "Santé": "lightpink",
    "Politique": "violet",
    "Loisir": "lightcoral",
    "Religion": "lightyellow",
    "Émigration": "lightcyan",
    "Autre": "gray",
}


# --- File processing ---
def read_docx(path):
    """Return the text of a .docx file, one paragraph per line.

    If python-docx is not installed, an error message string is returned
    instead of raising, so it ends up displayed in the transcript box.
    """
    if not docx:
        return "Error: python-docx not installed."
    document = docx.Document(path)
    return "\n".join(paragraph.text for paragraph in document.paragraphs)


def read_vtt(path):
    """Return the spoken text of a WebVTT file as a single line.

    Lines containing ``WEBVTT`` or a ``-->`` timestamp arrow, purely numeric
    cue identifiers, and blank lines are dropped; the remaining lines are
    joined with single spaces.
    """
    with open(path, "r", encoding="utf-8") as f:
        # Strip first so whitespace-only lines are treated as blank and do
        # not leave stray double spaces in the joined transcript.
        stripped_lines = [line.strip() for line in f]
    spoken = [
        line
        for line in stripped_lines
        if line
        and "WEBVTT" not in line
        and "-->" not in line
        and not line.isdigit()
    ]
    return " ".join(spoken)


def get_empty_df():
    """Return an empty table with the coded-segment and metadata columns."""
    columns = ["File ID", "Coded Segment", "Code", *METADATA_FIELDS]
    return pd.DataFrame(columns=columns)


def process_file(file_obj):
    """Load an uploaded transcript.

    Args:
        file_obj: The value delivered by the upload widget: ``None`` when the
            upload is cleared, a filesystem path, or an object exposing the
            path as ``.name``.

    Returns:
        A ``(text, file_name, empty_table)`` tuple. The table of coded
        segments is reset to empty on every call.
    """
    if file_obj is None:
        return "", "", get_empty_df()

    # gr.File hands over either a plain path or a file wrapper depending on
    # its ``type`` setting; accept both.
    if isinstance(file_obj, (str, os.PathLike)):
        path = os.fspath(file_obj)
    else:
        path = file_obj.name
    name = os.path.basename(path)

    extension = os.path.splitext(name)[1].lower()
    if extension == ".docx":
        text = read_docx(path)
    elif extension == ".vtt":
        text = read_vtt(path)
    else:
        with open(path, "r", encoding="utf-8") as f:
            text = f.read()
    return text, name, get_empty_df()


# --- Apply coding ---
def apply_code(df, segment, code, file_id, *metadata_values):
    """Append one coded segment to the table.

    Args:
        df: Current table of coded segments.
        segment: Text of the segment to code.
        code: Code selected for the segment.
        file_id: Name of the loaded transcript file.
        *metadata_values: Metadata values, in ``METADATA_FIELDS`` order.

    Returns:
        A ``(table, status_message, segment_box_update)`` tuple. On a
        validation failure the table is returned unchanged and the segment
        box is left untouched; on success the segment box is cleared.
    """
    # gr.update() without arguments leaves the component as it is, so the
    # user does not lose a pasted segment because of a missing code or file.
    keep_segment = gr.update()
    if not file_id:
        return df, "⚠️ Upload a file first", keep_segment
    if not segment:
        return df, "⚠️ Paste a segment first", keep_segment
    if not code:
        return df, "⚠️ Select a code", keep_segment

    new_row = {
        "File ID": file_id,
        "Coded Segment": segment,
        "Code": code,
        **dict(zip(METADATA_FIELDS, metadata_values)),
    }
    df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
    # Clear segment box after applying
    return df, f"✅ Segment coded as '{code}'", gr.update(value="")


# --- Add new code ---
def add_new_code(new_code, code_list):
    """Return the code list extended with ``new_code``.

    Surrounding whitespace is stripped from ``new_code``. If the result is
    empty or already present, ``code_list`` is returned as is. Otherwise a
    new list is returned; the list passed in is never modified.
    """
    label = (new_code or "").strip()
    if not label or label in code_list:
        return code_list
    return [*code_list, label]


# --- Export to Excel ---
def export_excel(df):
    """Write the coded segments to ``coded_segments.xlsx``.

    The file is written relative to the current working directory.

    Returns:
        ``(path, status_message)``; ``path`` is ``None`` when the table is
        empty and nothing was written.
    """
    if df.empty:
        return None, "Nothing to export"
    path = "coded_segments.xlsx"
    df.to_excel(path, index=False)
    return path, "Excel ready"


# ----------------------------
# GRADIO APP
# ----------------------------
# NOTE(review): the nesting of widgets inside rows/columns below was
# reconstructed from the order of the statements — confirm the placement of
# the table, export, upload and status widgets against the intended layout.
with gr.Blocks() as demo:
    # --- States ---
    full_text = gr.State("")
    file_id = gr.State("")
    coded_df_state = gr.State(get_empty_df())
    code_categories_state = gr.State(DEFAULT_CODES)

    # --- Metadata on top ---
    with gr.Row():
        # Created in METADATA_FIELDS order so the values line up with the
        # keys zipped together in apply_code().
        metadata_inputs = [
            gr.Textbox(label=label) for label in METADATA_FIELDS.values()
        ]

    # --- Main interface ---
    with gr.Row():
        # Left: transcript
        with gr.Column(scale=3):
            transcript_box = gr.Textbox(
                label="Transcript (copy the text you want to code)",
                lines=25,
                interactive=True,
                placeholder="Upload a file to see transcript...",
            )

        # Right: coding tools
        with gr.Column(scale=2):
            gr.Markdown("## 🏷️ Code Segment")
            segment_box = gr.Textbox(
                label="Segment to code (paste here)",
                lines=4,
            )
            code_dropdown = gr.Dropdown(label="Select code", choices=DEFAULT_CODES)
            code_input = gr.Textbox(label="Or type new code")
            add_code_btn = gr.Button("Add new code")
            apply_btn = gr.Button("Apply code")

            gr.Markdown("## 📊 Coded Segments")
            table = gr.Dataframe(interactive=False)
            export_btn = gr.Button("Export XLSX")
            export_file = gr.File(visible=False)

    file_input = gr.File(
        label="Upload transcript",
        file_types=[".docx", ".vtt", ".txt"],
    )
    status = gr.Textbox(label="Status", value="Ready")

    # --- Callbacks ---
    file_input.change(
        fn=process_file,
        inputs=file_input,
        outputs=[transcript_box, file_id, coded_df_state],
    )

    add_code_btn.click(
        add_new_code,
        inputs=[code_input, code_categories_state],
        outputs=[code_categories_state],
    )

    # Keep the dropdown choices in sync with the list of codes.
    code_categories_state.change(
        lambda codes: gr.update(choices=codes),
        inputs=code_categories_state,
        outputs=code_dropdown,
    )

    apply_btn.click(
        apply_code,
        inputs=[coded_df_state, segment_box, code_dropdown, file_id] + metadata_inputs,
        outputs=[coded_df_state, status, segment_box],
    )

    # Mirror the table held in state into the visible Dataframe.
    coded_df_state.change(lambda df: df, inputs=coded_df_state, outputs=table)

    # Write the file first, then reveal the download widget only if a file
    # was actually produced.
    export_btn.click(
        export_excel,
        inputs=coded_df_state,
        outputs=[export_file, status],
    ).then(
        lambda f: gr.update(visible=f is not None),
        inputs=export_file,
        outputs=export_file,
    )

if __name__ == "__main__":
    # Guarded so importing this module does not start a server.
    demo.launch()