"""Gradio app that fills red placeholder text in a Word template with PDF data.

The PDF is turned into a key/value dict by helpers from ``utils``; every
red-colored run in the template whose text fuzzily matches a key is replaced
by the corresponding value.
"""

import os
import tempfile
from difflib import get_close_matches

import gradio as gr
from docx import Document
from docx.shared import RGBColor

from utils import extract_text_from_pdf, parse_pdf_to_dict

# RGB values that count as "red" placeholder text.
# NOTE(review): matching is exact, so a red tone that is not listed here is
# NOT detected; extend this tuple if templates use other shades.
_RED_SHADES = (
    RGBColor(255, 0, 0),
    RGBColor(200, 0, 0),
    RGBColor(255, 20, 20),
    RGBColor(192, 0, 0),
    RGBColor(220, 20, 60),
)


def is_red_color(run):
    """Return True if *run*'s font color is one of the known red shades.

    Args:
        run: A python-docx run; its ``font.color.rgb`` value is inspected.

    Returns:
        bool: True only when an RGB value is set and equals an entry of
        ``_RED_SHADES``; False when there is no color or no RGB value.
    """
    color = run.font.color
    if color is None:
        return False
    rgb = color.rgb
    if not rgb:
        return False
    return any(rgb == shade for shade in _RED_SHADES)


def _iter_table_paragraphs(tables, seen_cells):
    """Yield paragraphs of every cell in *tables*, descending into nested tables.

    ``row.cells`` can return the same underlying cell more than once when
    cells are merged, so cells already visited (tracked in *seen_cells* by
    their XML element) are skipped to avoid processing a run twice.
    """
    for table in tables:
        for row in table.rows:
            for cell in row.cells:
                cell_element = cell._tc
                if cell_element in seen_cells:
                    continue
                seen_cells.add(cell_element)
                yield from cell.paragraphs
                yield from _iter_table_paragraphs(cell.tables, seen_cells)


def _iter_paragraphs(doc):
    """Yield the document's body paragraphs, then all table-cell paragraphs."""
    yield from doc.paragraphs
    yield from _iter_table_paragraphs(doc.tables, set())


def replace_red_text_with_data(doc_path, data_dict):
    """Replace red-colored runs in a .docx with fuzzily matched dict values.

    Each non-empty red run (in body paragraphs and in table cells) is
    compared case-insensitively against the keys of *data_dict* using
    ``difflib.get_close_matches`` (best match only, cutoff 0.6). On a match
    the whole run text is replaced by ``str(value)``; the run's formatting
    is left untouched.

    Args:
        doc_path: Path (or file-like object) accepted by ``docx.Document``.
        data_dict: Mapping of string keys to replacement values.

    Returns:
        The modified python-docx document object (not yet saved).
    """
    doc = Document(doc_path)

    # Lower-cased key -> original key, built once instead of per run.
    # setdefault keeps the FIRST key on a case-insensitive collision, which
    # is the key the original linear scan would have picked.
    keys_by_lower = {}
    for key in data_dict:
        keys_by_lower.setdefault(key.lower(), key)
    candidates = list(keys_by_lower)

    matched = 0
    for para in _iter_paragraphs(doc):
        for run in para.runs:
            if not is_red_color(run):
                continue
            red_text = run.text.strip()
            if not red_text:
                continue
            print(f"Found red text: {red_text}")

            # Use fuzzy matching
            key_match = get_close_matches(
                red_text.lower(), candidates, n=1, cutoff=0.6
            )
            if not key_match:
                continue

            # Coerce to str so non-string parsed values can be assigned.
            replacement = str(data_dict[keys_by_lower[key_match[0]]])
            print(f"Replacing '{red_text}' with '{replacement}'")
            run.text = replacement
            matched += 1

    print(f"Total replacements: {matched}")
    return doc


def _as_path(uploaded):
    """Return a filesystem path for an uploaded file.

    Path strings / path-like objects are returned unchanged; other objects
    (e.g. temp-file wrappers) are resolved through their ``name`` attribute.
    """
    if isinstance(uploaded, (str, os.PathLike)):
        return uploaded
    return getattr(uploaded, "name", uploaded)


def process_files(pdf_file, template_docx):
    """Fill the uploaded Word template with data extracted from the PDF.

    Args:
        pdf_file: Uploaded PDF (path string or file-like object with ``name``).
        template_docx: Uploaded .docx template (same accepted forms).

    Returns:
        str: Path of the generated ``filled_output.docx``.

    Raises:
        gr.Error: If either upload is missing.
    """
    if pdf_file is None or template_docx is None:
        raise gr.Error(
            "Please upload both a PDF report and a Word template (.docx)."
        )

    pdf_path = _as_path(pdf_file)
    doc_path = _as_path(template_docx)

    raw_text = extract_text_from_pdf(pdf_path)
    data_dict = parse_pdf_to_dict(raw_text)
    final_doc = replace_red_text_with_data(doc_path, data_dict)

    # A fresh directory per call keeps concurrent requests from overwriting
    # each other's output while preserving the download file name.
    output_dir = tempfile.mkdtemp(prefix="audit_report_")
    output_path = os.path.join(output_dir, "filled_output.docx")
    final_doc.save(output_path)
    return output_path


demo = gr.Interface(
    fn=process_files,
    inputs=[
        gr.File(label="Upload PDF Report", file_types=[".pdf"]),
        gr.File(label="Upload Word Template (.docx)", file_types=[".docx"]),
    ],
    outputs=gr.File(label="Download Filled Report (.docx)"),
    title="Audit Report Generator",
    description="Upload a PDF and a Word template. This tool will auto-fill red-colored outdated text with data from the PDF.",
)

if __name__ == "__main__":
    demo.launch()