Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| Document Assembler | |
| Handles creating medical documents by inserting sections into Word templates | |
| """ | |
| import os | |
| import re | |
| from datetime import datetime | |
| from typing import Dict, Any, List | |
| from docx import Document | |
| from docx.enum.text import WD_ALIGN_PARAGRAPH | |
| from docx.shared import Pt | |
| from langchain.tools import tool | |
| from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder | |
| from langchain.agents import AgentExecutor, create_openai_tools_agent | |
def create_medical_document(template_path: str, sections_text: str, title: str, output_path: str) -> str:
    """Create a medical document by inserting sections into a Word template.

    The generated text is split into sections (a line containing one of the
    known section keywords starts a new section), the title is written into
    the template's "Titre" paragraph (or a dated header is prepended when the
    template has none), and each section's text replaces whatever the
    template holds under the matching heading.

    Args:
        template_path: Path to the ``.docx`` template.
        sections_text: Generated text containing section headings and bodies.
        title: Title to place in the document.
        output_path: Where the assembled document is saved.

    Returns:
        A confirmation message containing ``output_path``.

    Raises:
        FileNotFoundError: If ``template_path`` does not exist.
    """
    if not os.path.exists(template_path):
        raise FileNotFoundError(f"Template file not found: {template_path}")

    doc = Document(template_path)
    sections = _parse_sections(sections_text)
    _insert_title(doc, title)
    _fill_sections(doc, sections)

    doc.save(output_path)
    return f"Document created successfully: {output_path}"


# Lines containing one of these (accent-folded, lower-cased) words open a section.
_SECTION_KEYWORDS = ('technique', 'resultat', 'conclusion', 'indication')

# Single-pass replacement for the accented letters the headings may contain.
_ACCENT_FOLD = str.maketrans({'é': 'e', 'è': 'e', 'à': 'a'})


def _fold_accents(text: str) -> str:
    """Lower-case ``text`` and replace é/è/à with their unaccented letters."""
    return text.lower().translate(_ACCENT_FOLD)


def _normalize_heading(text: str) -> str:
    """Return ``text`` folded for heading comparison (no colons, no NBSP)."""
    return _fold_accents(text).replace(':', '').replace('\xa0', ' ').strip()


def _parse_sections(sections_text: str) -> Dict[str, str]:
    """Split generated text into ``{heading line: body text}``."""
    sections: Dict[str, str] = {}
    current_section = None
    current_content = []
    for raw_line in sections_text.split('\n'):
        line = raw_line.strip()
        if not line:
            continue
        folded = _fold_accents(line)
        if any(keyword in folded for keyword in _SECTION_KEYWORDS):
            if current_section:
                sections[current_section] = '\n'.join(current_content).strip()
            current_section = line
            current_content = []
        elif current_section:
            current_content.append(line)
    # A trailing heading with no body is not recorded, so the template
    # content under that heading is left as it is.
    if current_section and current_content:
        sections[current_section] = '\n'.join(current_content).strip()
    return sections


def _insert_title(doc, title: str) -> None:
    """Write ``title`` into the "Titre" paragraph, or prepend a dated header."""
    paragraphs = doc.paragraphs
    for idx, paragraph in enumerate(paragraphs):
        para_text = paragraph.text.strip()
        if 'titre' in _normalize_heading(para_text):
            print(
                f"🎯 Found title section in template: '{para_text}' at index {idx}")
            # Assigning .text replaces the paragraph content with one run.
            paragraph.text = title
            for run in paragraph.runs:
                run.font.bold = True
                run.font.size = Pt(14)
            return

    print("📝 No title section found in template, adding header...")
    # Read the clock once so the date and the time cannot disagree.
    now = datetime.now()
    header_text = f"{title}\nDate: {now.strftime('%d/%m/%Y')}\nHeure: {now.strftime('%H:%M')}\n{'='*40}"
    # A template without any paragraph has nothing to insert before.
    header_para = paragraphs[0].insert_paragraph_before() if paragraphs else doc.add_paragraph()
    header_para.text = header_text
    for run in header_para.runs:
        run.font.bold = True
        run.font.size = Pt(14)
    header_para.alignment = WD_ALIGN_PARAGRAPH.CENTER


def _fill_sections(doc, sections: Dict[str, str]) -> None:
    """Replace the template text under each matched heading with its section."""
    # Snapshot the paragraphs: indices stay valid even when new paragraphs are
    # inserted below, and the list is not rebuilt on every access.
    paragraphs = list(doc.paragraphs)
    normalized_names = {name: _normalize_heading(name) for name in sections}

    section_indices = {}
    for idx, paragraph in enumerate(paragraphs):
        para_norm = _normalize_heading(paragraph.text)
        if not para_norm:
            continue
        for section_name, section_norm in normalized_names.items():
            if section_norm and section_norm in para_norm:
                section_indices[section_name] = idx
    print("DEBUG section_indices:", section_indices)
    print("DEBUG sections.keys():", list(sections.keys()))

    ordered = sorted(section_indices.items(), key=lambda item: item[1])
    for position, (section_name, idx) in enumerate(ordered):
        # The section body runs from just after its heading up to the next
        # matched heading (or the end of the document).
        start = idx + 1
        if position + 1 < len(ordered):
            end = ordered[position + 1][1]
        else:
            end = len(paragraphs)
        body = paragraphs[start:end]

        for paragraph in body:
            if paragraph.text.strip():
                paragraph.clear()

        content = sections[section_name]
        if not content:
            continue
        if body:
            target = body[0]
        elif start < len(paragraphs):
            # No body paragraph exists: the paragraph after the heading is
            # another heading, so insert a new one instead of overwriting it.
            target = paragraphs[start].insert_paragraph_before()
        else:
            target = doc.add_paragraph()
        target.text = content
def create_document_assembler_agent(llm):
    """Create the document assembler agent.

    Args:
        llm: Chat model passed to ``create_openai_tools_agent``.

    Returns:
        An ``AgentExecutor`` whose only tool is ``create_medical_document``.
        The prompt expects the input keys ``template_path``,
        ``sections_text``, ``title`` and ``output_path``.
    """
    # The executor works with LangChain tool objects; wrap the function when
    # it is still a plain callable, and reuse it untouched if it already
    # exposes the tool/runnable interface.
    if hasattr(create_medical_document, "invoke"):
        assembler_tool = create_medical_document
    else:
        assembler_tool = tool(create_medical_document)
    tools = [assembler_tool]

    system_message = (
        "You are a medical document assembler.\n"
        "Create medical documents by inserting sections into Word templates.\n"
        "Use the provided title for the document header and insert sections in the correct locations."
    )
    human_message = (
        "Create a medical document with template {template_path}, "
        "sections content: {sections_text}, title: {title}, and save to {output_path}"
    )
    document_assembler_prompt = ChatPromptTemplate.from_messages([
        ("system", system_message),
        ("human", human_message),
        MessagesPlaceholder("agent_scratchpad"),
    ])

    # The agent and the executor must be given the same tool list.
    document_assembler_agent = create_openai_tools_agent(
        llm=llm,
        tools=tools,
        prompt=document_assembler_prompt,
    )
    return AgentExecutor(
        agent=document_assembler_agent,
        tools=tools,
        verbose=True,
    )