Spaces:
Sleeping
Sleeping
| """Markdown parsing module for fabric-to-espanso.""" | |
| from typing import Tuple, List, Optional, Set | |
| from pathlib import Path | |
| import regex | |
| import logging | |
| from .exceptions import ParsingError | |
| from .config import config | |
| logger = logging.getLogger('fabric_to_espanso') | |
def create_section_pattern(keywords: Set[str]) -> regex.Pattern:
    """Build a regex that captures markdown sections by heading keyword.

    A section starts at a line of the form ``# <text>`` whose text contains
    any of the keywords (case-insensitive substring match) and extends up to,
    but not including, the next line that begins with ``#`` or the end of the
    input.

    Args:
        keywords: Keywords to look for in headings. Each keyword is escaped,
            so regex metacharacters in it are matched literally. Empty
            strings are ignored.

    Returns:
        A compiled pattern (MULTILINE | IGNORECASE) without capture groups,
        so ``findall`` yields whole section strings. When no usable keyword
        is supplied, the returned pattern never matches.
    """
    flags = regex.MULTILINE | regex.IGNORECASE

    # Sort so the generated pattern text is deterministic across runs, and
    # drop empty keywords: an empty alternative matches every heading.
    usable = sorted(kw for kw in keywords if kw)
    if not usable:
        # "(?!x)x" can never match, so callers simply get no sections.
        return regex.compile(r'(?!x)x', flags)

    keyword_pattern = '|'.join(regex.escape(kw) for kw in usable)
    return regex.compile(
        # [ \t]+ instead of \s+ keeps the heading on a single line: \s also
        # matches newlines, which let a bare "#" line pick up a keyword from
        # the line that follows it.
        rf'^#[ \t]+.*(?:{keyword_pattern}).*$\n?(?:(?!^#).*\n?)*',
        flags
    )
def parse_markdown_file(
    file_path: str | Path,
    keywords: Optional[Set[str]] = None
) -> Tuple[str, Optional[str]]:
    """Read a markdown file and pull out the sections whose heading matches.

    Args:
        file_path: Location of the markdown file.
        keywords: Heading keywords to look for. When omitted (or empty), the
            words in ``config.base_words`` are used instead.

    Returns:
        A ``(full_content, extracted_sections)`` pair. ``extracted_sections``
        holds the matching sections joined with a double newline, or ``None``
        when no heading matched.

    Raises:
        ParsingError: If the file cannot be read or any other error occurs
            while parsing.
    """
    try:
        # Fall back to the configured defaults when no keywords are supplied.
        active_keywords = keywords if keywords else set(config.base_words)
        matcher = create_section_pattern(active_keywords)

        source = Path(file_path)
        try:
            text = source.read_text(encoding='utf-8')
        except Exception as read_error:
            raise ParsingError(
                f"Failed to read {source}: {str(read_error)}"
            ) from read_error

        found = matcher.findall(text)
        if found:
            # Sections are separated by a double newline in the result.
            joined = '\n\n'.join(found)
            logger.debug(f"Extracted {len(found)} sections from {source.name}")
            return text, joined

        # Nothing matched: hand back the full content only.
        logger.debug(f"No matching sections found in {source.name}")
        return text, None
    except Exception as err:
        # Every failure is logged with its traceback; only errors that are
        # not already a ParsingError get wrapped in one.
        logger.error(f"Error parsing {file_path}: {str(err)}", exc_info=True)
        if not isinstance(err, ParsingError):
            raise ParsingError(
                f"Unexpected error parsing {file_path}: {str(err)}"
            ) from err
        raise
def main():
    """Run a small demo: parse 'document.md' and print the outcome.

    Any exception raised along the way is reported on stdout instead of
    propagating.
    """
    try:
        # Only the path is given here, so the default keywords apply. A custom
        # keyword set can be supplied as the second argument, e.g.
        # parse_markdown_file('document.md', {'Identity', 'Purpose', 'Scope'}).
        print(parse_markdown_file('document.md'))
    except Exception as e:
        print(f"An error occurred: {e}")


# Run the demo only when this file is executed as a script.
if __name__ == '__main__':
    main()