File size: 3,411 Bytes
f02f2d2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
"""
File handling utilities for document upload and validation.
"""
import os
import shutil
from pathlib import Path
from typing import Optional, Tuple
import config


def validate_file(file_path: str) -> Tuple[bool, str]:
    """
    Validate uploaded file.

    Checks are applied in order and the first failure is reported:
    existence, being a regular file, extension, then size.

    Args:
        file_path: Path to the file to validate

    Returns:
        Tuple of (is_valid, error_message); error_message is "" when valid
    """
    file_path_obj = Path(file_path)

    # Check if file exists
    if not file_path_obj.exists():
        return False, "File does not exist"

    # exists() is also true for directories; a directory named "x.pdf"
    # must not pass as a valid upload.
    if not file_path_obj.is_file():
        return False, "Path is not a file"

    # Check file extension (case-insensitive via lower())
    if file_path_obj.suffix.lower() not in config.ALLOWED_EXTENSIONS:
        return False, f"Invalid file type. Allowed: {', '.join(config.ALLOWED_EXTENSIONS)}"

    # Check file size (bytes converted to MiB to compare with the limit)
    file_size_mb = file_path_obj.stat().st_size / (1024 * 1024)
    if file_size_mb > config.MAX_FILE_SIZE_MB:
        return False, f"File too large ({file_size_mb:.1f}MB). Max: {config.MAX_FILE_SIZE_MB}MB"

    return True, ""


def save_uploaded_file(uploaded_file, destination_dir: Optional[Path] = None) -> str:
    """
    Save an uploaded Streamlit file to disk.

    Only the final path component of ``uploaded_file.name`` is used, so a
    name such as ``../../x.pdf`` or an absolute path cannot place the file
    outside ``destination_dir``.

    Args:
        uploaded_file: Streamlit UploadedFile object; this function uses its
            ``name`` attribute and ``getbuffer()`` method
        destination_dir: Directory to save the file (default: config.UPLOAD_DIR).
            A string path is accepted and converted to ``Path``.

    Returns:
        Path to saved file as string

    Raises:
        OSError: If the directory cannot be created or the file cannot be
            written (including a name with no usable final component).
    """
    if destination_dir is None:
        destination_dir = config.UPLOAD_DIR
    destination_dir = Path(destination_dir)

    # Ensure destination directory exists
    destination_dir.mkdir(parents=True, exist_ok=True)

    # The name is supplied by the client: strip any directory parts so the
    # write is confined to destination_dir.
    safe_name = Path(uploaded_file.name).name
    file_path = destination_dir / safe_name

    # Write file
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())

    return str(file_path)


def cleanup_file(file_path: str) -> bool:
    """
    Remove a single file from disk.

    Args:
        file_path: Location of the file to remove

    Returns:
        True when the path existed and was removed; False when the path
        does not exist or removal raised an error (the error is printed)
    """
    try:
        target = Path(file_path)
        # Nothing to remove: report False rather than raising
        if not target.exists():
            return False
        target.unlink()
    except Exception as e:
        print(f"Error deleting file {file_path}: {e}")
        return False
    return True


def cleanup_directory(dir_path: Path, keep_dir: bool = True) -> bool:
    """
    Clean up all files in a directory.

    Args:
        dir_path: Directory to clean (a string path is also accepted)
        keep_dir: If True, keep the directory but remove contents;
            if False, remove the directory itself as well

    Returns:
        True if successful (including when the directory does not exist),
        False if any error occurred (the error is printed)
    """
    try:
        dir_path = Path(dir_path)
        if not dir_path.exists():
            return True

        if not keep_dir:
            # Remove directory entirely
            shutil.rmtree(dir_path)
            return True

        # Remove all entries but keep the directory itself
        for item in dir_path.iterdir():
            # shutil.rmtree refuses symlinks, and is_dir() follows them, so
            # only real directories go to rmtree. Everything else (files,
            # symlinks of any kind, dangling links) is unlinked, which never
            # touches a link's target.
            if item.is_dir() and not item.is_symlink():
                shutil.rmtree(item)
            else:
                item.unlink()
        return True
    except Exception as e:
        print(f"Error cleaning directory {dir_path}: {e}")
        return False


def get_file_type(file_path: str) -> str:
    """
    Map a path's extension to a document type label.

    The extension is compared case-insensitively.

    Args:
        file_path: Path whose extension is inspected

    Returns:
        'pdf' for .pdf, 'docx' for .docx and .doc, otherwise 'unknown'
    """
    # Both Word extensions share the 'docx' label
    suffix_to_type = {
        ".pdf": "pdf",
        ".docx": "docx",
        ".doc": "docx",
    }
    suffix = Path(file_path).suffix.lower()
    return suffix_to_type.get(suffix, "unknown")