File size: 6,984 Bytes
492875b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
#!/usr/bin/env python3
"""
Basic environment test that checks core structure without heavy ML dependencies.
Use this for local testing before pushing to HuggingFace Spaces.
"""

import sys
import os
import importlib.util
from pathlib import Path

def test_basic_python_imports():
    """Check that a fixed list of standard-library modules can be imported.

    One status line is printed per module.

    Returns:
        The number of modules whose import raised ``ImportError``
        (0 when every module imported successfully).
    """
    print("πŸ” Testing Basic Dependencies...")

    # Module name -> short description shown next to it in the report.
    # Dicts keep insertion order, so the report order is stable.
    stdlib_modules = {
        "os": "Operating System Interface",
        "sys": "System-specific parameters",
        "json": "JSON encoder/decoder",
        "pathlib": "Object-oriented filesystem paths",
        "typing": "Type hints",
        "logging": "Logging facility",
        "tempfile": "Temporary file operations",
        "datetime": "Date and time handling",
        "importlib": "Import machinery",
    }

    failure_count = 0
    for name, description in stdlib_modules.items():
        try:
            __import__(name)
        except ImportError as exc:
            print(f"❌ {name} - {description}: {exc}")
            failure_count += 1
        else:
            print(f"βœ… {name} - {description}")

    return failure_count

def test_available_packages():
    """Report which of the ML/runtime packages can be imported here.

    Each package in the list is imported in turn and a status line is
    printed for it, followed by a summary and the list of packages that
    could not be imported.

    A package counts as unavailable when its import raises *any*
    exception, not only ``ImportError``: an installed-but-broken package
    (for example one whose native library fails to load) may raise a
    different exception type at import time, and that must not abort the
    remaining checks.

    Returns:
        The number of packages that could not be imported.
    """
    print("\nπŸ” Testing Available ML Packages...")

    ml_packages = [
        "torch", "numpy", "PIL", "requests", "mlflow",
        "arabic_reshaper", "fastapi", "transformers",
        "gradio", "datasets", "accelerate", "peft"
    ]

    available = []
    unavailable = []

    for package in ml_packages:
        try:
            __import__(package)
        except ImportError:
            unavailable.append(package)
            print(f"⚠️ {package} - Not available")
        except Exception as exc:
            # Deliberately broad: this is a diagnostic boundary, and the
            # failure is reported rather than swallowed.
            unavailable.append(package)
            print(f"⚠️ {package} - Import failed: {type(exc).__name__}: {exc}")
        else:
            available.append(package)
            print(f"βœ… {package}")

    print(f"\nπŸ“Š Summary: {len(available)}/{len(ml_packages)} packages available")

    if unavailable:
        print("\nπŸ“ Missing packages for HF Spaces:")
        for pkg in unavailable:
            print(f"  - {pkg}")

    return len(unavailable)

def test_file_structure():
    """Check that the expected project files exist next to this script.

    Paths are resolved relative to the directory containing this file.
    A status line is printed for each path, followed by a recap of the
    missing ones.

    Returns:
        The number of expected files that do not exist.
    """
    print("\nπŸ” Testing File Structure...")

    base_dir = Path(__file__).parent
    expected_paths = (
        "app.py",
        "requirements.txt",
        "README.md",
        "mlflow_arabic_ocr_config.py",
        "pipelines/__init__.py",
        "pipelines/arabic_ocr/__init__.py",
        "pipelines/arabic_ocr_training_pipeline.py",
    )

    absent = []
    for relative in expected_paths:
        if not (base_dir / relative).exists():
            print(f"❌ {relative} - Missing")
            absent.append(relative)
            continue
        print(f"βœ… {relative}")

    if absent:
        print("\nπŸ“ Missing files:")
        for name in absent:
            print(f"  - {name}")

    return len(absent)

def test_syntax_validation():
    """Compile every non-test Python file under this script's directory.

    The directory tree rooted at this file's parent is walked; every
    ``*.py`` file whose name does not start with ``test_`` is compiled
    (not executed) and a status line is printed for it.

    Sources are handed to ``compile()`` as raw bytes so that Python
    itself applies the UTF-8 BOM / PEP 263 encoding-declaration rules.
    Decoding the file to text first would leave a leading BOM in the
    string and make a valid file look like a syntax error.

    Returns:
        The number of files that raised ``SyntaxError``. Files that fail
        for any other reason (e.g. they cannot be read) are reported as
        warnings but are not counted.
    """
    print("\nπŸ” Testing Python Syntax...")

    current_dir = Path(__file__).parent

    # Collect candidate files first so the report below is one flat loop.
    python_files = []
    for root, _dirs, files in os.walk(current_dir):
        for file in files:
            if file.endswith('.py') and not file.startswith('test_'):
                python_files.append(os.path.join(root, file))

    syntax_errors = 0
    for file_path in python_files:
        rel_path = os.path.relpath(file_path, current_dir)
        try:
            with open(file_path, 'rb') as f:
                source = f.read()
            compile(source, file_path, 'exec')
        except SyntaxError as e:
            print(f"❌ {rel_path} - Syntax error: {e}")
            syntax_errors += 1
        except Exception as e:
            # Best-effort: unreadable files are surfaced but do not count
            # as syntax errors.
            print(f"⚠️ {rel_path} - Error reading: {e}")
        else:
            print(f"βœ… {rel_path}")

    return syntax_errors

def test_requirements_format():
    """Inspect the ``requirements.txt`` that sits next to this script.

    The file is resolved relative to this script's directory (the same
    base the file-structure check uses) rather than the current working
    directory, so the result does not depend on where the script is
    launched from. It is read as UTF-8 explicitly so the platform's
    default encoding does not matter.

    Prints the total line count, the number of dependency lines
    (non-empty, not starting with ``#``) and a warning listing any
    dependency line that mentions ``xformers`` or ``bitsandbytes``.

    Returns:
        0 when the file was found (warnings do not count as issues),
        1 when it does not exist.
    """
    print("\nπŸ” Testing Requirements.txt Format...")

    requirements_path = Path(__file__).parent / 'requirements.txt'

    # Keep the try body minimal: only the read can raise FileNotFoundError.
    try:
        with open(requirements_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()
    except FileNotFoundError:
        print("❌ requirements.txt not found")
        return 1

    print(f"βœ… requirements.txt found ({len(lines)} lines)")

    # Count non-comment, non-empty lines
    stripped_lines = (line.strip() for line in lines)
    actual_deps = [entry for entry in stripped_lines
                   if entry and not entry.startswith('#')]

    print(f"βœ… {len(actual_deps)} dependencies listed")

    # Check for known problematic packages. Comment lines were already
    # filtered out above, so no second '#' check is needed here.
    build_heavy = ('xformers', 'bitsandbytes')
    problematic = [dep for dep in actual_deps
                   if any(pkg in dep.lower() for pkg in build_heavy)]

    if problematic:
        print("⚠️ Found build-heavy dependencies (may fail locally):")
        for dep in problematic:
            print(f"  - {dep}")

    return 0

def test_readme_format():
    """Check the ``README.md`` next to this script for Spaces frontmatter.

    The file is resolved relative to this script's directory (the same
    base the file-structure check uses) and read as UTF-8 explicitly, so
    the result depends neither on the current working directory nor on
    the platform's default encoding.

    The README must start with ``---``. The required keys are looked up
    only among the ``key:`` lines between that opening marker and the
    next ``---`` line (or the end of the file if there is none), so a
    mention in the body text or a longer key such as ``subtitle:`` does
    not satisfy the check for ``title:``.

    Returns:
        The number of required keys missing from the frontmatter, or 1
        when the README does not exist or has no frontmatter at all.
    """
    print("\nπŸ” Testing README.md Format...")

    readme_path = Path(__file__).parent / 'README.md'

    # Keep the try body minimal: only the read can raise FileNotFoundError.
    try:
        with open(readme_path, 'r', encoding='utf-8') as f:
            content = f.read()
    except FileNotFoundError:
        print("❌ README.md not found")
        return 1

    print("βœ… README.md found")

    # Check for YAML frontmatter
    if not content.startswith('---'):
        print("❌ No YAML frontmatter - required for HuggingFace Spaces")
        return 1

    print("βœ… YAML frontmatter detected")

    # Collect the key names declared inside the frontmatter block only.
    frontmatter_keys = set()
    for line in content.splitlines()[1:]:
        if line.strip() == '---':
            break
        key, separator, _value = line.partition(':')
        if separator:
            frontmatter_keys.add(key.strip().strip('\'"'))

    # Check for required HF Spaces fields
    required_fields = ['title:', 'sdk:', 'app_file:', 'hardware:']
    missing_fields = []

    for field in required_fields:
        if field.rstrip(':') in frontmatter_keys:
            print(f"βœ… {field} found")
        else:
            missing_fields.append(field)
            print(f"❌ {field} missing")

    return len(missing_fields)

def main():
    """Run every basic check in order and print an overall verdict.

    Each check returns its own issue count; the counts are summed.

    Returns:
        0 when no check reported an issue, otherwise 1 (intended to be
        used as the process exit status).
    """
    print("πŸ§ͺ Basic Environment Test for Arabic OCR Training")
    print("=" * 60)

    # Executed strictly in this order; the generator below is consumed
    # left to right by sum().
    checks = (
        test_basic_python_imports,
        test_available_packages,
        test_file_structure,
        test_syntax_validation,
        test_requirements_format,
        test_readme_format,
    )
    total_issues = sum(check() for check in checks)

    print("\n" + "=" * 60)
    if total_issues:
        print(f"⚠️ {total_issues} issue(s) found. Review before deploying.")
        return 1

    print("πŸŽ‰ All basic tests passed! Environment structure looks good.")
    print("πŸ“ Note: Some ML packages may install differently on HuggingFace Spaces.")
    print("πŸš€ Ready to push to HuggingFace Spaces!")
    return 0

if __name__ == "__main__":
    # Hand main()'s result to the shell as the process exit status.
    sys.exit(main())