| |
"""
Smart startup script for AI Dataset Studio.
Automatically detects available features and chooses the best version to run.
"""
|
|
import importlib
import logging
import sys
from typing import Any, Dict, List, Optional, Tuple
|
|
| |
# Configure root logging once for the whole startup script and create the
# module-level logger used by every function below.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)
|
|
def check_import(module_name: str, package_name: Optional[str] = None) -> Tuple[bool, str]:
    """Check whether a module can be imported.

    Args:
        module_name: Importable module name, e.g. ``"bs4"``.
        package_name: Optional name of the pip distribution providing the
            module, e.g. ``"beautifulsoup4"``. When given and different from
            ``module_name`` it is added to the failure message as a hint.

    Returns:
        ``(available, message)``: ``available`` is True when the import
        succeeded; ``message`` is a one-line, human-readable status.
    """
    try:
        importlib.import_module(module_name)
    except Exception as e:
        # Importing executes the module's top-level code, so a broken install
        # can raise more than ImportError. Any failure here means the
        # dependency is unusable, which is all this probe needs to report.
        hint = ""
        if package_name and package_name != module_name:
            hint = f" (pip package: {package_name})"
        return False, f"❌ {module_name}{hint}: {e}"
    return True, f"✅ {module_name}"
|
|
def diagnose_system() -> Dict[str, Any]:
    """Probe the environment for essential and optional dependencies.

    Each dependency is probed with ``check_import`` and its status line is
    logged.

    Returns:
        A dict with these keys:

        * ``essential_available`` (bool): every essential module imported.
        * ``ai_models_available`` (bool): both ``transformers`` and ``torch``
          imported.
        * ``nlp_available`` (bool): ``nltk`` imported.
        * ``datasets_available`` (bool): ``datasets`` imported.
        * ``missing_essential`` (list of str): pip names of essential
          packages that failed to import.
        * ``missing_optional`` (list of str): pip names of optional packages
          that failed to import.
    """
    logger.info("🔍 Diagnosing system capabilities...")

    # Each entry is (import name, pip distribution name).
    essential_deps = [
        ('gradio', 'gradio'),
        ('requests', 'requests'),
        ('bs4', 'beautifulsoup4'),
        ('pandas', 'pandas'),
        ('numpy', 'numpy'),
    ]
    optional_deps = [
        ('transformers', 'transformers'),
        ('torch', 'torch'),
        ('datasets', 'datasets'),
        ('nltk', 'nltk'),
        ('sentence_transformers', 'sentence-transformers'),
    ]

    logger.info("📋 Checking essential dependencies...")
    missing_essential = []
    for module, package in essential_deps:
        available, msg = check_import(module, package)
        logger.info(f" {msg}")
        if not available:
            missing_essential.append(package)

    logger.info("🔧 Checking optional dependencies...")
    missing_optional = []
    optional_present = set()
    for module, package in optional_deps:
        available, msg = check_import(module, package)
        logger.info(f" {msg}")
        if available:
            optional_present.add(module)
        else:
            missing_optional.append(package)

    return {
        'essential_available': not missing_essential,
        # The feature summary tells users to "install transformers + torch",
        # so the AI feature set is only reported when BOTH are importable.
        'ai_models_available': {'transformers', 'torch'} <= optional_present,
        'nlp_available': 'nltk' in optional_present,
        'datasets_available': 'datasets' in optional_present,
        'missing_essential': missing_essential,
        'missing_optional': missing_optional,
    }
|
|
def test_gpu_availability() -> bool:
    """Report whether a CUDA GPU is usable through PyTorch.

    Despite the ``test_`` prefix this is a runtime probe, not a unit test.
    The outcome is logged; the probe never raises for a missing or broken
    PyTorch/CUDA setup.

    Returns:
        True when ``torch.cuda.is_available()`` reports a GPU, otherwise
        False (including when PyTorch is missing or the check itself fails).
    """
    try:
        import torch

        gpu_available = torch.cuda.is_available()
        if gpu_available:
            gpu_name = torch.cuda.get_device_name(0)
            logger.info(f"🚀 GPU available: {gpu_name}")
        else:
            logger.info("💻 Using CPU (GPU not available)")
        return gpu_available
    except ImportError:
        logger.info("💻 Using CPU (PyTorch not available)")
        return False
    except Exception as e:
        # The result is informational only, so a broken torch install or a
        # failing CUDA query must not abort startup.
        logger.warning(f"⚠️ GPU check failed, using CPU: {e}")
        return False
|
|
def install_missing_packages(packages: List[str]) -> bool:
    """Attempt to pip-install the given packages into the running interpreter.

    Args:
        packages: pip distribution names to install. An empty list is a
            no-op that counts as success.

    Returns:
        True when nothing needed installing or pip exited with status 0;
        False when pip failed or could not be started.
    """
    if not packages:
        return True

    logger.info(f"📦 Attempting to install missing packages: {', '.join(packages)}")

    import subprocess

    # Use the current interpreter's pip so packages land in this environment.
    cmd = [sys.executable, "-m", "pip", "install"] + list(packages)
    try:
        result = subprocess.run(cmd, capture_output=True, text=True)
    except Exception as e:
        # Best effort: any failure to launch pip is reported, not raised.
        logger.error(f"❌ Installation error: {e}")
        return False

    if result.returncode != 0:
        logger.error(f"❌ Installation failed: {result.stderr}")
        return False

    # The import system caches directory listings; without this, modules
    # installed just now may stay invisible to imports in this same process.
    importlib.invalidate_caches()
    logger.info("✅ Packages installed successfully!")
    return True
|
|
def run_full_version() -> None:
    """Load the full-featured application by importing the ``app`` module.

    NOTE(review): presumably ``app`` starts the UI as an import side effect;
    confirm against app.py.

    Raises:
        Exception: whatever importing ``app`` raised, after logging it, so
            the caller can fall back to the minimal version.
    """
    logger.info("🚀 Starting full AI Dataset Studio...")
    try:
        import app  # noqa: F401  (imported for its side effects)
    except Exception as e:
        logger.error(f"❌ Full version failed: {e}")
        raise
    logger.info("✅ Full version loaded successfully")
|
|
def run_minimal_version() -> None:
    """Load the minimal application by importing the ``app_minimal`` module.

    NOTE(review): presumably ``app_minimal`` starts the UI as an import side
    effect; confirm against app_minimal.py.

    Raises:
        Exception: whatever importing ``app_minimal`` raised, after logging
            it.
    """
    logger.info("🚀 Starting minimal AI Dataset Studio...")
    try:
        import app_minimal  # noqa: F401  (imported for its side effects)
    except Exception as e:
        logger.error(f"❌ Minimal version failed: {e}")
        raise
    logger.info("✅ Minimal version loaded successfully")
|
|
def show_feature_summary(results: Dict[str, Any]) -> None:
    """Log which feature groups are enabled for the diagnosed environment.

    Args:
        results: Capability mapping in the shape produced by
            ``diagnose_system()``. The keys ``essential_available``,
            ``ai_models_available``, ``nlp_available`` and
            ``datasets_available`` are read.
    """
    logger.info("📊 Feature Summary:")

    # Core features are listed only when every essential dependency is
    # present; there is deliberately no "disabled" line for them.
    if results['essential_available']:
        logger.info(" ✅ Core web scraping and data processing")
        logger.info(" ✅ CSV and JSON export")
        logger.info(" ✅ Quality filtering and text cleaning")

    if results['ai_models_available']:
        logger.info(" ✅ AI-powered sentiment analysis")
        logger.info(" ✅ Named entity recognition")
        logger.info(" ✅ Advanced content quality assessment")
    else:
        logger.info(" ⚠️ AI features disabled (install transformers + torch)")

    if results['nlp_available']:
        logger.info(" ✅ Advanced text processing with NLTK")
    else:
        logger.info(" ⚠️ Basic text processing only (install nltk)")

    if results['datasets_available']:
        logger.info(" ✅ HuggingFace Datasets export")
    else:
        logger.info(" ⚠️ Standard export only (install datasets)")
|
|
def main():
    """Diagnose the environment and start the best available app version.

    Flow:
        1. Probe dependencies, log the feature summary and the GPU status.
        2. If essential packages are missing, offer to pip-install them and
           re-probe; exit with status 1 if the user declines, installation
           fails, or essentials are still missing afterwards.
        3. Start the full version when AI dependencies are available,
           falling back to the minimal version if it fails to load;
           otherwise start the minimal version directly.
    """
    print("🚀 AI Dataset Studio - Smart Startup")
    print("=" * 50)

    results = diagnose_system()
    show_feature_summary(results)
    # Called for its log output only; the return value is not needed here.
    test_gpu_availability()

    print("\n" + "=" * 50)

    if not results['essential_available']:
        missing = results['missing_essential']
        logger.error("❌ Essential dependencies missing!")
        logger.error("💡 Please install required packages:")
        # List what is actually missing rather than a fixed set of names.
        logger.error(f" pip install {' '.join(missing)}")

        try:
            user_input = input("\n🤔 Try to install missing packages automatically? (y/n): ")
        except EOFError:
            # No interactive stdin (container, CI, piped launch): treat the
            # missing answer as "no".
            user_input = ""

        if user_input.strip().lower() not in ('y', 'yes'):
            sys.exit(1)
        if not install_missing_packages(missing):
            logger.error("❌ Automatic installation failed")
            sys.exit(1)

        logger.info("🔄 Restarting with new packages...")
        results = diagnose_system()

    # Still missing after a "successful" install (e.g. the import itself is
    # broken): nothing more can be done automatically.
    if not results['essential_available']:
        logger.error("❌ Cannot start - essential dependencies missing")
        sys.exit(1)

    if not results['ai_models_available']:
        logger.info("🎯 Running minimal version (AI features not available)")
        run_minimal_version()
        return

    logger.info("🎯 Running full-featured version with AI capabilities")
    try:
        run_full_version()
    except Exception as e:
        logger.error(f"❌ Full version failed, falling back to minimal: {e}")
        run_minimal_version()
|
|
if __name__ == "__main__":
    # Script entry point: turn the outcome of main() into a process exit
    # code. sys.exit() calls inside main() raise SystemExit, which is not an
    # Exception subclass and therefore passes through untouched.
    try:
        main()
    except KeyboardInterrupt:
        logger.info("\n👋 Startup cancelled by user")
        sys.exit(0)
    except Exception as e:
        logger.error(f"❌ Startup failed: {e}")
        logger.error("💡 Try running directly: python app_minimal.py")
        sys.exit(1)