diff --git "a/app.py" "b/app.py" deleted file mode 100644--- "a/app.py" +++ /dev/null @@ -1,2531 +0,0 @@ - -# --- START OF FILE app.py --- - -import os -import hmac -import hashlib -import json -from urllib.parse import unquote, parse_qsl, urlencode -from flask import Flask, request, jsonify, Response, send_file -import logging -import threading -import time -from datetime import datetime -from huggingface_hub import HfApi, hf_hub_download, utils as hf_utils -from werkzeug.utils import secure_filename -import requests -from io import BytesIO -import uuid -from typing import Union, Optional, Dict, List, Tuple, Any - -app = Flask(__name__) -app.secret_key = os.getenv("FLASK_SECRET_KEY", "supersecretkey_mini_app_unique") -BOT_TOKEN = os.getenv('TELEGRAM_BOT_TOKEN', '6750208873:AAE2hvPlJ99dBdhGa_Brre0IIpUdOvXxHt4') # MUST be set -DATA_FILE = 'cloudeng_mini_app_data.json' -DATA_FILE_TMP = DATA_FILE + '.tmp' -REPO_ID = "Eluza133/Z1e1u" # Same HF Repo -HF_TOKEN_WRITE = os.getenv("HF_TOKEN") -HF_TOKEN_READ = os.getenv("HF_TOKEN_READ") or HF_TOKEN_WRITE -UPLOAD_FOLDER = 'uploads_mini_app' -os.makedirs(UPLOAD_FOLDER, exist_ok=True) - -logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') - -AUTH_DATA_LIFETIME = 3600 - -def find_node_by_id(filesystem: Dict[str, Any], node_id: str) -> Tuple[Optional[Dict[str, Any]], Optional[Dict[str, Any]]]: - if not filesystem or not isinstance(filesystem, dict): - return None, None - if filesystem.get('id') == node_id: - return filesystem, None - - queue: List[Tuple[Dict[str, Any], Optional[Dict[str, Any]]]] = [(filesystem, None)] - visited = {filesystem.get('id')} - - while queue: - current_node, parent = queue.pop(0) - if current_node.get('type') == 'folder' and 'children' in current_node: - for i, child in enumerate(current_node.get('children', [])): - if not isinstance(child, dict): - logging.warning(f"Skipping non-dict child in folder {current_node.get('id')}: {child}") - continue - child_id = 
child.get('id') - if not child_id: continue - - if child_id == node_id: - return child, current_node - if child_id not in visited and child.get('type') == 'folder': - visited.add(child_id) - queue.append((child, current_node)) - return None, None - -def add_node(filesystem: Dict[str, Any], parent_id: str, node_data: Dict[str, Any]) -> bool: - parent_node, _ = find_node_by_id(filesystem, parent_id) - if parent_node and parent_node.get('type') == 'folder': - if 'children' not in parent_node or not isinstance(parent_node.get('children'), list): - parent_node['children'] = [] - existing_ids = {child.get('id') for child in parent_node['children'] if isinstance(child, dict)} - if node_data.get('id') not in existing_ids: - parent_node['children'].append(node_data) - return True - else: - logging.warning(f"Attempted to add node with duplicate ID {node_data.get('id')} to parent {parent_id}") - return False # Indicate failure due to duplicate ID - elif not parent_node: - logging.error(f"Parent node {parent_id} not found when trying to add node {node_data.get('id')}") - elif parent_node.get('type') != 'folder': - logging.error(f"Attempted to add node {node_data.get('id')} to a non-folder parent {parent_id}") - return False - - -def remove_node(filesystem: Dict[str, Any], node_id: str) -> bool: - node_to_remove, parent_node = find_node_by_id(filesystem, node_id) - - if node_to_remove and parent_node and 'children' in parent_node and isinstance(parent_node['children'], list): - original_length = len(parent_node['children']) - parent_node['children'] = [child for child in parent_node['children'] if not isinstance(child, dict) or child.get('id') != node_id] - return len(parent_node['children']) < original_length - - if node_to_remove and node_id == filesystem.get('id'): - logging.warning("Attempted to remove root node directly. 
This is not allowed.") - return False - - if not node_to_remove: - logging.warning(f"Node {node_id} not found for removal.") - return False - if not parent_node: - logging.warning(f"Parent not found for node {node_id} during removal.") - return False - - return False - - -def get_node_path_list(filesystem: Dict[str, Any], node_id: str) -> List[Dict[str, str]]: - path_list = [] - current_id = node_id - processed_ids = set() - max_depth = 20 - count = 0 - - while current_id and current_id not in processed_ids and count < max_depth: - processed_ids.add(current_id) - count += 1 - node, parent = find_node_by_id(filesystem, current_id) - if not node: - logging.warning(f"Node {current_id} not found while building path.") - break - path_list.append({ - 'id': node.get('id', ''), - 'name': node.get('name', node.get('original_filename', 'Unknown')) - }) - if not parent: - if current_id != filesystem.get('id'): - logging.warning(f"Node {current_id} found but its parent is missing (and it's not root).") - break - parent_id = parent.get('id') - if parent_id == current_id: - logging.error(f"Filesystem loop detected at node {current_id}") - break - current_id = parent_id - - if count >= max_depth: - logging.error(f"Max depth reached while building path for node {node_id}. 
Possible loop or deep structure.") - - # Ensure root is present if not already found - if not any(p.get('id') == 'root' for p in path_list): - root_node, _ = find_node_by_id(filesystem, 'root') - root_name = root_node.get('name', 'Root') if root_node else 'Root' - path_list.append({'id': 'root', 'name': root_name}) - - # Reverse and remove duplicates (keeping first occurrence from root) - final_path = [] - seen_ids = set() - for item in reversed(path_list): - item_id = item.get('id') - if item_id and item_id not in seen_ids: - final_path.append(item) - seen_ids.add(item_id) - - # Make sure root is always first if it exists - if final_path and final_path[0].get('id') != 'root': - root_index = -1 - for i, item in enumerate(final_path): - if item.get('id') == 'root': - root_index = i - break - if root_index > 0: - root_item = final_path.pop(root_index) - final_path.insert(0, root_item) - elif root_index == -1: # Root wasn't in the path, force add it - root_node, _ = find_node_by_id(filesystem, 'root') - root_name = root_node.get('name', 'Root') if root_node else 'Root' - final_path.insert(0, {'id': 'root', 'name': root_name}) - - return final_path - -def initialize_user_filesystem(user_data: Dict[str, Any]): - if 'filesystem' not in user_data or not isinstance(user_data['filesystem'], dict) or not user_data['filesystem']: - user_data['filesystem'] = { - "type": "folder", - "id": "root", - "name": "Root", - "children": [] - } - elif 'id' not in user_data['filesystem'] or user_data['filesystem']['id'] != 'root': - logging.warning(f"User filesystem exists but has invalid root. Resetting. 
# In-process cache of the parsed database plus the lock serialising access.
_data_cache = None
_data_lock = threading.Lock()

def load_data() -> Dict[str, Any]:
    """Return the application database dict.

    On a cache miss this syncs from Hugging Face, parses the local JSON
    file, normalises every user's filesystem and caches the result.
    """
    global _data_cache
    with _data_lock:
        # Serve from cache only when it still looks structurally sound.
        if _data_cache and isinstance(_data_cache, dict) and 'users' in _data_cache:
            return _data_cache

        logging.info("Cache miss or invalid cache. Loading data from source.")
        db = None
        try:
            download_db_from_hf()
            if not os.path.exists(DATA_FILE):
                logging.warning(f"{DATA_FILE} not found locally after download attempt.")
            else:
                with open(DATA_FILE, 'r', encoding='utf-8') as fh:
                    db = json.load(fh)
                if not isinstance(db, dict):
                    logging.warning(f"{DATA_FILE} content is not a dict. Attempting to load backup or init empty.")
                    db = None  # force fallback initialisation below
                elif not isinstance(db.get('users'), dict):
                    logging.warning(f"{DATA_FILE} missing 'users' dict. Initializing.")
                    db = {'users': {}}
        except FileNotFoundError:
            logging.warning(f"{DATA_FILE} not found locally. Will initialize empty data.")
        except json.JSONDecodeError:
            logging.error(f"Error decoding JSON from {DATA_FILE}. File might be corrupted.")
        except Exception as e:
            logging.error(f"Unexpected error loading data: {e}")

        if db is None:
            logging.warning("Initializing empty data structure.")
            db = {'users': {}}

        # Normalise structure and repair each user's filesystem tree.
        db.setdefault('users', {})
        for uid, record in db['users'].items():
            if isinstance(record, dict):
                initialize_user_filesystem(record)
            else:
                logging.warning(f"Invalid data type for user {uid}. Skipping.")

        _data_cache = db
        logging.info(f"Data loaded successfully. User count: {len(_data_cache.get('users', {}))}")
        return _data_cache

def save_data(data: Dict[str, Any]):
    """Atomically persist ``data`` to disk, refresh the cache and start a
    background Hugging Face backup. Re-raises on local write failure."""
    global _data_cache
    with _data_lock:
        if not isinstance(data, dict) or 'users' not in data:
            logging.error("Attempted to save invalid data structure. Aborting save.")
            return

        try:
            # tmp-file + os.replace gives an atomic swap on the same volume.
            with open(DATA_FILE_TMP, 'w', encoding='utf-8') as fh:
                json.dump(data, fh, ensure_ascii=False, indent=4)
            os.replace(DATA_FILE_TMP, DATA_FILE)
            logging.info(f"Data saved locally to {DATA_FILE}")

            _data_cache = data  # keep the in-memory copy in sync

            # Push the backup from a worker thread so the request isn't blocked.
            threading.Thread(target=upload_db_to_hf_thread, args=(DATA_FILE,)).start()
        except Exception as e:
            logging.error(f"Error saving data locally: {e}")
            if os.path.exists(DATA_FILE_TMP):
                logging.warning(f"Temporary save file {DATA_FILE_TMP} may still exist.")
            # Cache is intentionally left at the last known-good state.
            raise
def upload_db_to_hf_thread(local_path: str):
    """Worker function for uploading DB to HF in a separate thread."""
    if not HF_TOKEN_WRITE:
        logging.warning("HF_TOKEN_WRITE not set, skipping database upload.")
        return
    if not os.path.exists(local_path):
        logging.error(f"Local file {local_path} not found for HF upload.")
        return
    try:
        api = HfApi()
        logging.info(f"Starting background upload of {local_path} to HF repo {REPO_ID}...")
        api.upload_file(
            path_or_fileobj=local_path,
            path_in_repo=os.path.basename(local_path),  # just the filename inside the repo
            repo_id=REPO_ID,
            repo_type="dataset",
            token=HF_TOKEN_WRITE,
            commit_message=f"Backup MiniApp {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
        )
        logging.info("Database upload to Hugging Face completed.")
    except Exception as e:
        logging.error(f"Error during background database upload: {e}")


def _create_empty_local_db() -> None:
    """Write a minimal ``{'users': {}}`` database to DATA_FILE if absent.

    Consolidates the fallback-creation logic that was previously duplicated
    across four branches of download_db_from_hf().
    """
    if os.path.exists(DATA_FILE):
        return
    try:
        with open(DATA_FILE, 'w', encoding='utf-8') as f:
            json.dump({'users': {}}, f)
        logging.info(f"Created empty local database file: {DATA_FILE}")
    except Exception as e:
        logging.error(f"Failed to create empty local database file: {e}")


def download_db_from_hf():
    """Fetch the latest DATA_FILE from the HF dataset repo.

    Falls back to creating a fresh empty local database whenever the
    download is impossible (no token, missing repo/entry, generic failure).
    """
    if not HF_TOKEN_READ:
        logging.warning("HF_TOKEN_READ not set, skipping database download.")
        _create_empty_local_db()
        return
    try:
        logging.info(f"Attempting to download {DATA_FILE} from HF repo {REPO_ID}...")
        hf_hub_download(
            repo_id=REPO_ID,
            filename=DATA_FILE,
            repo_type="dataset",
            token=HF_TOKEN_READ,
            local_dir=".",
            local_dir_use_symlinks=False,  # NOTE(review): deprecated in recent huggingface_hub; confirm target version
            force_download=True,  # Ensure we get the latest version
            etag_timeout=10
        )
        logging.info("Database downloaded successfully from Hugging Face.")
    except hf_utils.RepositoryNotFoundError:
        logging.error(f"Repository {REPO_ID} not found on Hugging Face.")
        _create_empty_local_db()
    except hf_utils.EntryNotFoundError:
        logging.warning(f"{DATA_FILE} not found in repo {REPO_ID}. Using/Creating local version.")
        _create_empty_local_db()
    except requests.exceptions.ConnectionError as e:
        logging.error(f"Connection error downloading DB from HF: {e}. Using local version if available.")
    except Exception as e:
        logging.error(f"Generic error downloading database from HF: {e}")
        if not os.path.exists(DATA_FILE):
            logging.info("Creating empty local DB file as download failed and none exists.")
            _create_empty_local_db()


# Extension -> coarse category used by the front-end to pick an icon/preview.
_EXTENSION_CATEGORIES: Dict[str, str] = {}
for _cat, _exts in {
    'video': ['mp4', 'mov', 'avi', 'webm', 'mkv', 'wmv', 'flv', 'mpeg', 'mpg'],
    'image': ['jpg', 'jpeg', 'png', 'gif', 'bmp', 'webp', 'svg', 'heic', 'tiff'],
    'pdf': ['pdf'],
    'text': ['txt', 'md', 'log', 'csv', 'json', 'xml', 'html', 'css', 'js', 'py', 'java', 'c', 'cpp', 'h'],
    'document': ['doc', 'docx', 'rtf'],
    'spreadsheet': ['xls', 'xlsx'],
    'presentation': ['ppt', 'pptx'],
    'archive': ['zip', 'rar', '7z', 'gz', 'tar'],
    'audio': ['mp3', 'wav', 'ogg', 'aac', 'flac', 'm4a'],
}.items():
    for _ext in _exts:
        _EXTENSION_CATEGORIES[_ext] = _cat


def get_file_type(filename: Optional[str]) -> str:
    """Map a filename to a coarse category ('video', 'image', ..., 'other').

    The last dot-separated component (lower-cased) decides; names without a
    dot, and None, are 'other'.
    """
    if not filename or '.' not in filename:
        return 'other'
    ext = filename.lower().split('.')[-1]
    return _EXTENSION_CATEGORIES.get(ext, 'other')
def check_telegram_authorization(auth_data: str, bot_token: str, max_age: Optional[int] = None) -> Optional[Dict[str, Any]]:
    """Validate Telegram WebApp initData and return the embedded user dict.

    Implements the official scheme: ``secret = HMAC-SHA256(key="WebAppData",
    msg=bot_token)``; the data-check string is the sorted "k=v" lines of
    every field except 'hash'. Returns None on any validation failure.

    ``max_age`` (new, backward-compatible) overrides the module-level
    AUTH_DATA_LIFETIME freshness window; mainly useful for testing.
    """
    if not auth_data or not bot_token or bot_token == 'YOUR_BOT_TOKEN':
        logging.warning("Validation skipped: Missing auth_data or valid BOT_TOKEN.")
        # In production this must strictly return None; never bypass here.
        return None

    if max_age is None:
        max_age = AUTH_DATA_LIFETIME

    try:
        parsed_data = dict(parse_qsl(unquote(auth_data)))
        if "hash" not in parsed_data:
            logging.error("Hash not found in auth data")
            return None

        telegram_hash = parsed_data.pop('hash')
        auth_date_ts = int(parsed_data.get('auth_date', 0))
        current_ts = int(time.time())

        # Reject stale (or future-dated) auth payloads.
        if abs(current_ts - auth_date_ts) > max_age:
            logging.warning(f"Auth data expired (Auth: {auth_date_ts}, Now: {current_ts}, Diff: {current_ts - auth_date_ts}, Limit: {max_age})")
            return None

        data_check_string = "\n".join(sorted([f"{k}={v}" for k, v in parsed_data.items()]))
        secret_key = hmac.new("WebAppData".encode(), bot_token.encode(), hashlib.sha256).digest()
        calculated_hash = hmac.new(secret_key, data_check_string.encode(), hashlib.sha256).hexdigest()

        # Guard clauses flatten the original deeply nested if/else ladder.
        if not hmac.compare_digest(calculated_hash, telegram_hash):
            logging.warning("Hash mismatch during validation")
            return None

        user_data_str = parsed_data.get('user')
        if not user_data_str:
            logging.warning("No 'user' field in validated auth data")
            return None
        try:
            user_info = json.loads(user_data_str)
        except json.JSONDecodeError:
            logging.error("Failed to decode user JSON from auth data")
            return None
        if 'id' not in user_info:
            logging.error("Validated user data missing 'id'")
            return None

        # Add auth_date to user_info for potential future use
        user_info['auth_date'] = auth_date_ts
        logging.info(f"Telegram validation successful for user ID: {user_info['id']}")
        return user_info
    except Exception as e:
        logging.error(f"Exception during validation: {e}", exc_info=True)
        return None
Received: {telegram_hash}, Calculated: {calculated_hash}, DataString: {data_check_string[:100]}...") - return None - except Exception as e: - logging.error(f"Exception during validation: {e}", exc_info=True) - return None - - -HTML_TEMPLATE = """ - - - - - - Zeus Cloud - - - - - -
- - - - - - - - - - -

Загрузка...

-
- - -
-

Zeus Cloud

-
- - - -
- - -
- -
-
Загрузка файлов
-
-
- - -
-
0%
-
-
- -
-
Содержимое папки
-
-
-
-
-
-
- - - -
- -
- - - - -""" - - -@app.route('/') -def index(): - return Response(HTML_TEMPLATE, mimetype='text/html') - -@app.route('/validate_init_data', methods=['POST']) -def validate_init_data(): - data = request.get_json() - if not data or 'initData' not in data: - return jsonify({"status": "error", "message": "Missing initData"}), 400 - - init_data_str = data['initData'] - user_info = check_telegram_authorization(init_data_str, BOT_TOKEN) - - if user_info and 'id' in user_info: - tg_user_id = str(user_info['id']) - needs_save = False - try: - db_data = load_data() # Load current data - users = db_data.setdefault('users', {}) - - # Ensure user exists and has basic structure - if tg_user_id not in users or not isinstance(users.get(tg_user_id), dict): - logging.info(f"New user or invalid data for: {tg_user_id}. Initializing.") - users[tg_user_id] = { - 'user_info': {}, - 'created_at': datetime.now().strftime('%Y-%m-%d %H:%M:%S'), - 'filesystem': {} # Will be initialized below - } - needs_save = True - - user_data = users[tg_user_id] - - # Update user info if different or missing - if user_data.get('user_info') != user_info: - user_data['user_info'] = user_info - needs_save = True - - # Initialize filesystem if missing or invalid - if 'filesystem' not in user_data or not isinstance(user_data['filesystem'], dict) or not user_data['filesystem']: - initialize_user_filesystem(user_data) - needs_save = True - - # Perform save only if changes were made - if needs_save: - save_data(db_data) - logging.info(f"User data initialized/updated and saved for {tg_user_id}") - - return jsonify({"status": "ok", "user": user_info}) - - except Exception as e: - # Catch potential errors during load/save for this specific validation - logging.error(f"Error during user validation/initialization for {tg_user_id}: {e}", exc_info=True) - return jsonify({"status": "error", "message": f"Ошибка сервера при обработке данных пользователя: {e}"}), 500 - - else: - logging.warning(f"Validation failed for 
initData starting with: {init_data_str[:100]}...") - return jsonify({"status": "error", "message": "Недействительные данные авторизации Telegram."}), 403 - - -@app.route('/get_dashboard_data', methods=['POST']) -def get_dashboard_data(): - data = request.get_json() - if not data or 'initData' not in data or 'folder_id' not in data: - return jsonify({"status": "error", "message": "Неполный запрос"}), 400 - - user_info = check_telegram_authorization(data['initData'], BOT_TOKEN) - if not user_info or 'id' not in user_info: - return jsonify({"status": "error", "message": "Не авторизован"}), 403 - - tg_user_id = str(user_info['id']) - requested_folder_id = data['folder_id'] - - try: - db_data = load_data() - user_data = db_data.get('users', {}).get(tg_user_id) - - if not user_data or 'filesystem' not in user_data: - # This case should ideally be handled by validate_init_data, but double-check - logging.error(f"Filesystem missing for validated user {tg_user_id}. Attempting re-initialization.") - # Try to re-initialize and save - if user_data is None: user_data = {} # Create if totally missing - initialize_user_filesystem(user_data) - db_data.setdefault('users', {})[tg_user_id] = user_data # Ensure it's in db_data - save_data(db_data) - # Now user_data should have a filesystem - logging.info(f"Re-initialized filesystem for user {tg_user_id}") - - # Find the requested folder, default to root if not found or invalid - current_folder, _ = find_node_by_id(user_data['filesystem'], requested_folder_id) - folder_id_to_use = requested_folder_id - - if not current_folder or current_folder.get('type') != 'folder': - logging.warning(f"Folder {requested_folder_id} invalid or not found for user {tg_user_id}. 
Defaulting to root.") - folder_id_to_use = 'root' - current_folder, _ = find_node_by_id(user_data['filesystem'], folder_id_to_use) - if not current_folder: # This is critical if root is missing after initialization - logging.error(f"CRITICAL: Root folder not found for user {tg_user_id} even after potential re-init.") - # Attempt one last re-init and save, then fail if still bad - initialize_user_filesystem(user_data) - save_data(db_data) - current_folder, _ = find_node_by_id(user_data['filesystem'], folder_id_to_use) - if not current_folder: - return jsonify({"status": "error", "message": "Критическая ошибка: Корневая папка отсутствует и не может быть создана."}), 500 - - items_in_folder = current_folder.get('children', []) - # Ensure items are dicts (filter out potential bad data) - items_in_folder = [item for item in items_in_folder if isinstance(item, dict)] - - breadcrumbs = get_node_path_list(user_data['filesystem'], folder_id_to_use) - - current_folder_info = { - 'id': current_folder.get('id'), - 'name': current_folder.get('name', 'Root') # Default name if missing - } - - return jsonify({ - "status": "ok", - "items": items_in_folder, - "breadcrumbs": breadcrumbs, - "current_folder": current_folder_info - }) - - except Exception as e: - logging.error(f"Error fetching dashboard data for user {tg_user_id}, folder {requested_folder_id}: {e}", exc_info=True) - return jsonify({"status": "error", "message": f"Ошибка сервера при получении данных: {e}"}), 500 - - -@app.route('/upload', methods=['POST']) -def upload_files(): - # Use request.form for initData and folder_id from FormData - init_data = request.form.get('initData') - current_folder_id = request.form.get('current_folder_id', 'root') - files = request.files.getlist('files') - - if not init_data: - return jsonify({"status": "error", "message": "Отсутствуют данные авторизации"}), 400 - - user_info = check_telegram_authorization(init_data, BOT_TOKEN) - if not user_info or 'id' not in user_info: - return 
jsonify({"status": "error", "message": "Не авторизован"}), 403 - - tg_user_id = str(user_info['id']) - - if not HF_TOKEN_WRITE: - logging.error("Upload failed: HF_TOKEN_WRITE not configured.") - return jsonify({'status': 'error', 'message': 'Загрузка файлов временно недоступна (ошибка конфигурации).'}), 503 # Service Unavailable - - if not files or all(not f.filename for f in files): - return jsonify({'status': 'error', 'message': 'Файлы для загрузки не выбраны.'}), 400 - - # Load data ONCE before the loop - try: - db_data = load_data() - user_data = db_data.get('users', {}).get(tg_user_id) - - if not user_data or 'filesystem' not in user_data: - logging.error(f"Filesystem missing for validated user {tg_user_id} during upload.") - # Attempt re-initialization before failing - if user_data is None: user_data = {} - initialize_user_filesystem(user_data) - db_data.setdefault('users', {})[tg_user_id] = user_data - save_data(db_data) # Save the re-initialized state - logging.info(f"Re-initialized filesystem for user {tg_user_id} during upload.") - # Reload user_data after potential save - user_data = db_data.get('users', {}).get(tg_user_id) - if not user_data or 'filesystem' not in user_data: # If still missing, fail - return jsonify({"status": "error", "message": "Критическая ошибка данных пользователя при загрузке."}), 500 - - target_folder_node, _ = find_node_by_id(user_data['filesystem'], current_folder_id) - if not target_folder_node or target_folder_node.get('type') != 'folder': - logging.warning(f"Target folder {current_folder_id} not found or invalid for user {tg_user_id}. 
Upload rejected.") - return jsonify({'status': 'error', 'message': 'Целевая папка не найдена!'}), 404 - - except Exception as e: - logging.error(f"Error preparing for upload for user {tg_user_id}: {e}", exc_info=True) - return jsonify({'status': 'error', 'message': f'Ошибка подготовки к загрузке: {e}'}), 500 - - - api = HfApi() - uploaded_count = 0 - errors = [] - needs_save = False # Flag to save DB only if changes were made - - for file in files: - if file and file.filename: - original_filename = secure_filename(file.filename) - if not original_filename: - logging.warning(f"Skipping file with potentially unsafe name after securing: {file.filename}") - errors.append(f"Пропуск файла с небезопасным именем: {file.filename}") - continue - - name_part, ext_part = os.path.splitext(original_filename) - unique_suffix = uuid.uuid4().hex[:8] - # Keep filename structure simpler, rely on UUID for uniqueness if needed - # unique_filename = f"{name_part}_{unique_suffix}{ext_part}" - unique_filename = original_filename # Use original secured name for HF path for clarity? Risk of collision if user uploads same name twice. Let's add UUID. - unique_internal_filename = f"{name_part}_{unique_suffix}{ext_part}" - - - file_id = uuid.uuid4().hex - # Structure HF path more logically: files / user_id / file_id / filename - # This prevents issues with folder names containing special chars and filename collisions. - # Using file_id ensures uniqueness at the storage level. 
- hf_path = f"user_files/{tg_user_id}/{file_id}/{unique_internal_filename}" - temp_path = os.path.join(UPLOAD_FOLDER, f"{file_id}_{unique_internal_filename}") - - try: - file.seek(0) # Ensure reading from the start - file.save(temp_path) - logging.info(f"Uploading {original_filename} ({file_id}) to HF path: {hf_path} for user {tg_user_id}") - - api.upload_file( - path_or_fileobj=temp_path, - path_in_repo=hf_path, - repo_id=REPO_ID, - repo_type="dataset", - token=HF_TOKEN_WRITE, - commit_message=f"User {tg_user_id} uploaded {original_filename} (ID: {file_id})" - # run_as_future=True # Consider if parallel uploads are needed and safe - ) - logging.info(f"Successfully uploaded {original_filename} ({file_id}) to HF.") - - file_info = { - 'type': 'file', - 'id': file_id, - 'original_filename': original_filename, - 'hf_path': hf_path, # Store the HF path used - 'file_type': get_file_type(original_filename), - 'upload_date': datetime.now().strftime('%Y-%m-%d %H:%M:%S'), - # Do NOT store unique_internal_filename unless needed for display logic - # 'unique_filename': unique_internal_filename - } - - # Add node to the IN-MEMORY db_data - if add_node(user_data['filesystem'], current_folder_id, file_info): - uploaded_count += 1 - needs_save = True # Mark that DB needs saving - logging.info(f"Added metadata for file {file_id} to folder {current_folder_id} for user {tg_user_id}.") - else: - # This case (add_node returning False) might mean duplicate ID, which shouldn't happen with UUIDs. - # More likely, parent folder disappeared or became invalid mid-request (very unlikely). - errors.append(f"Критическая ошибка: не удалось добавить метаданные для {original_filename}.") - logging.error(f"Failed add_node for {file_id} to {current_folder_id} for {tg_user_id}. 
Filesystem state: {user_data['filesystem']}") - # Attempt to clean up orphaned HF file - try: - logging.warning(f"Attempting cleanup of potentially orphaned HF file: {hf_path}") - api.delete_file(path_in_repo=hf_path, repo_id=REPO_ID, repo_type="dataset", token=HF_TOKEN_WRITE) - except Exception as del_err: - logging.error(f"Failed deleting orphaned HF file {hf_path} after add_node failure: {del_err}") - - except Exception as e: - logging.error(f"Upload processing error for {original_filename} (User: {tg_user_id}): {e}", exc_info=True) - errors.append(f"Ошибка загрузки {original_filename}: {str(e)[:100]}") # Keep error messages concise - # If upload to HF failed, no need to delete, it wasn't uploaded. - # If add_node failed, cleanup is attempted above. - finally: - # Clean up local temporary file regardless of success/failure - if os.path.exists(temp_path): - try: - os.remove(temp_path) - except OSError as e_remove: - logging.error(f"Error removing temp file {temp_path}: {e_remove}") - - # Save the entire DB state ONCE after processing all files, only if changes occurred - if needs_save: - try: - save_data(db_data) - logging.info(f"DB saved successfully after uploading {uploaded_count} files for user {tg_user_id}.") - except Exception as e: - logging.error(f"CRITICAL: Error saving DB after successful uploads for user {tg_user_id}: {e}", exc_info=True) - errors.append("Критическая ошибка: Не удалось сохранить обновленную информацию о файлах.") - # At this point, files are on HF but not in DB. Manual recovery might be needed. - - final_message = f"Обработано файлов: {len(files)}. Успешно загружено: {uploaded_count}." - if errors: - error_summary = "; ".join(errors[:3]) # Show first few errors - if len(errors) > 3: error_summary += f" (и еще {len(errors)-3})" - final_message += f" Ошибки: {error_summary}" - logging.warning(f"Upload completed with errors for user {tg_user_id}. 
Errors: {errors}") - - return jsonify({ - "status": "ok" if uploaded_count > 0 and not errors else ("partial" if uploaded_count > 0 and errors else "error"), - "message": final_message - }) - -@app.route('/create_folder', methods=['POST']) -def create_folder(): - data = request.get_json() - if not data or 'initData' not in data or 'parent_folder_id' not in data or 'folder_name' not in data: - return jsonify({"status": "error", "message": "Неполный запрос"}), 400 - - user_info = check_telegram_authorization(data['initData'], BOT_TOKEN) - if not user_info or 'id' not in user_info: - return jsonify({"status": "error", "message": "Не авторизован"}), 403 - - tg_user_id = str(user_info['id']) - parent_folder_id = data['parent_folder_id'] - folder_name = data['folder_name'].strip() - - if not folder_name: - return jsonify({'status': 'error', 'message': 'Имя папки не может быть пустым!'}), 400 - # Basic validation for problematic characters - if '/' in folder_name or '\\' in folder_name or ':' in folder_name or '<' in folder_name or '>' in folder_name: - return jsonify({'status': 'error', 'message': 'Имя папки содержит недопустимые символы.'}), 400 - if len(folder_name) > 255: # Filesystem limits often around 255 bytes/chars - return jsonify({'status': 'error', 'message': 'Имя папки слишком длинное.'}), 400 - - try: - db_data = load_data() - user_data = db_data.get('users', {}).get(tg_user_id) - if not user_data or 'filesystem' not in user_data: - logging.error(f"Filesystem missing for validated user {tg_user_id} during folder creation.") - # Try re-init before failing - if user_data is None: user_data = {} - initialize_user_filesystem(user_data) - db_data.setdefault('users', {})[tg_user_id] = user_data - save_data(db_data) - logging.info(f"Re-initialized filesystem for user {tg_user_id} during folder creation.") - user_data = db_data.get('users', {}).get(tg_user_id) - if not user_data or 'filesystem' not in user_data: - return jsonify({"status": "error", "message": 
"Критическая ошибка данных пользователя при создании папки."}), 500 - - # Check if folder with the same name already exists in the parent - parent_node, _ = find_node_by_id(user_data['filesystem'], parent_folder_id) - if parent_node and parent_node.get('type') == 'folder': - existing_names = { - child.get('name', '').lower() - for child in parent_node.get('children', []) - if isinstance(child, dict) and child.get('type') == 'folder' - } - if folder_name.lower() in existing_names: - return jsonify({'status': 'error', 'message': f'Папка с именем "{folder_name}" уже существует здесь.'}), 409 # Conflict - - - folder_id = uuid.uuid4().hex - folder_data = { - 'type': 'folder', - 'id': folder_id, - 'name': folder_name, - 'children': [], - 'created_at': datetime.now().strftime('%Y-%m-%d %H:%M:%S') # Add creation time - } - - # Add node in memory - if add_node(user_data['filesystem'], parent_folder_id, folder_data): - # Save the updated data - save_data(db_data) - logging.info(f"Folder '{folder_name}' ({folder_id}) created for user {tg_user_id} in parent {parent_folder_id}.") - return jsonify({'status': 'ok', 'message': f'Папка "{folder_name}" создана.', 'new_folder': folder_data}) - else: - # add_node failed, likely parent not found or invalid - logging.error(f"Failed to add folder node '{folder_name}' ({folder_id}) to parent {parent_folder_id} for user {tg_user_id}.") - parent_node_check, _ = find_node_by_id(user_data['filesystem'], parent_folder_id) # Re-check parent existence - if not parent_node_check: - return jsonify({'status': 'error', 'message': 'Не удалось найти родительскую папку.'}), 404 - elif parent_node_check.get('type') != 'folder': - return jsonify({'status': 'error', 'message': 'Невозможно создать папку внутри файла.'}), 400 - else: # Should not happen if parent exists and is a folder - return jsonify({'status': 'error', 'message': 'Неизвестная ошибка при добавлении папки.'}), 500 - - except Exception as e: - logging.error(f"Error creating folder 
'{folder_name}' for user {tg_user_id}: {e}", exc_info=True) - return jsonify({'status': 'error', 'message': f'Ошибка сервера при создании папки: {e}'}), 500 - -def find_file_owner_and_node(file_id: str) -> Tuple[Optional[str], Optional[Dict[str, Any]]]: - """ Helper to find file node and its owner's ID across all users. """ - try: - db_data = load_data() - for user_id, user_data in db_data.get('users', {}).items(): - if isinstance(user_data, dict) and 'filesystem' in user_data: - node, _ = find_node_by_id(user_data['filesystem'], file_id) - if node and node.get('type') == 'file': - return user_id, node - except Exception as e: - logging.error(f"Error searching for file {file_id} in load_data: {e}", exc_info=True) - return None, None - - -@app.route('/download/') -def download_file_route(file_id): - owner_user_id, file_node = find_file_owner_and_node(file_id) - - if not file_node: - return Response("Файл не найден", status=404, mimetype='text/plain') - - # Use hf_path stored during upload - hf_path = file_node.get('hf_path') - original_filename = file_node.get('original_filename', f'{file_id}_download') - - if not hf_path: - logging.error(f"Missing HF path (hf_path) for file ID {file_id} (owner: {owner_user_id})") - return Response("Ошибка сервера: Путь к файлу не найден в метаданных.", status=500, mimetype='text/plain') - - # Construct the direct download URL (can bypass resolve for direct links if path is correct) - # file_url = f"https://huggingface.co/datasets/{REPO_ID}/resolve/main/{hf_path}?download=true" - # Use LFS pointer URL format (more robust for large files, potentially) - needs testing - file_url = f"https://huggingface.co/datasets/{REPO_ID}/raw/main/{hf_path}" # Changed resolve to raw - - - try: - headers = {} - if HF_TOKEN_READ: - headers["authorization"] = f"Bearer {HF_TOKEN_READ}" - - # Use stream=True for large files - response = requests.get(file_url, headers=headers, stream=True, timeout=60) # Increased timeout - response.raise_for_status() # 
Raise HTTPError for bad responses (4xx or 5xx) - - # Correctly encode filename for Content-Disposition - # Standard ASCII fallback + UTF-8 version - try: - # Simple ASCII version for basic compatibility - ascii_filename = original_filename.encode('ascii', 'ignore').decode('ascii') - except UnicodeEncodeError: - ascii_filename = f"file_{file_id}" # Fallback if name is purely non-ASCII - - utf8_filename_encoded = urlencode({'filename': original_filename}, encoding='utf-8')[9:] # Get the encoded part - disposition = f'attachment; filename="{ascii_filename}"; filename*=UTF-8\'\'{utf8_filename_encoded}' - - - return Response(response.iter_content(chunk_size=65536), # 64KB chunks - mimetype=response.headers.get('Content-Type', 'application/octet-stream'), - headers={"Content-Disposition": disposition}) - - except requests.exceptions.RequestException as e: - status_code = 502 # Bad Gateway default - error_message = f"Ошибка скачивания файла с сервера ({e.__class__.__name__})" - if e.response is not None: - status_code = e.response.status_code - if status_code == 404: - error_message = "Файл не найден на сервере хранения." - logging.warning(f"HF file not found (404) at path {hf_path} for file ID {file_id}, owner {owner_user_id}.") - else: - error_message = f"Ошибка сервера хранения ({status_code})" - # Log response body for non-404 errors if helpful? Careful with large bodies. 
- # error_details = e.response.text[:200] if e.response.text else '' - # logging.error(f"Error downloading file from HF ({hf_path}, owner: {owner_user_id}): Status {status_code}, Response: {error_details}..., Exception: {e}") - - else: - logging.error(f"Network or request error downloading file from HF ({hf_path}, owner: {owner_user_id}): {e}") - - return Response(error_message, status=status_code, mimetype='text/plain') - except Exception as e: - logging.error(f"Unexpected error during file download ({hf_path}, owner: {owner_user_id}): {e}", exc_info=True) - return Response("Внутренняя ошибка сервера при обработке запроса на скачивание.", status=500, mimetype='text/plain') - - -@app.route('/delete_file/', methods=['POST']) -def delete_file_route(file_id): - data = request.get_json() - if not data or 'initData' not in data: # current_folder_id not strictly needed for delete by ID - return jsonify({"status": "error", "message": "Неполный запрос (отсутствует initData)"}), 400 - - user_info = check_telegram_authorization(data['initData'], BOT_TOKEN) - if not user_info or 'id' not in user_info: - return jsonify({"status": "error", "message": "Не авторизован"}), 403 - - tg_user_id = str(user_info['id']) - - if not HF_TOKEN_WRITE: - logging.error(f"Delete rejected for user {tg_user_id}: HF_TOKEN_WRITE not configured.") - return jsonify({'status': 'error', 'message': 'Удаление файлов временно недоступно (ошибка конфигурации).'}), 503 - - try: - db_data = load_data() - user_data = db_data.get('users', {}).get(tg_user_id) - - if not user_data or 'filesystem' not in user_data: - # This indicates a problem, user should exist if validation passed - logging.error(f"User data or filesystem missing for validated user {tg_user_id} during delete file.") - return jsonify({"status": "error", "message": "Ошибка данных пользователя при удалении."}), 500 - - file_node, parent_node = find_node_by_id(user_data['filesystem'], file_id) - - if not file_node or file_node.get('type') != 
'file': - # Check if the file exists under *another* user (shouldn't happen with UUIDs, but check) - other_owner, other_node = find_file_owner_and_node(file_id) - if other_node: - logging.warning(f"User {tg_user_id} attempted to delete file {file_id} owned by {other_owner}.") - return jsonify({'status': 'error', 'message': 'У вас нет прав на удаление этого файла.'}), 403 - else: - logging.warning(f"File node {file_id} not found for deletion attempt by user {tg_user_id}.") - return jsonify({'status': 'error', 'message': 'Файл не найден.'}), 404 - - if not parent_node: - # This is an inconsistency, file exists but parent doesn't? - logging.error(f"Inconsistency: File node {file_id} found for user {tg_user_id}, but parent node is missing.") - # Allow deletion from HF, but log error about DB state. - # Fall through to HF deletion, but DB removal might fail. - - - hf_path = file_node.get('hf_path') - original_filename = file_node.get('original_filename', 'файл') - needs_save = False - hf_delete_error = None - - # Step 1: Attempt to delete from Hugging Face Hub - if hf_path: - try: - api = HfApi() - logging.info(f"Attempting to delete HF file {hf_path} for file ID {file_id}, user {tg_user_id}") - api.delete_file( - path_in_repo=hf_path, - repo_id=REPO_ID, - repo_type="dataset", - token=HF_TOKEN_WRITE, - commit_message=f"User {tg_user_id} deleted file {original_filename} (ID: {file_id})" - ) - logging.info(f"Successfully deleted file {hf_path} from HF Hub for user {tg_user_id}") - except hf_utils.EntryNotFoundError: - logging.warning(f"File {hf_path} (ID: {file_id}) not found on HF Hub for delete attempt by user {tg_user_id}. Proceeding with DB removal.") - # This is okay, file might have been deleted manually or failed upload previously - except Exception as e: - hf_delete_error = e - logging.error(f"Error deleting file from HF Hub ({hf_path}, user {tg_user_id}): {e}", exc_info=True) - # Decide whether to proceed with DB removal despite HF error. 
- # For now, let's proceed but report the error. - else: - logging.warning(f"No hf_path found for file {file_id} (user {tg_user_id}). Skipping HF deletion.") - - # Step 2: Attempt to remove node from the in-memory database structure - if remove_node(user_data['filesystem'], file_id): - needs_save = True - logging.info(f"Removed file node {file_id} from DB structure for user {tg_user_id}") - else: - # This means the node wasn't found by remove_node, even though find_node_by_id found it earlier. - # Indicates a potential race condition or bug in node finding/removal. - logging.error(f"Inconsistency: Failed to remove file node {file_id} from DB structure for {tg_user_id} after it was initially found.") - # If HF deletion failed, this is less critical. If HF succeeded, we have an orphaned entry in DB. - - # Step 3: Save the database if changes were made - save_error = None - if needs_save: - try: - save_data(db_data) - logging.info(f"DB saved successfully after removing file {file_id} for user {tg_user_id}.") - except Exception as e: - save_error = e - logging.error(f"CRITICAL: Error saving DB after removing file node {file_id} for user {tg_user_id}: {e}", exc_info=True) - # DB state might be inconsistent now. - - # Step 4: Determine final status and message - if not needs_save and not hf_delete_error: - # This happens if remove_node failed (e.g., inconsistency) AND HF delete also failed or was skipped. - return jsonify({'status': 'error', 'message': 'Не удалось удалить файл (возможно, он уже удален или ошибка данных).'}), 500 - elif save_error: - return jsonify({'status': 'error', 'message': f'Файл удален с сервера (ошибка HF: {hf_delete_error}), но КРИТИЧЕСКАЯ ОШИБКА сохранения базы данных: {save_error}'}), 500 - elif hf_delete_error: - return jsonify({'status': 'ok', 'message': f'Файл "{original_filename}" удален из списка, но возникла ошибка при удалении с сервера хранения ({hf_delete_error}). 
Возможно, потребуется ручная очистка.'}), 207 # Multi-Status - else: - return jsonify({'status': 'ok', 'message': f'Файл "{original_filename}" успешно удален.'}) - - except Exception as e: - logging.error(f"Unexpected error during file deletion process for file {file_id}, user {tg_user_id}: {e}", exc_info=True) - return jsonify({'status': 'error', 'message': f'Непредвиденная ошибка при удалении файла: {e}'}), 500 - - -@app.route('/delete_folder/', methods=['POST']) -def delete_folder_route(folder_id): - if folder_id == 'root': - return jsonify({'status': 'error', 'message': 'Нельзя удалить корневую папку!'}), 400 - - data = request.get_json() - if not data or 'initData' not in data: - return jsonify({"status": "error", "message": "Неполный запрос (отсутствует initData)"}), 400 - - user_info = check_telegram_authorization(data['initData'], BOT_TOKEN) - if not user_info or 'id' not in user_info: - return jsonify({"status": "error", "message": "Не авторизован"}), 403 - - tg_user_id = str(user_info['id']) - - try: - db_data = load_data() - user_data = db_data.get('users', {}).get(tg_user_id) - - if not user_data or 'filesystem' not in user_data: - logging.error(f"User data or filesystem missing for validated user {tg_user_id} during delete folder.") - return jsonify({"status": "error", "message": "Ошибка данных пользователя при удалении папки."}), 500 - - folder_node, parent_node = find_node_by_id(user_data['filesystem'], folder_id) - - if not folder_node or folder_node.get('type') != 'folder': - logging.warning(f"Folder node {folder_id} not found or not a folder for deletion attempt by user {tg_user_id}.") - return jsonify({'status': 'error', 'message': 'Папка не найдена или не является папкой.'}), 404 - - if not parent_node: - # Root folder case is handled above. This means a non-root folder has no parent. 
- logging.error(f"Inconsistency: Folder node {folder_id} found for user {tg_user_id}, but parent node is missing.") - return jsonify({'status': 'error', 'message': 'Ошибка структуры данных: родительская папка не найдена.'}), 500 - - folder_name = folder_node.get('name', 'папка') - - # Check if folder is empty (important!) - if folder_node.get('children'): - logging.warning(f"Attempt to delete non-empty folder {folder_id} ('{folder_name}') by user {tg_user_id}.") - return jsonify({'status': 'error', 'message': f'Папку "{folder_name}" можно удалить только если она пуста.'}), 400 # Bad Request or Conflict 409? Using 400. - - # Attempt to remove node from the in-memory database structure - if remove_node(user_data['filesystem'], folder_id): - # Save the updated data - try: - save_data(db_data) - logging.info(f"Empty folder '{folder_name}' ({folder_id}) deleted successfully for user {tg_user_id}.") - return jsonify({'status': 'ok', 'message': f'Папка "{folder_name}" удалена.'}) - except Exception as e: - logging.error(f"CRITICAL: Error saving DB after removing folder node {folder_id} for user {tg_user_id}: {e}", exc_info=True) - # Attempt to restore the node in memory? Difficult. Log clearly. - return jsonify({'status': 'error', 'message': 'Папка удалена из списка, но произошла КРИТИЧЕСКАЯ ОШИБКА сохранения базы данных.'}), 500 - else: - # remove_node failed. Could be inconsistency or bug. 
- logging.error(f"Failed to remove empty folder node {folder_id} from DB structure for {tg_user_id}.") - return jsonify({'status': 'error', 'message': 'Не удалось удалить папку из структуры данных (возможно, ошибка).'}), 500 - - except Exception as e: - logging.error(f"Unexpected error during folder deletion process for folder {folder_id}, user {tg_user_id}: {e}", exc_info=True) - return jsonify({'status': 'error', 'message': f'Непредвиденная ошибка при удалении папки: {e}'}), 500 - -@app.route('/get_text_content/') -def get_text_content_route(file_id): - owner_user_id, file_node = find_file_owner_and_node(file_id) - - if not file_node: - return Response("Текстовый файл не найден", status=404, mimetype='text/plain') - - # Verify it's actually a text file according to our classification - if file_node.get('file_type') != 'text': - logging.warning(f"Attempt to get text content for non-text file {file_id} (type: {file_node.get('file_type')}), owner {owner_user_id}.") - return Response("Файл не является текстовым", status=400, mimetype='text/plain') - - - hf_path = file_node.get('hf_path') - if not hf_path: - return Response("Ошибка сервера: Путь к файлу не найден в метаданных.", status=500, mimetype='text/plain') - - file_url = f"https://huggingface.co/datasets/{REPO_ID}/raw/main/{hf_path}" - - try: - headers = {} - if HF_TOKEN_READ: - headers["authorization"] = f"Bearer {HF_TOKEN_READ}" - - response = requests.get(file_url, headers=headers, timeout=20) # Timeout for fetching content - response.raise_for_status() - - # Limit preview size to prevent browser freeze with huge files - max_preview_size_bytes = 1 * 1024 * 1024 # 1 MB limit for preview - if len(response.content) > max_preview_size_bytes: - logging.warning(f"Text file {file_id} (owner {owner_user_id}) is too large ({len(response.content)} bytes) for preview.") - return Response(f"Файл слишком большой для предпросмотра в браузере (> {max_preview_size_bytes // 1024 // 1024}MB). 
Скачайте файл для просмотра.", status=413, mimetype='text/plain') # Payload Too Large - - # Attempt to decode with common encodings - text_content = None - detected_encoding = None - # Try UTF-8 first as it's most common - encodings_to_try = ['utf-8', 'cp1251', 'latin-1', 'utf-16'] # Add more if needed - - try: - # Use requests' built-in encoding detection first, if available and seems reasonable - if response.encoding and response.apparent_encoding: - try: - text_content = response.content.decode(response.encoding) - detected_encoding = response.encoding - logging.info(f"Decoded text file {file_id} using response.encoding: {detected_encoding}") - except UnicodeDecodeError: - text_content = None # Fallback to manual loop - - if text_content is None: - for enc in encodings_to_try: - try: - text_content = response.content.decode(enc) - detected_encoding = enc - logging.info(f"Decoded text file {file_id} using manual attempt: {detected_encoding}") - break - except UnicodeDecodeError: - continue - except Exception as dec_e: # Catch other potential decoding errors - logging.warning(f"Error decoding {file_id} with {enc}: {dec_e}") - continue - - except Exception as outer_dec_e: - logging.error(f"Unexpected error during text decoding attempts for {file_id}: {outer_dec_e}") - return Response("Ошибка при декодировании содержимого файла.", status=500, mimetype='text/plain') - - - if text_content is None: - logging.error(f"Could not decode text file {file_id} (owner {owner_user_id}) with attempted encodings.") - # Try sending as bytes with fallback encoding? Or just error out. 
- return Response("Не удалось определить кодировку файла или файл не является текстовым.", status=400, mimetype='text/plain') - - # Return successfully decoded text - # Ensure correct mimetype with charset - return Response(text_content, mimetype=f'text/plain; charset={detected_encoding or "utf-8"}') - - except requests.exceptions.RequestException as e: - status_code = 502 - error_message = f"Ошибка загрузки содержимого ({e.__class__.__name__})" - if e.response is not None: - status_code = e.response.status_code - if status_code == 404: error_message = "Содержимое файла не найдено на сервере." - else: error_message = f"Ошибка сервера хранения ({status_code})" - logging.error(f"Error fetching text content from HF ({hf_path}, owner {owner_user_id}): Status {status_code}, Exception: {e}") - else: - logging.error(f"Network error fetching text content ({hf_path}, owner {owner_user_id}): {e}") - return Response(error_message, status=status_code, mimetype='text/plain') - except Exception as e: - logging.error(f"Unexpected error fetching text content ({hf_path}, owner {owner_user_id}): {e}", exc_info=True) - return Response("Внутренняя ошибка сервера при получении содержимого.", status=500, mimetype='text/plain') - - -@app.route('/preview_thumb/') -def preview_thumb_route(file_id): - owner_user_id, file_node = find_file_owner_and_node(file_id) - - if not file_node: - return Response("Превью не найдено", status=404, mimetype='text/plain') - - if file_node.get('file_type') != 'image': - return Response("Файл не является изображением", status=400, mimetype='text/plain') - - hf_path = file_node.get('hf_path') - if not hf_path: - return Response("Ошибка сервера: Путь к файлу не найден.", status=500, mimetype='text/plain') - - # Use the 'raw' endpoint for direct access - file_url = f"https://huggingface.co/datasets/{REPO_ID}/raw/main/{hf_path}" - - try: - headers = {} - if HF_TOKEN_READ: - headers["authorization"] = f"Bearer {HF_TOKEN_READ}" - - # Stream the response directly to 
the client - response = requests.get(file_url, headers=headers, stream=True, timeout=30) - response.raise_for_status() - - # Pass content type from HF response - content_type = response.headers.get('Content-Type', 'application/octet-stream') - if not content_type.startswith('image/'): - logging.warning(f"HF returned non-image content type '{content_type}' for image preview {file_id} (path: {hf_path}).") - # Fallback to octet-stream or return error? Let browser try to render. - content_type = 'application/octet-stream' - - - return Response(response.iter_content(chunk_size=65536), mimetype=content_type) - - except requests.exceptions.RequestException as e: - status_code = 502 - error_message = f"Ошибка загрузки превью ({e.__class__.__name__})" - if e.response is not None: - status_code = e.response.status_code - if status_code == 404: error_message = "Превью не найдено на сервере." - else: error_message = f"Ошибка сервера хранения ({status_code})" - logging.error(f"Error fetching preview from HF ({hf_path}, owner {owner_user_id}): Status {status_code}, Exception: {e}") - else: - logging.error(f"Network error fetching preview ({hf_path}, owner {owner_user_id}): {e}") - return Response(error_message, status=status_code, mimetype='text/plain') - except Exception as e: - logging.error(f"Unexpected error during preview fetch ({hf_path}, owner {owner_user_id}): {e}", exc_info=True) - return Response("Внутренняя ошибка сервера при загрузке превью.", status=500, mimetype='text/plain') - - -# --- Main Execution --- -if __name__ == '__main__': - print("Starting Flask Application...") - - if not BOT_TOKEN or BOT_TOKEN == 'YOUR_BOT_TOKEN': - logging.critical("\n" + "*"*70 + - "\n CRITICAL: TELEGRAM_BOT_TOKEN env var is not set or is invalid." + - "\n Telegram Mini App authentication WILL FAIL." + - "\n Set the TELEGRAM_BOT_TOKEN environment variable." 
+ - "\n" + "*"*70) - # Optionally exit here in a production environment - # import sys - # sys.exit(1) - else: - logging.info("TELEGRAM_BOT_TOKEN found.") - - - if not HF_TOKEN_WRITE: - logging.warning("HF_TOKEN (write access) env var is not set. File uploads & deletions will FAIL.") - else: - logging.info("HF_TOKEN (write access) found.") - - if not HF_TOKEN_READ: - logging.warning("HF_TOKEN_READ env var is not set. Using HF_TOKEN for read access. File downloads/previews might fail for private repos if HF_TOKEN lacks read permission.") - else: - logging.info("HF_TOKEN_READ found.") - - # Perform initial database download check synchronously before starting app - logging.info("Performing initial check/download of database from Hugging Face...") - try: - download_db_from_hf() - # Load data once after download to ensure it's valid before starting server - load_data() - logging.info("Initial database check/load complete.") - except Exception as e: - logging.error(f"Failed initial database download/load: {e}. Starting with potentially empty/old data.", exc_info=True) - # Ensure a default empty file exists if all else fails - if not os.path.exists(DATA_FILE): - try: - with open(DATA_FILE, 'w', encoding='utf-8') as f: json.dump({'users': {}}, f) - logging.info(f"Created empty {DATA_FILE} as initial load failed.") - except Exception as create_e: - logging.error(f"CRITICAL: Failed to create empty {DATA_FILE}: {create_e}") - - - logging.info("Starting Flask server on host 0.0.0.0, port 7860...") - # Use waitress or gunicorn for production instead of Flask's built-in server - # For development/testing: - app.run(debug=False, host='0.0.0.0', port=7860, threaded=True) # threaded=True is important for background HF uploads - - # Example using waitress (install with pip install waitress): - # from waitress import serve - # serve(app, host='0.0.0.0', port=7860, threads=8) # Adjust threads as needed - -# --- END OF FILE app.py ---