diff --git "a/app.py" "b/app.py"
deleted file mode 100644
--- "a/app.py"
+++ /dev/null
@@ -1,2531 +0,0 @@
-
-# --- START OF FILE app.py ---
-
-import os
-import hmac
-import hashlib
-import json
-from urllib.parse import unquote, parse_qsl, urlencode
-from flask import Flask, request, jsonify, Response, send_file
-import logging
-import threading
-import time
-from datetime import datetime
-from huggingface_hub import HfApi, hf_hub_download, utils as hf_utils
-from werkzeug.utils import secure_filename
-import requests
-from io import BytesIO
-import uuid
-from typing import Union, Optional, Dict, List, Tuple, Any
-
app = Flask(__name__)
# Session-signing secret; the hard-coded fallback is a dev convenience only.
app.secret_key = os.getenv("FLASK_SECRET_KEY", "supersecretkey_mini_app_unique")
# SECURITY NOTE(review): a real-looking Telegram bot token is committed as the
# fallback below — it should be revoked and removed; rely solely on the env var.
BOT_TOKEN = os.getenv('TELEGRAM_BOT_TOKEN', '6750208873:AAE2hvPlJ99dBdhGa_Brre0IIpUdOvXxHt4') # MUST be set
# Local JSON database; writes go to the .tmp file first, then os.replace().
DATA_FILE = 'cloudeng_mini_app_data.json'
DATA_FILE_TMP = DATA_FILE + '.tmp'
# Hugging Face dataset repo used both for the DB backup and for user files.
REPO_ID = "Eluza133/Z1e1u" # Same HF Repo
HF_TOKEN_WRITE = os.getenv("HF_TOKEN")
HF_TOKEN_READ = os.getenv("HF_TOKEN_READ") or HF_TOKEN_WRITE
# Scratch directory for files while they are streamed up to HF.
UPLOAD_FOLDER = 'uploads_mini_app'
os.makedirs(UPLOAD_FOLDER, exist_ok=True)

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Max accepted age (seconds) of Telegram initData before it is rejected.
AUTH_DATA_LIFETIME = 3600
-
def find_node_by_id(filesystem: Dict[str, Any], node_id: str) -> Tuple[Optional[Dict[str, Any]], Optional[Dict[str, Any]]]:
    """Breadth-first search for a node in the nested folder tree.

    Returns ``(node, parent_folder)``. The root matches with parent ``None``;
    ``(None, None)`` is returned when the id is absent or the tree is invalid.
    """
    if not filesystem or not isinstance(filesystem, dict):
        return None, None
    if filesystem.get('id') == node_id:
        return filesystem, None

    # BFS over folders only; file nodes are matched while scanning their parent.
    queue: List[Tuple[Dict[str, Any], Optional[Dict[str, Any]]]] = [(filesystem, None)]
    visited = {filesystem.get('id')}
    head = 0  # cursor dequeue — list.pop(0) was O(n) per step

    while head < len(queue):
        current_node, parent = queue[head]
        head += 1
        if current_node.get('type') == 'folder' and 'children' in current_node:
            for child in current_node.get('children', []):
                if not isinstance(child, dict):
                    logging.warning(f"Skipping non-dict child in folder {current_node.get('id')}: {child}")
                    continue
                child_id = child.get('id')
                if not child_id:
                    continue
                if child_id == node_id:
                    return child, current_node
                # 'visited' guards against cycles in a corrupted tree.
                if child_id not in visited and child.get('type') == 'folder':
                    visited.add(child_id)
                    queue.append((child, current_node))
    return None, None
-
def add_node(filesystem: Dict[str, Any], parent_id: str, node_data: Dict[str, Any]) -> bool:
    """Append node_data to the children of folder parent_id.

    Returns True on success; False when the parent is missing, is not a
    folder, or already holds a child with the same id.
    """
    parent, _ = find_node_by_id(filesystem, parent_id)
    if not parent:
        logging.error(f"Parent node {parent_id} not found when trying to add node {node_data.get('id')}")
        return False
    if parent.get('type') != 'folder':
        logging.error(f"Attempted to add node {node_data.get('id')} to a non-folder parent {parent_id}")
        return False

    # Repair a missing or corrupt children list before appending.
    if not isinstance(parent.get('children'), list):
        parent['children'] = []

    taken_ids = {entry.get('id') for entry in parent['children'] if isinstance(entry, dict)}
    if node_data.get('id') in taken_ids:
        logging.warning(f"Attempted to add node with duplicate ID {node_data.get('id')} to parent {parent_id}")
        return False # Indicate failure due to duplicate ID

    parent['children'].append(node_data)
    return True
-
-
def remove_node(filesystem: Dict[str, Any], node_id: str) -> bool:
    """Detach node_id from its parent's children list.

    The root node can never be removed. Returns True only when a child
    entry was actually deleted from its parent.
    """
    target, parent = find_node_by_id(filesystem, node_id)

    if target and parent and isinstance(parent.get('children'), list):
        before = len(parent['children'])
        parent['children'] = [
            entry for entry in parent['children']
            if not isinstance(entry, dict) or entry.get('id') != node_id
        ]
        return len(parent['children']) < before

    # Root matches with parent None — refuse to delete it.
    if target and node_id == filesystem.get('id'):
        logging.warning("Attempted to remove root node directly. This is not allowed.")
        return False

    if not target:
        logging.warning(f"Node {node_id} not found for removal.")
        return False
    if not parent:
        logging.warning(f"Parent not found for node {node_id} during removal.")
        return False

    return False
-
-
def get_node_path_list(filesystem: Dict[str, Any], node_id: str) -> List[Dict[str, str]]:
    """Build breadcrumb entries [{'id', 'name'}, ...] from root down to node_id.

    Walks parent links via repeated find_node_by_id lookups, capped at
    max_depth to survive corrupted (cyclic) trees, then normalizes the
    result so 'root' is always present and first.
    """
    path_list = []
    current_id = node_id
    processed_ids = set()
    max_depth = 20
    count = 0

    # Climb from the node toward the root, collecting entries leaf-first.
    while current_id and current_id not in processed_ids and count < max_depth:
        processed_ids.add(current_id)
        count += 1
        node, parent = find_node_by_id(filesystem, current_id)
        if not node:
            logging.warning(f"Node {current_id} not found while building path.")
            break
        path_list.append({
            'id': node.get('id', ''),
            'name': node.get('name', node.get('original_filename', 'Unknown'))
        })
        if not parent:
            # Only the root legitimately has no parent.
            if current_id != filesystem.get('id'):
                logging.warning(f"Node {current_id} found but its parent is missing (and it's not root).")
            break
        parent_id = parent.get('id')
        if parent_id == current_id:
            logging.error(f"Filesystem loop detected at node {current_id}")
            break
        current_id = parent_id

    if count >= max_depth:
        logging.error(f"Max depth reached while building path for node {node_id}. Possible loop or deep structure.")

    # Ensure root is present if not already found
    if not any(p.get('id') == 'root' for p in path_list):
        root_node, _ = find_node_by_id(filesystem, 'root')
        root_name = root_node.get('name', 'Root') if root_node else 'Root'
        path_list.append({'id': 'root', 'name': root_name})

    # Reverse and remove duplicates (keeping first occurrence from root)
    final_path = []
    seen_ids = set()
    for item in reversed(path_list):
        item_id = item.get('id')
        if item_id and item_id not in seen_ids:
            final_path.append(item)
            seen_ids.add(item_id)

    # Make sure root is always first if it exists
    if final_path and final_path[0].get('id') != 'root':
        root_index = -1
        for i, item in enumerate(final_path):
            if item.get('id') == 'root':
                root_index = i
                break
        if root_index > 0:
            root_item = final_path.pop(root_index)
            final_path.insert(0, root_item)
        elif root_index == -1: # Root wasn't in the path, force add it
            root_node, _ = find_node_by_id(filesystem, 'root')
            root_name = root_node.get('name', 'Root') if root_node else 'Root'
            final_path.insert(0, {'id': 'root', 'name': root_name})

    return final_path
-
def initialize_user_filesystem(user_data: Dict[str, Any]):
    """Ensure user_data['filesystem'] exists and has a valid 'root' node."""
    fresh_root = {
        "type": "folder",
        "id": "root",
        "name": "Root",
        "children": []
    }
    fs = user_data.get('filesystem')
    if not isinstance(fs, dict) or not fs:
        # Missing, wrong type, or empty — install a fresh tree silently.
        user_data['filesystem'] = fresh_root
    elif fs.get('id') != 'root':
        # A tree exists but its root id is wrong; reset and warn.
        logging.warning(f"User filesystem exists but has invalid root. Resetting. User: {user_data.get('user_info', {}).get('id')}")
        user_data['filesystem'] = fresh_root
    # Backfill any essential root keys that may still be absent.
    root = user_data['filesystem']
    root.setdefault('type', 'folder')
    root.setdefault('id', 'root')
    root.setdefault('name', 'Root')
    root.setdefault('children', [])
-
-
# In-process cache of the JSON database; guarded by _data_lock for all reads/writes.
_data_cache = None
_data_lock = threading.Lock()
-
def load_data() -> Dict[str, Any]:
    """Return the database dict, loading from disk/HF on cache miss.

    Holds _data_lock for the whole load (including the HF download), so
    concurrent requests serialize here. Always returns a dict with a
    'users' key; user filesystems are normalized on load.
    """
    global _data_cache
    with _data_lock:
        if _data_cache:
            # Perform a quick sanity check on cached data before returning
            if isinstance(_data_cache, dict) and 'users' in _data_cache:
                # Optionally re-initialize filesystems if needed, though should be done on load
                # for user_data in _data_cache.get('users', {}).values():
                #     initialize_user_filesystem(user_data)
                return _data_cache

        logging.info("Cache miss or invalid cache. Loading data from source.")
        loaded_data = None
        try:
            # Refresh the local file from Hugging Face first (best effort).
            download_db_from_hf()
            if os.path.exists(DATA_FILE):
                with open(DATA_FILE, 'r', encoding='utf-8') as file:
                    loaded_data = json.load(file)
                if not isinstance(loaded_data, dict):
                    logging.warning(f"{DATA_FILE} content is not a dict. Attempting to load backup or init empty.")
                    loaded_data = None # Force re-init or backup check
                # Basic structure check
                elif 'users' not in loaded_data or not isinstance(loaded_data['users'], dict):
                    logging.warning(f"{DATA_FILE} missing 'users' dict. Initializing.")
                    loaded_data = {'users': {}}

            else:
                logging.warning(f"{DATA_FILE} not found locally after download attempt.")
                # Optionally check for backup file here if needed

        except FileNotFoundError:
            logging.warning(f"{DATA_FILE} not found locally. Will initialize empty data.")
        except json.JSONDecodeError:
            logging.error(f"Error decoding JSON from {DATA_FILE}. File might be corrupted.")
            # Optionally try loading DATA_FILE_TMP if it exists and seems valid
        except Exception as e:
            logging.error(f"Unexpected error loading data: {e}")

        # If loading failed or file was invalid, initialize empty structure
        if loaded_data is None:
            logging.warning("Initializing empty data structure.")
            loaded_data = {'users': {}}

        # Ensure basic structure and initialize filesystems for all users
        loaded_data.setdefault('users', {})
        for user_id, user_data in loaded_data['users'].items():
            if isinstance(user_data, dict):
                initialize_user_filesystem(user_data)
            else:
                logging.warning(f"Invalid data type for user {user_id}. Skipping.")
                # Optionally remove invalid user entry here

        _data_cache = loaded_data
        logging.info(f"Data loaded successfully. User count: {len(_data_cache.get('users', {}))}")
        return _data_cache
-
def save_data(data: Dict[str, Any]):
    """Atomically persist *data* to disk, update the cache, and schedule an HF upload.

    Writes via a temp file + os.replace so a crash never leaves a half-written
    DATA_FILE. Re-raises on local write failure so callers know the save failed.
    """
    with _data_lock:
        if not isinstance(data, dict) or 'users' not in data:
            logging.error("Attempted to save invalid data structure. Aborting save.")
            # Optionally raise an exception here to signal the problem higher up
            return

        try:
            # Write to temporary file first
            with open(DATA_FILE_TMP, 'w', encoding='utf-8') as file:
                json.dump(data, file, ensure_ascii=False, indent=4)

            # If temporary write succeeds, replace original file
            os.replace(DATA_FILE_TMP, DATA_FILE)
            logging.info(f"Data saved locally to {DATA_FILE}")

            # Update cache
            global _data_cache
            _data_cache = data # Keep the in-memory version consistent

            # Schedule HF upload (run in background)
            # NOTE(review): thread is non-daemon and unbounded — rapid saves spawn
            # overlapping uploads; confirm this is acceptable for the deploy target.
            upload_thread = threading.Thread(target=upload_db_to_hf_thread, args=(DATA_FILE,))
            upload_thread.start()

        except Exception as e:
            logging.error(f"Error saving data locally: {e}")
            # Consider what to do if save fails - log, maybe try again?
            # If DATA_FILE_TMP exists, it might contain the data that failed to replace.
            if os.path.exists(DATA_FILE_TMP):
                logging.warning(f"Temporary save file {DATA_FILE_TMP} may still exist.")
            # Do NOT clear cache here, as the save failed. Keep the last known good state.
            raise # Re-raise the exception so the calling function knows saving failed
-
-
def upload_db_to_hf_thread(local_path: str):
    """Background worker: push the local JSON database to the HF dataset repo."""
    # Bail out early when uploading is impossible.
    if not HF_TOKEN_WRITE:
        logging.warning("HF_TOKEN_WRITE not set, skipping database upload.")
        return
    if not os.path.exists(local_path):
        logging.error(f"Local file {local_path} not found for HF upload.")
        return
    try:
        hub = HfApi()
        logging.info(f"Starting background upload of {local_path} to HF repo {REPO_ID}...")
        stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        hub.upload_file(
            path_or_fileobj=local_path,
            path_in_repo=os.path.basename(local_path),  # flat repo layout: filename only
            repo_id=REPO_ID,
            repo_type="dataset",
            token=HF_TOKEN_WRITE,
            commit_message=f"Backup MiniApp {stamp}"
        )
        logging.info("Database upload to Hugging Face completed.")
    except Exception as e:
        # Best-effort backup: log and swallow, never crash the worker thread.
        logging.error(f"Error during background database upload: {e}")
-
-
def _create_empty_local_db() -> None:
    # Write a minimal valid database so later load_data() calls succeed.
    with open(DATA_FILE, 'w', encoding='utf-8') as f:
        json.dump({'users': {}}, f)


def download_db_from_hf():
    """Fetch the latest DATA_FILE from the HF dataset repo into the CWD.

    Falls back to creating an empty local database whenever the download
    cannot happen (no read token, missing repo/file, generic failure);
    a pure connection error keeps any existing local copy untouched.
    """
    if not HF_TOKEN_READ:
        logging.warning("HF_TOKEN_READ not set, skipping database download.")
        if not os.path.exists(DATA_FILE):
            try:
                _create_empty_local_db()
                logging.info(f"Created empty local database file: {DATA_FILE}")
            except Exception as e:
                logging.error(f"Failed to create empty local database file: {e}")
        return
    try:
        logging.info(f"Attempting to download {DATA_FILE} from HF repo {REPO_ID}...")
        hf_hub_download(
            repo_id=REPO_ID,
            filename=DATA_FILE,
            repo_type="dataset",
            token=HF_TOKEN_READ,
            local_dir=".",
            local_dir_use_symlinks=False,
            force_download=True,  # Ensure we get the latest version
            etag_timeout=10
        )
        logging.info("Database downloaded successfully from Hugging Face.")
    except hf_utils.RepositoryNotFoundError:
        logging.error(f"Repository {REPO_ID} not found on Hugging Face.")
        if not os.path.exists(DATA_FILE):
            _create_empty_local_db()
    except hf_utils.EntryNotFoundError:
        logging.warning(f"{DATA_FILE} not found in repo {REPO_ID}. Using/Creating local version.")
        if not os.path.exists(DATA_FILE):
            _create_empty_local_db()
    except requests.exceptions.ConnectionError as e:
        logging.error(f"Connection error downloading DB from HF: {e}. Using local version if available.")
    except Exception as e:
        logging.error(f"Generic error downloading database from HF: {e}")
        if not os.path.exists(DATA_FILE):
            logging.info("Creating empty local DB file as download failed and none exists.")
            _create_empty_local_db()
-
# Extension → category lookup, precomputed once instead of a per-call if-chain.
_FILE_TYPE_BY_EXT: Dict[str, str] = {
    ext: kind
    for kind, exts in {
        'video': ('mp4', 'mov', 'avi', 'webm', 'mkv', 'wmv', 'flv', 'mpeg', 'mpg'),
        'image': ('jpg', 'jpeg', 'png', 'gif', 'bmp', 'webp', 'svg', 'heic', 'tiff'),
        'pdf': ('pdf',),
        'text': ('txt', 'md', 'log', 'csv', 'json', 'xml', 'html', 'css', 'js', 'py', 'java', 'c', 'cpp', 'h'),
        'document': ('doc', 'docx', 'rtf'),
        'spreadsheet': ('xls', 'xlsx'),
        'presentation': ('ppt', 'pptx'),
        'archive': ('zip', 'rar', '7z', 'gz', 'tar'),
        'audio': ('mp3', 'wav', 'ogg', 'aac', 'flac', 'm4a'),
    }.items()
    for ext in exts
}


def get_file_type(filename: Optional[str]) -> str:
    """Classify a filename by its extension; 'other' for unknown/missing extensions."""
    if not filename or '.' not in filename:
        return 'other'
    # Case-insensitive match on the last dot-separated segment.
    ext = filename.lower().split('.')[-1]
    return _FILE_TYPE_BY_EXT.get(ext, 'other')
-
def check_telegram_authorization(auth_data: str, bot_token: str) -> Optional[Dict[str, Any]]:
    """Validate Telegram Mini App initData and return the embedded user dict.

    Follows the WebApp scheme visible below: secret = HMAC-SHA256(key="WebAppData",
    msg=bot_token); the received 'hash' must equal HMAC-SHA256(secret,
    sorted "k=v" lines). Also rejects data older than AUTH_DATA_LIFETIME.
    Returns the parsed 'user' dict (with 'auth_date' injected) or None.
    """
    if not auth_data or not bot_token or bot_token == 'YOUR_BOT_TOKEN':
        logging.warning("Validation skipped: Missing auth_data or valid BOT_TOKEN.")
        # In production, you should strictly return None here. For debugging, you might bypass.
        # return {"id": "12345", "first_name": "Debug", "last_name": "User", "username": "debug_user"} # DEBUG ONLY
        return None

    try:
        parsed_data = dict(parse_qsl(unquote(auth_data)))
        if "hash" not in parsed_data:
            logging.error("Hash not found in auth data")
            return None

        # 'hash' itself is excluded from the signed payload.
        telegram_hash = parsed_data.pop('hash')
        auth_date_ts = int(parsed_data.get('auth_date', 0))
        current_ts = int(time.time())

        # Freshness check: abs() also rejects timestamps from the future.
        if abs(current_ts - auth_date_ts) > AUTH_DATA_LIFETIME:
            logging.warning(f"Auth data expired (Auth: {auth_date_ts}, Now: {current_ts}, Diff: {current_ts - auth_date_ts}, Limit: {AUTH_DATA_LIFETIME})")
            return None

        # Canonical data-check-string: sorted, newline-joined "key=value" pairs.
        data_check_string = "\n".join(sorted([f"{k}={v}" for k, v in parsed_data.items()]))
        secret_key = hmac.new("WebAppData".encode(), bot_token.encode(), hashlib.sha256).digest()
        calculated_hash = hmac.new(secret_key, data_check_string.encode(), hashlib.sha256).hexdigest()

        # compare_digest: constant-time comparison against timing attacks.
        if hmac.compare_digest(calculated_hash, telegram_hash):
            user_data_str = parsed_data.get('user')
            if user_data_str:
                try:
                    user_info = json.loads(user_data_str)
                    if 'id' not in user_info:
                        logging.error("Validated user data missing 'id'")
                        return None
                    # Add auth_date to user_info for potential future use
                    user_info['auth_date'] = auth_date_ts
                    logging.info(f"Telegram validation successful for user ID: {user_info['id']}")
                    return user_info
                except json.JSONDecodeError:
                    logging.error("Failed to decode user JSON from auth data")
                    return None
            else:
                logging.warning("No 'user' field in validated auth data")
                return None
        else:
            logging.warning("Hash mismatch during validation")
            # Log details for debugging if needed, carefully avoiding sensitive data
            # logging.debug(f"Hash mismatch. Received: {telegram_hash}, Calculated: {calculated_hash}, DataString: {data_check_string[:100]}...")
            return None
    except Exception as e:
        logging.error(f"Exception during validation: {e}", exc_info=True)
        return None
-
-
# NOTE(review): the markup below appears truncated/garbled in this copy — only a
# few stray text fragments of the original single-page UI survive. Recover the
# full template from version control before shipping; left byte-identical here.
HTML_TEMPLATE = """




 Zeus Cloud








Загрузка...




Zeus Cloud











Загрузка файлов



0%




Содержимое папки























"""
-
-
@app.route('/')
def index():
    """Serve the single-page Mini App shell; data comes from the JSON endpoints."""
    return Response(HTML_TEMPLATE, mimetype='text/html')
-
@app.route('/validate_init_data', methods=['POST'])
def validate_init_data():
    """Validate Telegram initData, create/refresh the user record, return the user.

    Responses: 200 {"status":"ok","user":...}; 400 missing initData;
    403 failed validation; 500 load/save failure.
    """
    data = request.get_json()
    if not data or 'initData' not in data:
        return jsonify({"status": "error", "message": "Missing initData"}), 400

    init_data_str = data['initData']
    user_info = check_telegram_authorization(init_data_str, BOT_TOKEN)

    if user_info and 'id' in user_info:
        tg_user_id = str(user_info['id'])
        needs_save = False
        try:
            db_data = load_data() # Load current data
            users = db_data.setdefault('users', {})

            # Ensure user exists and has basic structure
            if tg_user_id not in users or not isinstance(users.get(tg_user_id), dict):
                logging.info(f"New user or invalid data for: {tg_user_id}. Initializing.")
                users[tg_user_id] = {
                    'user_info': {},
                    'created_at': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                    'filesystem': {} # Will be initialized below
                }
                needs_save = True

            user_data = users[tg_user_id]

            # Update user info if different or missing
            # NOTE(review): check_telegram_authorization injects 'auth_date' into
            # user_info, so this comparison differs on every request and forces a
            # save (and HF upload) per validation — confirm that's intended.
            if user_data.get('user_info') != user_info:
                user_data['user_info'] = user_info
                needs_save = True

            # Initialize filesystem if missing or invalid
            if 'filesystem' not in user_data or not isinstance(user_data['filesystem'], dict) or not user_data['filesystem']:
                initialize_user_filesystem(user_data)
                needs_save = True

            # Perform save only if changes were made
            if needs_save:
                save_data(db_data)
                logging.info(f"User data initialized/updated and saved for {tg_user_id}")

            return jsonify({"status": "ok", "user": user_info})

        except Exception as e:
            # Catch potential errors during load/save for this specific validation
            logging.error(f"Error during user validation/initialization for {tg_user_id}: {e}", exc_info=True)
            return jsonify({"status": "error", "message": f"Ошибка сервера при обработке данных пользователя: {e}"}), 500

    else:
        logging.warning(f"Validation failed for initData starting with: {init_data_str[:100]}...")
        return jsonify({"status": "error", "message": "Недействительные данные авторизации Telegram."}), 403
-
-
@app.route('/get_dashboard_data', methods=['POST'])
def get_dashboard_data():
    """Return a folder's items, breadcrumbs, and metadata for the Mini App UI.

    Expects JSON {initData, folder_id}. Falls back to 'root' when the
    requested folder is missing/invalid, re-initializing the user's
    filesystem if it is absent entirely.
    """
    data = request.get_json()
    if not data or 'initData' not in data or 'folder_id' not in data:
        return jsonify({"status": "error", "message": "Неполный запрос"}), 400

    user_info = check_telegram_authorization(data['initData'], BOT_TOKEN)
    if not user_info or 'id' not in user_info:
        return jsonify({"status": "error", "message": "Не авторизован"}), 403

    tg_user_id = str(user_info['id'])
    requested_folder_id = data['folder_id']

    try:
        db_data = load_data()
        user_data = db_data.get('users', {}).get(tg_user_id)

        if not user_data or 'filesystem' not in user_data:
            # This case should ideally be handled by validate_init_data, but double-check
            logging.error(f"Filesystem missing for validated user {tg_user_id}. Attempting re-initialization.")
            # Try to re-initialize and save
            if user_data is None: user_data = {} # Create if totally missing
            initialize_user_filesystem(user_data)
            db_data.setdefault('users', {})[tg_user_id] = user_data # Ensure it's in db_data
            save_data(db_data)
            # Now user_data should have a filesystem
            logging.info(f"Re-initialized filesystem for user {tg_user_id}")

        # Find the requested folder, default to root if not found or invalid
        current_folder, _ = find_node_by_id(user_data['filesystem'], requested_folder_id)
        folder_id_to_use = requested_folder_id

        if not current_folder or current_folder.get('type') != 'folder':
            logging.warning(f"Folder {requested_folder_id} invalid or not found for user {tg_user_id}. Defaulting to root.")
            folder_id_to_use = 'root'
            current_folder, _ = find_node_by_id(user_data['filesystem'], folder_id_to_use)
            if not current_folder: # This is critical if root is missing after initialization
                logging.error(f"CRITICAL: Root folder not found for user {tg_user_id} even after potential re-init.")
                # Attempt one last re-init and save, then fail if still bad
                initialize_user_filesystem(user_data)
                save_data(db_data)
                current_folder, _ = find_node_by_id(user_data['filesystem'], folder_id_to_use)
                if not current_folder:
                    return jsonify({"status": "error", "message": "Критическая ошибка: Корневая папка отсутствует и не может быть создана."}), 500

        items_in_folder = current_folder.get('children', [])
        # Ensure items are dicts (filter out potential bad data)
        items_in_folder = [item for item in items_in_folder if isinstance(item, dict)]

        breadcrumbs = get_node_path_list(user_data['filesystem'], folder_id_to_use)

        current_folder_info = {
            'id': current_folder.get('id'),
            'name': current_folder.get('name', 'Root') # Default name if missing
        }

        return jsonify({
            "status": "ok",
            "items": items_in_folder,
            "breadcrumbs": breadcrumbs,
            "current_folder": current_folder_info
        })

    except Exception as e:
        logging.error(f"Error fetching dashboard data for user {tg_user_id}, folder {requested_folder_id}: {e}", exc_info=True)
        return jsonify({"status": "error", "message": f"Ошибка сервера при получении данных: {e}"}), 500
-
-
@app.route('/upload', methods=['POST'])
def upload_files():
    """Accept multipart uploads, push each file to the HF repo, record metadata.

    Flow per file: save to local temp → upload to HF under
    user_files/<user>/<file_id>/<name> → add a metadata node to the user's
    in-memory filesystem. The DB is saved once at the end if anything changed.
    Status is 'ok' / 'partial' / 'error' depending on successes vs errors.
    """
    # Use request.form for initData and folder_id from FormData
    init_data = request.form.get('initData')
    current_folder_id = request.form.get('current_folder_id', 'root')
    files = request.files.getlist('files')

    if not init_data:
        return jsonify({"status": "error", "message": "Отсутствуют данные авторизации"}), 400

    user_info = check_telegram_authorization(init_data, BOT_TOKEN)
    if not user_info or 'id' not in user_info:
        return jsonify({"status": "error", "message": "Не авторизован"}), 403

    tg_user_id = str(user_info['id'])

    if not HF_TOKEN_WRITE:
        logging.error("Upload failed: HF_TOKEN_WRITE not configured.")
        return jsonify({'status': 'error', 'message': 'Загрузка файлов временно недоступна (ошибка конфигурации).'}), 503 # Service Unavailable

    if not files or all(not f.filename for f in files):
        return jsonify({'status': 'error', 'message': 'Файлы для загрузки не выбраны.'}), 400

    # Load data ONCE before the loop
    try:
        db_data = load_data()
        user_data = db_data.get('users', {}).get(tg_user_id)

        if not user_data or 'filesystem' not in user_data:
            logging.error(f"Filesystem missing for validated user {tg_user_id} during upload.")
            # Attempt re-initialization before failing
            if user_data is None: user_data = {}
            initialize_user_filesystem(user_data)
            db_data.setdefault('users', {})[tg_user_id] = user_data
            save_data(db_data) # Save the re-initialized state
            logging.info(f"Re-initialized filesystem for user {tg_user_id} during upload.")
            # Reload user_data after potential save
            user_data = db_data.get('users', {}).get(tg_user_id)
            if not user_data or 'filesystem' not in user_data: # If still missing, fail
                return jsonify({"status": "error", "message": "Критическая ошибка данных пользователя при загрузке."}), 500

        target_folder_node, _ = find_node_by_id(user_data['filesystem'], current_folder_id)
        if not target_folder_node or target_folder_node.get('type') != 'folder':
            logging.warning(f"Target folder {current_folder_id} not found or invalid for user {tg_user_id}. Upload rejected.")
            return jsonify({'status': 'error', 'message': 'Целевая папка не найдена!'}), 404

    except Exception as e:
        logging.error(f"Error preparing for upload for user {tg_user_id}: {e}", exc_info=True)
        return jsonify({'status': 'error', 'message': f'Ошибка подготовки к загрузке: {e}'}), 500


    api = HfApi()
    uploaded_count = 0
    errors = []
    needs_save = False # Flag to save DB only if changes were made

    for file in files:
        if file and file.filename:
            original_filename = secure_filename(file.filename)
            if not original_filename:
                logging.warning(f"Skipping file with potentially unsafe name after securing: {file.filename}")
                errors.append(f"Пропуск файла с небезопасным именем: {file.filename}")
                continue

            name_part, ext_part = os.path.splitext(original_filename)
            unique_suffix = uuid.uuid4().hex[:8]
            # Keep filename structure simpler, rely on UUID for uniqueness if needed
            # unique_filename = f"{name_part}_{unique_suffix}{ext_part}"
            unique_filename = original_filename # Use original secured name for HF path for clarity? Risk of collision if user uploads same name twice. Let's add UUID.
            unique_internal_filename = f"{name_part}_{unique_suffix}{ext_part}"


            file_id = uuid.uuid4().hex
            # Structure HF path more logically: files / user_id / file_id / filename
            # This prevents issues with folder names containing special chars and filename collisions.
            # Using file_id ensures uniqueness at the storage level.
            hf_path = f"user_files/{tg_user_id}/{file_id}/{unique_internal_filename}"
            temp_path = os.path.join(UPLOAD_FOLDER, f"{file_id}_{unique_internal_filename}")

            try:
                file.seek(0) # Ensure reading from the start
                file.save(temp_path)
                logging.info(f"Uploading {original_filename} ({file_id}) to HF path: {hf_path} for user {tg_user_id}")

                api.upload_file(
                    path_or_fileobj=temp_path,
                    path_in_repo=hf_path,
                    repo_id=REPO_ID,
                    repo_type="dataset",
                    token=HF_TOKEN_WRITE,
                    commit_message=f"User {tg_user_id} uploaded {original_filename} (ID: {file_id})"
                    # run_as_future=True # Consider if parallel uploads are needed and safe
                )
                logging.info(f"Successfully uploaded {original_filename} ({file_id}) to HF.")

                file_info = {
                    'type': 'file',
                    'id': file_id,
                    'original_filename': original_filename,
                    'hf_path': hf_path, # Store the HF path used
                    'file_type': get_file_type(original_filename),
                    'upload_date': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                    # Do NOT store unique_internal_filename unless needed for display logic
                    # 'unique_filename': unique_internal_filename
                }

                # Add node to the IN-MEMORY db_data
                if add_node(user_data['filesystem'], current_folder_id, file_info):
                    uploaded_count += 1
                    needs_save = True # Mark that DB needs saving
                    logging.info(f"Added metadata for file {file_id} to folder {current_folder_id} for user {tg_user_id}.")
                else:
                    # This case (add_node returning False) might mean duplicate ID, which shouldn't happen with UUIDs.
                    # More likely, parent folder disappeared or became invalid mid-request (very unlikely).
                    errors.append(f"Критическая ошибка: не удалось добавить метаданные для {original_filename}.")
                    logging.error(f"Failed add_node for {file_id} to {current_folder_id} for {tg_user_id}. Filesystem state: {user_data['filesystem']}")
                    # Attempt to clean up orphaned HF file
                    try:
                        logging.warning(f"Attempting cleanup of potentially orphaned HF file: {hf_path}")
                        api.delete_file(path_in_repo=hf_path, repo_id=REPO_ID, repo_type="dataset", token=HF_TOKEN_WRITE)
                    except Exception as del_err:
                        logging.error(f"Failed deleting orphaned HF file {hf_path} after add_node failure: {del_err}")

            except Exception as e:
                logging.error(f"Upload processing error for {original_filename} (User: {tg_user_id}): {e}", exc_info=True)
                errors.append(f"Ошибка загрузки {original_filename}: {str(e)[:100]}") # Keep error messages concise
                # If upload to HF failed, no need to delete, it wasn't uploaded.
                # If add_node failed, cleanup is attempted above.
            finally:
                # Clean up local temporary file regardless of success/failure
                if os.path.exists(temp_path):
                    try:
                        os.remove(temp_path)
                    except OSError as e_remove:
                        logging.error(f"Error removing temp file {temp_path}: {e_remove}")

    # Save the entire DB state ONCE after processing all files, only if changes occurred
    if needs_save:
        try:
            save_data(db_data)
            logging.info(f"DB saved successfully after uploading {uploaded_count} files for user {tg_user_id}.")
        except Exception as e:
            logging.error(f"CRITICAL: Error saving DB after successful uploads for user {tg_user_id}: {e}", exc_info=True)
            errors.append("Критическая ошибка: Не удалось сохранить обновленную информацию о файлах.")
            # At this point, files are on HF but not in DB. Manual recovery might be needed.

    final_message = f"Обработано файлов: {len(files)}. Успешно загружено: {uploaded_count}."
    if errors:
        error_summary = "; ".join(errors[:3]) # Show first few errors
        if len(errors) > 3: error_summary += f" (и еще {len(errors)-3})"
        final_message += f" Ошибки: {error_summary}"
        logging.warning(f"Upload completed with errors for user {tg_user_id}. Errors: {errors}")

    return jsonify({
        "status": "ok" if uploaded_count > 0 and not errors else ("partial" if uploaded_count > 0 and errors else "error"),
        "message": final_message
    })
-
@app.route('/create_folder', methods=['POST'])
def create_folder():
    """Create a subfolder under parent_folder_id for the authenticated user.

    Expects JSON {initData, parent_folder_id, folder_name}. Validates the
    name (non-empty, no path/special chars, <=255 chars), rejects duplicate
    sibling folder names (409), persists the tree, and returns the new node.
    """
    data = request.get_json()
    if not data or 'initData' not in data or 'parent_folder_id' not in data or 'folder_name' not in data:
        return jsonify({"status": "error", "message": "Неполный запрос"}), 400

    user_info = check_telegram_authorization(data['initData'], BOT_TOKEN)
    if not user_info or 'id' not in user_info:
        return jsonify({"status": "error", "message": "Не авторизован"}), 403

    tg_user_id = str(user_info['id'])
    parent_folder_id = data['parent_folder_id']
    folder_name = data['folder_name'].strip()

    if not folder_name:
        return jsonify({'status': 'error', 'message': 'Имя папки не может быть пустым!'}), 400
    # Basic validation for problematic characters
    if '/' in folder_name or '\\' in folder_name or ':' in folder_name or '<' in folder_name or '>' in folder_name:
        return jsonify({'status': 'error', 'message': 'Имя папки содержит недопустимые символы.'}), 400
    if len(folder_name) > 255: # Filesystem limits often around 255 bytes/chars
        return jsonify({'status': 'error', 'message': 'Имя папки слишком длинное.'}), 400

    try:
        db_data = load_data()
        user_data = db_data.get('users', {}).get(tg_user_id)
        if not user_data or 'filesystem' not in user_data:
            logging.error(f"Filesystem missing for validated user {tg_user_id} during folder creation.")
            # Try re-init before failing
            if user_data is None: user_data = {}
            initialize_user_filesystem(user_data)
            db_data.setdefault('users', {})[tg_user_id] = user_data
            save_data(db_data)
            logging.info(f"Re-initialized filesystem for user {tg_user_id} during folder creation.")
            user_data = db_data.get('users', {}).get(tg_user_id)
            if not user_data or 'filesystem' not in user_data:
                return jsonify({"status": "error", "message": "Критическая ошибка данных пользователя при создании папки."}), 500

        # Check if folder with the same name already exists in the parent
        # (case-insensitive, folders only — files may share the name)
        parent_node, _ = find_node_by_id(user_data['filesystem'], parent_folder_id)
        if parent_node and parent_node.get('type') == 'folder':
            existing_names = {
                child.get('name', '').lower()
                for child in parent_node.get('children', [])
                if isinstance(child, dict) and child.get('type') == 'folder'
            }
            if folder_name.lower() in existing_names:
                return jsonify({'status': 'error', 'message': f'Папка с именем "{folder_name}" уже существует здесь.'}), 409 # Conflict


        folder_id = uuid.uuid4().hex
        folder_data = {
            'type': 'folder',
            'id': folder_id,
            'name': folder_name,
            'children': [],
            'created_at': datetime.now().strftime('%Y-%m-%d %H:%M:%S') # Add creation time
        }

        # Add node in memory
        if add_node(user_data['filesystem'], parent_folder_id, folder_data):
            # Save the updated data
            save_data(db_data)
            logging.info(f"Folder '{folder_name}' ({folder_id}) created for user {tg_user_id} in parent {parent_folder_id}.")
            return jsonify({'status': 'ok', 'message': f'Папка "{folder_name}" создана.', 'new_folder': folder_data})
        else:
            # add_node failed, likely parent not found or invalid
            logging.error(f"Failed to add folder node '{folder_name}' ({folder_id}) to parent {parent_folder_id} for user {tg_user_id}.")
            parent_node_check, _ = find_node_by_id(user_data['filesystem'], parent_folder_id) # Re-check parent existence
            if not parent_node_check:
                return jsonify({'status': 'error', 'message': 'Не удалось найти родительскую папку.'}), 404
            elif parent_node_check.get('type') != 'folder':
                return jsonify({'status': 'error', 'message': 'Невозможно создать папку внутри файла.'}), 400
            else: # Should not happen if parent exists and is a folder
                return jsonify({'status': 'error', 'message': 'Неизвестная ошибка при добавлении папки.'}), 500

    except Exception as e:
        logging.error(f"Error creating folder '{folder_name}' for user {tg_user_id}: {e}", exc_info=True)
        return jsonify({'status': 'error', 'message': f'Ошибка сервера при создании папки: {e}'}), 500
-
def find_file_owner_and_node(file_id: str) -> Tuple[Optional[str], Optional[Dict[str, Any]]]:
    """Locate a file node by ID across every user's filesystem tree.

    Returns ``(owner_user_id, file_node)`` when a matching node of type
    ``'file'`` is found, or ``(None, None)`` if the file is absent or the
    database lookup fails.
    """
    try:
        all_users = load_data().get('users', {})
        for owner_id, record in all_users.items():
            # Skip malformed user records defensively.
            if not isinstance(record, dict) or 'filesystem' not in record:
                continue
            candidate, _ = find_node_by_id(record['filesystem'], file_id)
            if candidate is not None and candidate.get('type') == 'file':
                return owner_id, candidate
    except Exception as e:
        logging.error(f"Error searching for file {file_id} in load_data: {e}", exc_info=True)
    return None, None
-
-
@app.route('/download/<file_id>')
def download_file_route(file_id):
    """Stream a stored file back to the client, proxied from the HF Hub.

    The file's repo path (``hf_path``) and original filename come from the
    user database; the content is streamed in chunks so large files never
    load fully into memory.
    """
    from urllib.parse import quote  # local import: only this route needs it

    owner_user_id, file_node = find_file_owner_and_node(file_id)

    if not file_node:
        return Response("Файл не найден", status=404, mimetype='text/plain')

    # hf_path was stored at upload time and is required to locate the blob.
    hf_path = file_node.get('hf_path')
    original_filename = file_node.get('original_filename', f'{file_id}_download')

    if not hf_path:
        logging.error(f"Missing HF path (hf_path) for file ID {file_id} (owner: {owner_user_id})")
        return Response("Ошибка сервера: Путь к файлу не найден в метаданных.", status=500, mimetype='text/plain')

    # 'resolve' follows LFS pointers and serves the actual file bytes;
    # 'raw' would return the small pointer text for LFS-tracked (large) files.
    file_url = f"https://huggingface.co/datasets/{REPO_ID}/resolve/main/{hf_path}"

    try:
        headers = {}
        if HF_TOKEN_READ:
            headers["authorization"] = f"Bearer {HF_TOKEN_READ}"

        # stream=True so large files are relayed chunk by chunk.
        response = requests.get(file_url, headers=headers, stream=True, timeout=60)
        response.raise_for_status()  # surface 4xx/5xx from storage

        # Content-Disposition: plain-ASCII fallback plus an RFC 5987
        # UTF-8 'filename*' for non-ASCII names.
        # encode(..., 'ignore') never raises; fall back when the name is
        # purely non-ASCII and the result comes back empty.
        ascii_filename = original_filename.encode('ascii', 'ignore').decode('ascii') or f"file_{file_id}"

        # Percent-encode per RFC 5987. urlencode() would apply
        # application/x-www-form-urlencoded rules ('+' for spaces), which
        # is wrong inside the filename* parameter.
        utf8_filename_encoded = quote(original_filename, safe='')
        disposition = f'attachment; filename="{ascii_filename}"; filename*=UTF-8\'\'{utf8_filename_encoded}'

        return Response(response.iter_content(chunk_size=65536),  # 64KB chunks
                        mimetype=response.headers.get('Content-Type', 'application/octet-stream'),
                        headers={"Content-Disposition": disposition})

    except requests.exceptions.RequestException as e:
        status_code = 502  # Bad Gateway default for upstream failures
        error_message = f"Ошибка скачивания файла с сервера ({e.__class__.__name__})"
        if e.response is not None:
            status_code = e.response.status_code
            if status_code == 404:
                error_message = "Файл не найден на сервере хранения."
                logging.warning(f"HF file not found (404) at path {hf_path} for file ID {file_id}, owner {owner_user_id}.")
            else:
                error_message = f"Ошибка сервера хранения ({status_code})"
        else:
            logging.error(f"Network or request error downloading file from HF ({hf_path}, owner: {owner_user_id}): {e}")

        return Response(error_message, status=status_code, mimetype='text/plain')
    except Exception as e:
        logging.error(f"Unexpected error during file download ({hf_path}, owner: {owner_user_id}): {e}", exc_info=True)
        return Response("Внутренняя ошибка сервера при обработке запроса на скачивание.", status=500, mimetype='text/plain')
-
-
@app.route('/delete_file/<file_id>', methods=['POST'])
def delete_file_route(file_id):
    """Delete a user's file from both the Hugging Face repo and the DB tree.

    Auth: Telegram ``initData`` in the JSON body. Deletion is two-phase
    (HF Hub blob first, then the JSON filesystem node); partial failures
    are reported with HTTP 207 so the client can warn the user.
    """
    data = request.get_json()
    if not data or 'initData' not in data:  # current_folder_id not strictly needed for delete by ID
        return jsonify({"status": "error", "message": "Неполный запрос (отсутствует initData)"}), 400

    user_info = check_telegram_authorization(data['initData'], BOT_TOKEN)
    if not user_info or 'id' not in user_info:
        return jsonify({"status": "error", "message": "Не авторизован"}), 403

    tg_user_id = str(user_info['id'])

    if not HF_TOKEN_WRITE:
        logging.error(f"Delete rejected for user {tg_user_id}: HF_TOKEN_WRITE not configured.")
        return jsonify({'status': 'error', 'message': 'Удаление файлов временно недоступно (ошибка конфигурации).'}), 503

    try:
        db_data = load_data()
        user_data = db_data.get('users', {}).get(tg_user_id)

        if not user_data or 'filesystem' not in user_data:
            # Should not happen: the user passed auth, so a record must exist.
            logging.error(f"User data or filesystem missing for validated user {tg_user_id} during delete file.")
            return jsonify({"status": "error", "message": "Ошибка данных пользователя при удалении."}), 500

        file_node, parent_node = find_node_by_id(user_data['filesystem'], file_id)

        if not file_node or file_node.get('type') != 'file':
            # Distinguish "not yours" (403) from "not found" (404).
            # Cross-user collision shouldn't happen with UUID ids.
            other_owner, other_node = find_file_owner_and_node(file_id)
            if other_node:
                logging.warning(f"User {tg_user_id} attempted to delete file {file_id} owned by {other_owner}.")
                return jsonify({'status': 'error', 'message': 'У вас нет прав на удаление этого файла.'}), 403
            else:
                logging.warning(f"File node {file_id} not found for deletion attempt by user {tg_user_id}.")
                return jsonify({'status': 'error', 'message': 'Файл не найден.'}), 404

        if not parent_node:
            # Inconsistency: file exists but its parent doesn't. Proceed
            # with HF deletion anyway; the DB removal below may fail.
            logging.error(f"Inconsistency: File node {file_id} found for user {tg_user_id}, but parent node is missing.")

        hf_path = file_node.get('hf_path')
        original_filename = file_node.get('original_filename', 'файл')
        needs_save = False
        hf_delete_error = None

        # Step 1: delete the blob from the Hugging Face Hub.
        if hf_path:
            try:
                api = HfApi()
                logging.info(f"Attempting to delete HF file {hf_path} for file ID {file_id}, user {tg_user_id}")
                api.delete_file(
                    path_in_repo=hf_path,
                    repo_id=REPO_ID,
                    repo_type="dataset",
                    token=HF_TOKEN_WRITE,
                    commit_message=f"User {tg_user_id} deleted file {original_filename} (ID: {file_id})"
                )
                logging.info(f"Successfully deleted file {hf_path} from HF Hub for user {tg_user_id}")
            except hf_utils.EntryNotFoundError:
                # Acceptable: blob may be gone already (manual cleanup or a
                # previously failed upload). Continue with DB removal.
                logging.warning(f"File {hf_path} (ID: {file_id}) not found on HF Hub for delete attempt by user {tg_user_id}. Proceeding with DB removal.")
            except Exception as e:
                # Remember the error but continue with DB removal so the
                # entry doesn't stay visible; partial failure reported below.
                hf_delete_error = e
                logging.error(f"Error deleting file from HF Hub ({hf_path}, user {tg_user_id}): {e}", exc_info=True)
        else:
            logging.warning(f"No hf_path found for file {file_id} (user {tg_user_id}). Skipping HF deletion.")

        # Step 2: remove the node from the in-memory filesystem tree.
        if remove_node(user_data['filesystem'], file_id):
            needs_save = True
            logging.info(f"Removed file node {file_id} from DB structure for user {tg_user_id}")
        else:
            # Node was found above but removal failed: race or bug in the
            # tree helpers. If HF deletion succeeded we now have an orphan.
            logging.error(f"Inconsistency: Failed to remove file node {file_id} from DB structure for {tg_user_id} after it was initially found.")

        # Step 3: persist the DB if anything changed in memory.
        save_error = None
        if needs_save:
            try:
                save_data(db_data)
                logging.info(f"DB saved successfully after removing file {file_id} for user {tg_user_id}.")
            except Exception as e:
                save_error = e
                logging.error(f"CRITICAL: Error saving DB after removing file node {file_id} for user {tg_user_id}: {e}", exc_info=True)
                # DB state on disk is now behind the in-memory state.

        # Step 4: map (needs_save, hf_delete_error, save_error) onto a response.
        if not needs_save and not hf_delete_error:
            # remove_node failed AND HF delete failed or was skipped.
            return jsonify({'status': 'error', 'message': 'Не удалось удалить файл (возможно, он уже удален или ошибка данных).'}), 500
        elif save_error:
            return jsonify({'status': 'error', 'message': f'Файл удален с сервера (ошибка HF: {hf_delete_error}), но КРИТИЧЕСКАЯ ОШИБКА сохранения базы данных: {save_error}'}), 500
        elif hf_delete_error:
            return jsonify({'status': 'ok', 'message': f'Файл "{original_filename}" удален из списка, но возникла ошибка при удалении с сервера хранения ({hf_delete_error}). Возможно, потребуется ручная очистка.'}), 207  # Multi-Status
        else:
            return jsonify({'status': 'ok', 'message': f'Файл "{original_filename}" успешно удален.'})

    except Exception as e:
        logging.error(f"Unexpected error during file deletion process for file {file_id}, user {tg_user_id}: {e}", exc_info=True)
        return jsonify({'status': 'error', 'message': f'Непредвиденная ошибка при удалении файла: {e}'}), 500
-
-
@app.route('/delete_folder/<folder_id>', methods=['POST'])
def delete_folder_route(folder_id):
    """Delete an EMPTY user folder from the filesystem tree.

    The root folder cannot be deleted, and non-empty folders are refused
    so files are never removed implicitly. Auth via Telegram ``initData``
    in the JSON body.
    """
    if folder_id == 'root':
        return jsonify({'status': 'error', 'message': 'Нельзя удалить корневую папку!'}), 400

    data = request.get_json()
    if not data or 'initData' not in data:
        return jsonify({"status": "error", "message": "Неполный запрос (отсутствует initData)"}), 400

    user_info = check_telegram_authorization(data['initData'], BOT_TOKEN)
    if not user_info or 'id' not in user_info:
        return jsonify({"status": "error", "message": "Не авторизован"}), 403

    tg_user_id = str(user_info['id'])

    try:
        db_data = load_data()
        user_data = db_data.get('users', {}).get(tg_user_id)

        if not user_data or 'filesystem' not in user_data:
            logging.error(f"User data or filesystem missing for validated user {tg_user_id} during delete folder.")
            return jsonify({"status": "error", "message": "Ошибка данных пользователя при удалении папки."}), 500

        folder_node, parent_node = find_node_by_id(user_data['filesystem'], folder_id)

        if not folder_node or folder_node.get('type') != 'folder':
            logging.warning(f"Folder node {folder_id} not found or not a folder for deletion attempt by user {tg_user_id}.")
            return jsonify({'status': 'error', 'message': 'Папка не найдена или не является папкой.'}), 404

        if not parent_node:
            # Root is rejected above, so a missing parent means the tree is corrupt.
            logging.error(f"Inconsistency: Folder node {folder_id} found for user {tg_user_id}, but parent node is missing.")
            return jsonify({'status': 'error', 'message': 'Ошибка структуры данных: родительская папка не найдена.'}), 500

        folder_name = folder_node.get('name', 'папка')

        # Refuse to delete non-empty folders (no implicit recursive delete).
        if folder_node.get('children'):
            logging.warning(f"Attempt to delete non-empty folder {folder_id} ('{folder_name}') by user {tg_user_id}.")
            return jsonify({'status': 'error', 'message': f'Папку "{folder_name}" можно удалить только если она пуста.'}), 400

        if remove_node(user_data['filesystem'], folder_id):
            try:
                save_data(db_data)
                logging.info(f"Empty folder '{folder_name}' ({folder_id}) deleted successfully for user {tg_user_id}.")
                return jsonify({'status': 'ok', 'message': f'Папка "{folder_name}" удалена.'})
            except Exception as e:
                # In-memory state and disk are now out of sync; surface loudly.
                logging.error(f"CRITICAL: Error saving DB after removing folder node {folder_id} for user {tg_user_id}: {e}", exc_info=True)
                return jsonify({'status': 'error', 'message': 'Папка удалена из списка, но произошла КРИТИЧЕСКАЯ ОШИБКА сохранения базы данных.'}), 500
        else:
            # remove_node failed despite find_node_by_id succeeding: race or bug.
            logging.error(f"Failed to remove empty folder node {folder_id} from DB structure for {tg_user_id}.")
            return jsonify({'status': 'error', 'message': 'Не удалось удалить папку из структуры данных (возможно, ошибка).'}), 500

    except Exception as e:
        logging.error(f"Unexpected error during folder deletion process for folder {folder_id}, user {tg_user_id}: {e}", exc_info=True)
        return jsonify({'status': 'error', 'message': f'Непредвиденная ошибка при удалении папки: {e}'}), 500
-
@app.route('/get_text_content/<file_id>')
def get_text_content_route(file_id):
    """Return the decoded text of a stored 'text' file for in-app preview.

    Rejects non-text files, caps previews at 1 MB, and tries several
    encodings before giving up.
    """
    owner_user_id, file_node = find_file_owner_and_node(file_id)

    if not file_node:
        return Response("Текстовый файл не найден", status=404, mimetype='text/plain')

    # Only files classified as 'text' at upload time may be previewed here.
    if file_node.get('file_type') != 'text':
        logging.warning(f"Attempt to get text content for non-text file {file_id} (type: {file_node.get('file_type')}), owner {owner_user_id}.")
        return Response("Файл не является текстовым", status=400, mimetype='text/plain')

    hf_path = file_node.get('hf_path')
    if not hf_path:
        return Response("Ошибка сервера: Путь к файлу не найден в метаданных.", status=500, mimetype='text/plain')

    # 'resolve' follows LFS pointers and serves the actual content;
    # 'raw' would return pointer text for LFS-tracked files.
    file_url = f"https://huggingface.co/datasets/{REPO_ID}/resolve/main/{hf_path}"

    try:
        headers = {}
        if HF_TOKEN_READ:
            headers["authorization"] = f"Bearer {HF_TOKEN_READ}"

        response = requests.get(file_url, headers=headers, timeout=20)
        response.raise_for_status()

        # Cap the preview to avoid freezing the browser on huge files.
        max_preview_size_bytes = 1 * 1024 * 1024  # 1 MB
        if len(response.content) > max_preview_size_bytes:
            logging.warning(f"Text file {file_id} (owner {owner_user_id}) is too large ({len(response.content)} bytes) for preview.")
            return Response(f"Файл слишком большой для предпросмотра в браузере (> {max_preview_size_bytes // 1024 // 1024}MB). Скачайте файл для просмотра.", status=413, mimetype='text/plain')  # Payload Too Large

        text_content = None
        detected_encoding = None
        # latin-1 can decode ANY byte sequence, so it must be tried LAST:
        # the previous list put it before utf-16, making utf-16 unreachable.
        encodings_to_try = ['utf-8', 'cp1251', 'utf-16', 'latin-1']

        try:
            # Trust the response's declared encoding only when requests also
            # derived an apparent encoding from the payload itself.
            if response.encoding and response.apparent_encoding:
                try:
                    text_content = response.content.decode(response.encoding)
                    detected_encoding = response.encoding
                    logging.info(f"Decoded text file {file_id} using response.encoding: {detected_encoding}")
                except UnicodeDecodeError:
                    text_content = None  # fall back to the manual loop

            if text_content is None:
                for enc in encodings_to_try:
                    try:
                        text_content = response.content.decode(enc)
                        detected_encoding = enc
                        logging.info(f"Decoded text file {file_id} using manual attempt: {detected_encoding}")
                        break
                    except UnicodeDecodeError:
                        continue
                    except Exception as dec_e:  # other codec errors (e.g. LookupError)
                        logging.warning(f"Error decoding {file_id} with {enc}: {dec_e}")
                        continue

        except Exception as outer_dec_e:
            logging.error(f"Unexpected error during text decoding attempts for {file_id}: {outer_dec_e}")
            return Response("Ошибка при декодировании содержимого файла.", status=500, mimetype='text/plain')

        if text_content is None:
            logging.error(f"Could not decode text file {file_id} (owner {owner_user_id}) with attempted encodings.")
            return Response("Не удалось определить кодировку файла или файл не является текстовым.", status=400, mimetype='text/plain')

        # Successfully decoded: include the charset so the browser renders correctly.
        return Response(text_content, mimetype=f'text/plain; charset={detected_encoding or "utf-8"}')

    except requests.exceptions.RequestException as e:
        status_code = 502
        error_message = f"Ошибка загрузки содержимого ({e.__class__.__name__})"
        if e.response is not None:
            status_code = e.response.status_code
            if status_code == 404: error_message = "Содержимое файла не найдено на сервере."
            else: error_message = f"Ошибка сервера хранения ({status_code})"
            logging.error(f"Error fetching text content from HF ({hf_path}, owner {owner_user_id}): Status {status_code}, Exception: {e}")
        else:
            logging.error(f"Network error fetching text content ({hf_path}, owner {owner_user_id}): {e}")
        return Response(error_message, status=status_code, mimetype='text/plain')
    except Exception as e:
        logging.error(f"Unexpected error fetching text content ({hf_path}, owner {owner_user_id}): {e}", exc_info=True)
        return Response("Внутренняя ошибка сервера при получении содержимого.", status=500, mimetype='text/plain')
-
-
@app.route('/preview_thumb/<file_id>')
def preview_thumb_route(file_id):
    """Stream an image file's bytes for use as an in-app thumbnail/preview."""
    owner_user_id, file_node = find_file_owner_and_node(file_id)

    if not file_node:
        return Response("Превью не найдено", status=404, mimetype='text/plain')

    if file_node.get('file_type') != 'image':
        return Response("Файл не является изображением", status=400, mimetype='text/plain')

    hf_path = file_node.get('hf_path')
    if not hf_path:
        return Response("Ошибка сервера: Путь к файлу не найден.", status=500, mimetype='text/plain')

    # 'resolve' follows LFS pointers and serves real image bytes; 'raw'
    # returns pointer text for LFS-tracked images, which breaks previews.
    file_url = f"https://huggingface.co/datasets/{REPO_ID}/resolve/main/{hf_path}"

    try:
        headers = {}
        if HF_TOKEN_READ:
            headers["authorization"] = f"Bearer {HF_TOKEN_READ}"

        # Stream straight through to the client to avoid buffering images.
        response = requests.get(file_url, headers=headers, stream=True, timeout=30)
        response.raise_for_status()

        # Forward the upstream content type when it looks like an image.
        content_type = response.headers.get('Content-Type', 'application/octet-stream')
        if not content_type.startswith('image/'):
            logging.warning(f"HF returned non-image content type '{content_type}' for image preview {file_id} (path: {hf_path}).")
            # Fall back to a generic type and let the browser try to render.
            content_type = 'application/octet-stream'

        return Response(response.iter_content(chunk_size=65536), mimetype=content_type)

    except requests.exceptions.RequestException as e:
        status_code = 502
        error_message = f"Ошибка загрузки превью ({e.__class__.__name__})"
        if e.response is not None:
            status_code = e.response.status_code
            if status_code == 404: error_message = "Превью не найдено на сервере."
            else: error_message = f"Ошибка сервера хранения ({status_code})"
            logging.error(f"Error fetching preview from HF ({hf_path}, owner {owner_user_id}): Status {status_code}, Exception: {e}")
        else:
            logging.error(f"Network error fetching preview ({hf_path}, owner {owner_user_id}): {e}")
        return Response(error_message, status=status_code, mimetype='text/plain')
    except Exception as e:
        logging.error(f"Unexpected error during preview fetch ({hf_path}, owner {owner_user_id}): {e}", exc_info=True)
        return Response("Внутренняя ошибка сервера при загрузке превью.", status=500, mimetype='text/plain')
-
-
-# --- Main Execution ---
if __name__ == '__main__':
    print("Starting Flask Application...")

    # --- Sanity-check the environment before serving requests ---
    if BOT_TOKEN and BOT_TOKEN != 'YOUR_BOT_TOKEN':
        logging.info("TELEGRAM_BOT_TOKEN found.")
    else:
        rule = "*" * 70
        logging.critical("\n" + rule +
                         "\n CRITICAL: TELEGRAM_BOT_TOKEN env var is not set or is invalid." +
                         "\n Telegram Mini App authentication WILL FAIL." +
                         "\n Set the TELEGRAM_BOT_TOKEN environment variable." +
                         "\n" + rule)
        # A production deployment may prefer to abort here:
        # import sys; sys.exit(1)

    if HF_TOKEN_WRITE:
        logging.info("HF_TOKEN (write access) found.")
    else:
        logging.warning("HF_TOKEN (write access) env var is not set. File uploads & deletions will FAIL.")

    if HF_TOKEN_READ:
        logging.info("HF_TOKEN_READ found.")
    else:
        logging.warning("HF_TOKEN_READ env var is not set. Using HF_TOKEN for read access. File downloads/previews might fail for private repos if HF_TOKEN lacks read permission.")

    # Fetch and validate the database synchronously before accepting traffic.
    logging.info("Performing initial check/download of database from Hugging Face...")
    try:
        download_db_from_hf()
        load_data()  # fail fast on corrupt data before the server starts
        logging.info("Initial database check/load complete.")
    except Exception as e:
        logging.error(f"Failed initial database download/load: {e}. Starting with potentially empty/old data.", exc_info=True)
        # Last resort: guarantee an (empty) data file exists on disk.
        if not os.path.exists(DATA_FILE):
            try:
                with open(DATA_FILE, 'w', encoding='utf-8') as f: json.dump({'users': {}}, f)
                logging.info(f"Created empty {DATA_FILE} as initial load failed.")
            except Exception as create_e:
                logging.error(f"CRITICAL: Failed to create empty {DATA_FILE}: {create_e}")

    logging.info("Starting Flask server on host 0.0.0.0, port 7860...")
    # Development server only; prefer waitress/gunicorn in production.
    # threaded=True matters: background HF uploads run in worker threads.
    app.run(debug=False, host='0.0.0.0', port=7860, threaded=True)
-
- # Example using waitress (install with pip install waitress):
- # from waitress import serve
- # serve(app, host='0.0.0.0', port=7860, threads=8) # Adjust threads as needed
-
-# --- END OF FILE app.py ---