diff --git "a/app.py" "b/app.py"
--- "a/app.py"
+++ "b/app.py"
@@ -1,11 +1,11 @@
-from flask import Flask, render_template_string, request, redirect, url_for, jsonify, flash, send_from_directory
+from flask import Flask, render_template_string, request, redirect, url_for, jsonify, flash
import json
import os
import logging
import threading
import time
from datetime import datetime
-from huggingface_hub import HfApi, hf_hub_download, list_repo_files
+from huggingface_hub import HfApi, hf_hub_download, delete_file as hf_delete_file
from huggingface_hub.utils import RepositoryNotFoundError, HfHubHTTPError, EntryNotFoundError
from werkzeug.utils import secure_filename
from dotenv import load_dotenv
@@ -18,49 +18,44 @@ load_dotenv()
app = Flask(__name__)
app.secret_key = os.getenv("FLASK_SECRET_KEY", 'tontalent_secret_key_for_flash_messages_only')
-
DATA_FILE = 'tontalent_data.json'
-UPLOADS_DIR_NAME = 'uploads' # Relative to project root
-app.config['UPLOAD_FOLDER'] = UPLOADS_DIR_NAME
-os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
-
-MAX_IMAGE_FILES = 10
-ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'gif', 'webp'}
+SYNC_FILES = [DATA_FILE] # For DB file itself
-SYNC_FILES = [DATA_FILE] # Main data file, images handled separately based on this file
+# Configuration for image uploads
+UPLOAD_FOLDER_NAME = 'uploads_temp' # Temporary local storage before HF upload
+app.config['UPLOAD_FOLDER'] = os.path.join(app.instance_path, UPLOAD_FOLDER_NAME)
+app.config['MAX_IMAGE_UPLOADS'] = 10
+app.config['ALLOWED_EXTENSIONS'] = {'png', 'jpg', 'jpeg', 'gif'}
REPO_ID = os.getenv("HF_REPO_ID", "Kgshop/tontalent2")
HF_TOKEN_WRITE = os.getenv("HF_TOKEN_WRITE")
HF_TOKEN_READ = os.getenv("HF_TOKEN_READ")
-TELEGRAM_BOT_TOKEN = "7549355625:AAGhdbf6x1JEzpH0mUtuxTF83Soi7MFVNZ8" # Replace with your actual bot token
+TELEGRAM_BOT_TOKEN = "7549355625:AAGhdbf6x1JEzpH0mUtuxTF83Soi7MFVNZ8" # SECURITY: live bot token hardcoded in source — revoke it and load from an env var (os.getenv) instead
DOWNLOAD_RETRIES = 3
DOWNLOAD_DELAY = 5
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-def allowed_file(filename):
- return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
+# Ensure instance folder and temp upload folder exist
+try:
+ os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
+except OSError as e:
+ logging.error(f"Error creating upload folder {app.config['UPLOAD_FOLDER']}: {e}")
+
-def _get_all_image_paths_from_data(data_dict):
- image_paths = set()
- for item_type_key in ['resumes', 'vacancies', 'freelance_offers']:
- for item in data_dict.get(item_type_key, []):
- for img_path in item.get('images', []):
- if img_path.startswith(UPLOADS_DIR_NAME + "/"): # Ensure it's a path we manage
- image_paths.add(img_path)
- return list(image_paths)
+def allowed_file(filename):
+ return '.' in filename and \
+ filename.rsplit('.', 1)[1].lower() in app.config['ALLOWED_EXTENSIONS']
def download_db_from_hf(specific_file=None, retries=DOWNLOAD_RETRIES, delay=DOWNLOAD_DELAY):
if not HF_TOKEN_READ and not HF_TOKEN_WRITE:
logging.warning("HF_TOKEN_READ/HF_TOKEN_WRITE not set. Download might fail for private repos.")
token_to_use = HF_TOKEN_READ if HF_TOKEN_READ else HF_TOKEN_WRITE
-
files_to_download = [specific_file] if specific_file else SYNC_FILES
- logging.info(f"Attempting download for primary files {files_to_download} from {REPO_ID}...")
+ logging.info(f"Attempting download for {files_to_download} from {REPO_ID}...")
all_successful = True
-
for file_name in files_to_download:
success = False
for attempt in range(retries + 1):
@@ -75,11 +70,10 @@ def download_db_from_hf(specific_file=None, retries=DOWNLOAD_RETRIES, delay=DOWN
success = True
break
except RepositoryNotFoundError:
- logging.error(f"Repository {REPO_ID} not found. Download cancelled.")
- return False
- except (HfHubHTTPError, EntryNotFoundError) as e:
- is_404 = isinstance(e, EntryNotFoundError) or (isinstance(e, HfHubHTTPError) and e.response.status_code == 404)
- if is_404:
+ logging.error(f"Repository {REPO_ID} not found. Download cancelled for all files.")
+ return False
+ except HfHubHTTPError as e:
+            if e.response.status_code == 404:  # NOTE(review): EntryNotFoundError no longer caught explicitly — confirm it subclasses HfHubHTTPError in the pinned huggingface_hub version, else missing-entry errors fall through to the generic handler
logging.warning(f"File {file_name} not found in repo {REPO_ID} (404). Skipping this file.")
if attempt == 0 and not os.path.exists(file_name):
try:
@@ -87,169 +81,96 @@ def download_db_from_hf(specific_file=None, retries=DOWNLOAD_RETRIES, delay=DOWN
with open(file_name, 'w', encoding='utf-8') as f:
json.dump({'resumes': [], 'vacancies': [], 'freelance_offers': [], 'users': {}}, f)
logging.info(f"Created empty local file {file_name} because it was not found on HF.")
- success = True # Created locally, treat as success for this file
except Exception as create_e:
logging.error(f"Failed to create empty local file {file_name}: {create_e}")
- break
+            success = True # 404 treated as success (e.g. new repo) — NOTE(review): unlike the old code, this is set even if creating the empty local file above just failed
+ break # Don't retry 404
else:
logging.error(f"HTTP error downloading {file_name} (Attempt {attempt + 1}): {e}. Retrying in {delay}s...")
except Exception as e:
- logging.error(f"Unexpected error downloading {file_name} (Attempt {attempt + 1}): {e}. Retrying in {delay}s...", exc_info=True)
+ logging.error(f"Unexpected error downloading {file_name} (Attempt {attempt + 1}): {e}. Retrying in {delay}s...", exc_info=True)
if attempt < retries: time.sleep(delay)
if not success:
logging.error(f"Failed to download {file_name} after {retries + 1} attempts.")
all_successful = False
-
- if not all_successful:
- logging.error("Not all primary files downloaded successfully. Image download might be skipped or incomplete.")
- return False
-
- # Download images if DATA_FILE was part of the download or no specific file was requested (full sync)
- if (specific_file is None or specific_file == DATA_FILE) and os.path.exists(DATA_FILE):
- logging.info("Downloading referenced image files...")
- try:
- with open(DATA_FILE, 'r', encoding='utf-8') as f:
- data_content = json.load(f)
- image_paths_in_data = _get_all_image_paths_from_data(data_content)
-
- for img_repo_path in image_paths_in_data:
- local_img_full_path = os.path.join(app.root_path, img_repo_path)
- os.makedirs(os.path.dirname(local_img_full_path), exist_ok=True)
- img_success = False
- for attempt in range(retries + 1):
- try:
- logging.info(f"Downloading image {img_repo_path} (Attempt {attempt+1})")
- hf_hub_download(
- repo_id=REPO_ID, filename=img_repo_path, repo_type="dataset",
- token=token_to_use, local_dir=".", local_dir_use_symlinks=False, # local_dir="." means files go into UPLOADS_DIR_NAME/
- force_download=True, resume_download=False
- )
- logging.info(f"Successfully downloaded image {img_repo_path}.")
- img_success = True
- break
- except (HfHubHTTPError, EntryNotFoundError) as e_img:
- is_404_img = isinstance(e_img, EntryNotFoundError) or (isinstance(e_img, HfHubHTTPError) and e_img.response.status_code == 404)
- if is_404_img:
- logging.warning(f"Image {img_repo_path} not found on HF (404). Skipping.")
- break
- else:
- logging.error(f"HTTP error downloading image {img_repo_path}: {e_img}. Retrying...")
- except Exception as e_img:
- logging.error(f"Unexpected error downloading image {img_repo_path}: {e_img}. Retrying...", exc_info=True)
- if attempt < retries: time.sleep(delay)
- if not img_success:
- logging.error(f"Failed to download image {img_repo_path} after multiple attempts.")
- all_successful = False # Mark overall sync as potentially incomplete
- except Exception as e:
- logging.error(f"Error processing or downloading images: {e}", exc_info=True)
- all_successful = False
-
logging.info(f"Download process finished. Overall success: {all_successful}")
return all_successful
+def upload_file_to_hf_with_retry(local_path, path_in_repo, repo_id, token, commit_message, retries=3, delay=5):
+ api = HfApi()
+ for attempt in range(retries + 1):
+ try:
+ api.upload_file(
+ path_or_fileobj=local_path,
+ path_in_repo=path_in_repo,
+ repo_id=repo_id,
+ repo_type="dataset",
+ token=token,
+ commit_message=commit_message
+ )
+ logging.info(f"File {local_path} successfully uploaded to {path_in_repo} in {repo_id}.")
+ return True
+ except Exception as e:
+ logging.error(f"Error uploading {local_path} to HF (Attempt {attempt + 1}): {e}")
+ if attempt < retries:
+ time.sleep(delay)
+ else:
+ logging.error(f"Failed to upload {local_path} to HF after {retries + 1} attempts.")
+ return False
-def upload_db_to_hf(specific_file_local_path=None, specific_file_repo_path=None):
+def upload_db_to_hf(specific_file=None):
if not HF_TOKEN_WRITE:
- logging.warning("HF_TOKEN_WRITE not set. Skipping upload to Hugging Face.")
+ logging.warning("HF_TOKEN_WRITE not set. Skipping upload of DB to Hugging Face.")
return
-
try:
- api = HfApi()
-
- files_to_upload_map = {} # {local_path: repo_path}
-
- if specific_file_local_path and specific_file_repo_path:
- if os.path.exists(specific_file_local_path):
- files_to_upload_map[specific_file_local_path] = specific_file_repo_path
- else:
- logging.warning(f"Specific file {specific_file_local_path} not found locally for upload.")
- else: # Full sync
- for file_name in SYNC_FILES: # DATA_FILE
- if os.path.exists(file_name):
- files_to_upload_map[file_name] = file_name
-
- # Add images referenced in DATA_FILE for full sync
- if os.path.exists(DATA_FILE):
- try:
- with open(DATA_FILE, 'r', encoding='utf-8') as f:
- data_content = json.load(f)
- image_paths_in_data = _get_all_image_paths_from_data(data_content)
- for img_repo_path in image_paths_in_data: # img_repo_path is like 'uploads/file.jpg'
- local_img_path = os.path.join(app.root_path, img_repo_path)
- if os.path.exists(local_img_path):
- files_to_upload_map[local_img_path] = img_repo_path
- else:
- logging.warning(f"Image {local_img_path} (referenced in data) not found locally, skipping upload.")
- except Exception as e:
- logging.error(f"Error reading DATA_FILE for image paths during upload: {e}")
-
- if not files_to_upload_map:
- logging.info("No files to upload.")
- return
-
- logging.info(f"Starting upload of {len(files_to_upload_map)} file(s) to HF repo {REPO_ID}...")
- for local_path, repo_path_in_repo in files_to_upload_map.items():
- try:
- logging.info(f"Uploading {local_path} to {repo_path_in_repo}...")
- api.upload_file(
- path_or_fileobj=local_path, path_in_repo=repo_path_in_repo, repo_id=REPO_ID,
- repo_type="dataset", token=HF_TOKEN_WRITE,
- commit_message=f"Sync {os.path.basename(local_path)} {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
+ files_to_upload = [specific_file] if specific_file else SYNC_FILES
+ logging.info(f"Starting upload of {files_to_upload} to HF repo {REPO_ID}...")
+ for file_name in files_to_upload:
+ if os.path.exists(file_name):
+ upload_file_to_hf_with_retry(
+ local_path=file_name,
+ path_in_repo=file_name,
+ repo_id=REPO_ID,
+ token=HF_TOKEN_WRITE,
+ commit_message=f"Sync {file_name} {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
)
- logging.info(f"File {local_path} successfully uploaded to {repo_path_in_repo}.")
- except Exception as e:
- logging.error(f"Error uploading file {local_path} to Hugging Face: {e}")
- logging.info("Finished uploading files to HF.")
-
+ else:
+ logging.warning(f"File {file_name} not found locally, skipping upload.")
+ logging.info("Finished uploading DB files to HF.")
except Exception as e:
- logging.error(f"General error during Hugging Face upload initialization or process: {e}", exc_info=True)
+ logging.error(f"General error during Hugging Face DB upload initialization or process: {e}", exc_info=True)
-def delete_files_from_hf(file_paths_in_repo):
+def delete_image_from_hf(filename_in_repo_uploads_dir):
if not HF_TOKEN_WRITE:
- logging.warning("HF_TOKEN_WRITE not set. Skipping deletion from Hugging Face.")
+ logging.warning("HF_TOKEN_WRITE not set. Skipping image deletion from Hugging Face.")
return False
- if not file_paths_in_repo:
- return True
-
- api = HfApi()
try:
- logging.info(f"Attempting to delete files from HF: {file_paths_in_repo}")
- # HfApi().delete_files expects list of strings, not string
- paths_to_delete = file_paths_in_repo if isinstance(file_paths_in_repo, list) else [file_paths_in_repo]
- api.delete_files(
+ path_in_repo = f"uploads/{filename_in_repo_uploads_dir}"
+ logging.info(f"Attempting to delete {path_in_repo} from HF repo {REPO_ID}")
+ hf_delete_file(
+ path_in_repo=path_in_repo,
repo_id=REPO_ID,
- paths_in_repo=paths_to_delete,
repo_type="dataset",
token=HF_TOKEN_WRITE,
- commit_message=f"Deleted files: {', '.join(paths_to_delete)}"
+ commit_message=f"Delete image {filename_in_repo_uploads_dir}"
)
- logging.info(f"Successfully deleted files from HF: {paths_to_delete}")
+ logging.info(f"Successfully deleted {path_in_repo} from Hugging Face.")
return True
except EntryNotFoundError:
- logging.warning(f"Some files not found on HF during deletion: {paths_to_delete}. Considered as success.")
- return True # If not found, it's effectively deleted or was never there.
+ logging.warning(f"Image {path_in_repo} not found on Hugging Face. Could be already deleted.")
+ return True # Consider it success if not found
except Exception as e:
- logging.error(f"Error deleting files from Hugging Face: {e}", exc_info=True)
+ logging.error(f"Error deleting image {path_in_repo} from Hugging Face: {e}", exc_info=True)
return False
-def delete_local_files(local_file_paths):
- for local_path in local_file_paths:
- try:
- if os.path.exists(local_path):
- os.remove(local_path)
- logging.info(f"Locally deleted {local_path}")
- except Exception as e:
- logging.error(f"Error deleting local file {local_path}: {e}")
-
-
def periodic_backup():
backup_interval = 1800
logging.info(f"Setting up periodic backup every {backup_interval} seconds.")
while True:
time.sleep(backup_interval)
logging.info("Starting periodic backup...")
- upload_db_to_hf()
+ upload_db_to_hf()
logging.info("Periodic backup finished.")
def load_data():
@@ -260,14 +181,19 @@ def load_data():
logging.info(f"Local data loaded successfully from {DATA_FILE}")
if not isinstance(data, dict):
logging.warning(f"Local {DATA_FILE} is not a dictionary. Attempting download.")
- raise FileNotFoundError
+ raise FileNotFoundError
for key in default_data:
if key not in data: data[key] = default_data[key]
+ for item_type in ['resumes', 'vacancies', 'freelance_offers']: # Ensure image_filenames field
+ if item_type in data:
+ for item in data[item_type]:
+ if 'image_filenames' not in item:
+ item['image_filenames'] = []
return data
except (FileNotFoundError, json.JSONDecodeError) as e:
logging.warning(f"Error loading local data ({e}). Attempting download from HF.")
- if download_db_from_hf(specific_file=DATA_FILE): # This will also attempt to download images referenced in DATA_FILE
+ if download_db_from_hf(specific_file=DATA_FILE):
try:
with open(DATA_FILE, 'r', encoding='utf-8') as file:
data = json.load(file)
@@ -277,6 +203,11 @@ def load_data():
return default_data
for key in default_data:
if key not in data: data[key] = default_data[key]
+ for item_type in ['resumes', 'vacancies', 'freelance_offers']:
+ if item_type in data:
+ for item in data[item_type]:
+ if 'image_filenames' not in item:
+ item['image_filenames'] = []
return data
except Exception as load_e:
logging.error(f"Error loading downloaded {DATA_FILE}: {load_e}. Using default.", exc_info=True)
@@ -299,18 +230,16 @@ def save_data(data):
default_keys = {'resumes': [], 'vacancies': [], 'freelance_offers': [], 'users': {}}
for key in default_keys:
if key not in data: data[key] = default_keys[key]
+ for item_type in ['resumes', 'vacancies', 'freelance_offers']: # Ensure image_filenames field
+ if item_type in data:
+ for item in data[item_type]:
+ if 'image_filenames' not in item:
+ item['image_filenames'] = []
with open(DATA_FILE, 'w', encoding='utf-8') as file:
json.dump(data, file, ensure_ascii=False, indent=4)
logging.info(f"Data successfully saved to {DATA_FILE}")
- upload_db_to_hf(specific_file_local_path=DATA_FILE, specific_file_repo_path=DATA_FILE) # Upload DATA_FILE first
- # Then upload all images referenced in it.
- # The general upload_db_to_hf() without args will handle both DATA_FILE and its referenced images.
- # For fine-grained control, could call: upload_db_to_hf() here without args
- # to ensure images are also synced after DATA_FILE is updated.
- # Current `upload_db_to_hf` will try to upload all referenced images if no specific_file args are given.
- # So for just saving data, it's enough to upload DATA_FILE.
- # The periodic backup or admin force upload will do a more thorough sync.
+ upload_db_to_hf(specific_file=DATA_FILE)
except Exception as e:
logging.error(f"Error saving data to {DATA_FILE}: {e}", exc_info=True)
@@ -318,12 +247,7 @@ def verify_telegram_auth_data(auth_data_str, bot_token):
if not auth_data_str:
return False, None
- params = {}
- try:
- params = dict(urllib.parse.parse_qsl(auth_data_str))
- except Exception: # Broad exception for parsing issues
- return False, None
-
+ params = dict(urllib.parse.parse_qsl(auth_data_str))
if 'hash' not in params:
return False, None
@@ -358,44 +282,129 @@ MAIN_APP_TEMPLATE = '''