"""Flask front end that copies files from a Telegram channel to a Hugging Face dataset.

The user submits a list of (partial) file names. The app scans the configured
Telegram channel for media whose file name contains one of those names,
downloads each match into a local folder and uploads it to the configured
dataset repository.

Configuration is read from the environment (optionally via a ``.env`` file):
``API_ID``, ``API_HASH``, ``HF_TOKEN``, ``CHANNEL_USERNAME``, ``DATASET_REPO``.
"""
import asyncio
import logging
import os
import threading

from dotenv import load_dotenv
from flask import Flask, jsonify, render_template, request
from huggingface_hub import upload_file
from telethon import TelegramClient
from telethon.errors import AuthKeyError, PhoneCodeInvalidError, SessionPasswordNeededError

# === Load secrets from .env ===
load_dotenv()
API_ID = os.getenv("API_ID")
API_HASH = os.getenv("API_HASH")
HF_TOKEN = os.getenv("HF_TOKEN")
CHANNEL = os.getenv("CHANNEL_USERNAME")
REPO_ID = os.getenv("DATASET_REPO")
DATA_PATH = "telegram_uploads"

# Local folder that receives files downloaded from Telegram.
DOWNLOAD_DIR = "downloads"
# Telethon session name and the file name this script checks for on disk.
SESSION_NAME = "my_session"
SESSION_FILE = "my_session.session"
# Upper bound on how many channel messages are scanned per request.
MESSAGE_SCAN_LIMIT = 30000

# === Logging setup ===
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s — %(levelname)s — %(message)s",
    handlers=[
        logging.FileHandler("upload.log"),
        logging.StreamHandler(),
    ],
)

# === Setup Telegram Client (only if credentials are available) ===
client = None
if API_ID and API_HASH:
    try:
        client = TelegramClient(SESSION_NAME, int(API_ID), API_HASH)
    except ValueError as e:
        # Raised by int() when API_ID is not numeric.
        logging.error("Failed to initialize Telegram client: %s", e)
        client = None

# === Ensure download folder exists ===
os.makedirs(DOWNLOAD_DIR, exist_ok=True)


# === Upload wrapper ===
def upload_to_dataset(filepath):
    """Upload one local file to the configured Hugging Face dataset repo.

    The file is stored under ``DATA_PATH`` using its base name.

    Args:
        filepath: Path of the local file to upload.

    Returns:
        A ``(success, message)`` tuple: ``success`` is a bool and ``message``
        is a human-readable status line. Failures are reported through the
        return value, never raised.
    """
    if not HF_TOKEN or not REPO_ID:
        return False, "❌ Missing Hugging Face credentials"

    filename = os.path.basename(filepath)
    try:
        upload_file(
            path_or_fileobj=filepath,
            path_in_repo=f"{DATA_PATH}/{filename}",
            repo_id=REPO_ID,
            repo_type="dataset",
            token=HF_TOKEN,
        )
    except Exception as e:
        logging.error("[!] Upload failed: %s — %s", filepath, e)
        return False, f"❌ Upload failed: {filename} — {e}"

    logging.info("[↑] Uploaded: %s", filepath)
    return True, f"✅ Uploaded: {filename}"


def _parse_search_terms(name_input):
    """Split comma/newline separated input into unique, lower-cased search terms."""
    terms = (part.strip().lower() for part in name_input.replace(",", "\n").splitlines())
    # dict.fromkeys de-duplicates while keeping the order the user typed.
    return list(dict.fromkeys(term for term in terms if term))


def _safe_filename(raw_name, fallback):
    """Reduce an untrusted remote file name to a bare name with no directory parts."""
    candidate = os.path.basename((raw_name or "").replace("\\", "/"))
    if candidate in ("", ".", ".."):
        return fallback
    return candidate


# === Main file processing logic with improved error handling ===
async def process_filenames(name_input):
    """Find, download and upload channel files matching the requested names.

    Args:
        name_input: Search terms separated by commas and/or newlines. A term
            matches a file when it is a case-insensitive substring of the
            file name.

    Returns:
        A newline-joined report with one status line per handled file plus a
        "Not found" line for every term that matched no file, or a single
        error line when the client, channel or session is unusable. Errors
        are reported in the returned text rather than raised.
    """
    if not client:
        return "❌ Error: Telegram client not initialized. Please check your API credentials."
    if not CHANNEL:
        return "❌ Error: Channel username not configured."

    try:
        # Check if client is already connected
        if not client.is_connected():
            await client.connect()

        # Check if we're authorized
        if not await client.is_user_authorized():
            return "❌ Error: Telegram client not authorized. This application requires a pre-authenticated session file."

        filenames = _parse_search_terms(name_input)
        if not filenames:
            # Nothing to look for: skip the channel scan entirely.
            return "❌ No files matched."

        # Collect the messages first so an iteration failure is reported
        # separately from per-file download problems.
        try:
            messages = [
                msg async for msg in client.iter_messages(CHANNEL, limit=MESSAGE_SCAN_LIMIT)
            ]
        except Exception as e:
            logging.error("Error iterating messages: %s", e)
            return f"❌ Error accessing channel messages: {str(e)}"

        results = []
        found = set()          # file names already handled in this request
        matched_terms = set()  # search terms that hit at least one file

        for msg in messages:
            if not (msg.media and msg.file):
                continue

            # The name comes from the channel, so strip any directory parts
            # before it is used to build a local path.
            fname = _safe_filename(msg.file.name, f"file_{msg.id}{msg.file.ext or ''}")
            lowered = fname.lower()
            hits = [term for term in filenames if term in lowered]
            if not hits:
                continue
            matched_terms.update(hits)

            if fname in found:
                continue
            found.add(fname)

            path = os.path.join(DOWNLOAD_DIR, fname)
            if os.path.exists(path):
                results.append(f"⏩ Already exists: {fname}")
                continue

            try:
                await msg.download_media(file=path)
                _, status_text = upload_to_dataset(path)
                results.append(status_text)
            except Exception as download_error:
                logging.error("Download error for %s: %s", fname, download_error)
                results.append(f"❌ Download failed: {fname} — {str(download_error)}")

        # A term is "not found" only if no file name contained it; files that
        # were found but failed to transfer are already reported above.
        results.extend(
            f"❌ Not found: {name}" for name in filenames if name not in matched_terms
        )
        return "\n".join(results) if results else "❌ No files matched."

    except AuthKeyError:
        logging.error("Auth key error - session may be corrupted")
        return "❌ Error: Session authentication failed. The session file may be corrupted or expired."
    except SessionPasswordNeededError:
        logging.error("Two-factor authentication required")
        return "❌ Error: Two-factor authentication is enabled. This application requires a pre-authenticated session."
    except EOFError as e:
        logging.error("EOF Error: %s", e)
        return "❌ Error: Connection interrupted. This may be due to network issues or session problems."
    except Exception as e:
        logging.error("Error in process_filenames: %s", e)
        return f"❌ Error: {str(e)}"


_background_loop = None
_background_loop_lock = threading.Lock()


def _run_loop_forever(loop):
    """Thread target: make ``loop`` current for this thread and run it."""
    asyncio.set_event_loop(loop)
    loop.run_forever()


def _get_background_loop():
    """Return the shared event loop, starting its daemon thread on first use."""
    global _background_loop
    with _background_loop_lock:
        if _background_loop is None or _background_loop.is_closed():
            loop = asyncio.new_event_loop()
            threading.Thread(
                target=_run_loop_forever,
                args=(loop,),
                name="telegram-event-loop",
                daemon=True,
            ).start()
            _background_loop = loop
        return _background_loop


def run_async_in_thread(coro):
    """Run a coroutine on the background event loop and wait for its result.

    Every call is scheduled on the same long-lived loop. The module-level
    Telegram client stays connected between requests, so it must keep running
    on the loop it connected on; a fresh loop per call would leave it attached
    to a closed loop after the first request.

    Args:
        coro: The coroutine object to execute.

    Returns:
        Whatever the coroutine returns. Exceptions raised by the coroutine
        propagate to the caller.
    """
    future = asyncio.run_coroutine_threadsafe(coro, _get_background_loop())
    return future.result()


# === Flask App with explicit template and static folder paths ===
# Get the directory where this script is located
basedir = os.path.abspath(os.path.dirname(__file__))
app = Flask(
    __name__,
    template_folder=os.path.join(basedir, 'templates'),
    static_folder=os.path.join(basedir, 'static'),
)


@app.route('/')
def index():
    """Serve the upload form."""
    return render_template('index.html')


@app.route('/upload', methods=['POST'])
def upload():
    """Process the submitted ``filenames`` form field and return the report text."""
    try:
        filenames_input = request.form.get('filenames', '').strip()
        if not filenames_input:
            return "❌ Error: No filenames provided", 400

        # Check if credentials are configured
        if not client:
            return "❌ Error: Application not configured. Please set up your environment variables with API credentials.", 500

        # Run the async function on the background event loop
        results = run_async_in_thread(process_filenames(filenames_input))
        return results
    except Exception as e:
        logging.error("Error in upload route: %s", e)
        return f"❌ Error: {str(e)}", 500


@app.route('/health')
def health():
    """Report liveness plus which integrations are configured."""
    status = {
        "status": "healthy",
        "message": "Hugging Face Uploader is running",
        "configured": {
            "telegram": bool(client),
            "huggingface": bool(HF_TOKEN and REPO_ID),
            "channel": bool(CHANNEL),
        },
        "paths": {
            "basedir": basedir,
            "template_folder": app.template_folder,
            "static_folder": app.static_folder,
        },
    }
    return jsonify(status)


@app.route('/config')
def config():
    """Show configuration status"""
    config_status = {
        "API_ID": "✅ Set" if API_ID else "❌ Missing",
        "API_HASH": "✅ Set" if API_HASH else "❌ Missing",
        "HF_TOKEN": "✅ Set" if HF_TOKEN else "❌ Missing",
        "CHANNEL_USERNAME": "✅ Set" if CHANNEL else "❌ Missing",
        "DATASET_REPO": "✅ Set" if REPO_ID else "❌ Missing",
    }
    return jsonify(config_status)


@app.route('/debug')
def debug():
    """Debug endpoint to check file structure"""
    # NOTE(review): this exposes the working-directory listing and absolute
    # paths to any caller; consider disabling or protecting it when the app
    # is publicly reachable.
    import glob

    templates_present = os.path.exists('templates')
    static_present = os.path.exists('static')
    debug_info = {
        "current_directory": os.getcwd(),
        "script_directory": basedir,
        "template_folder": app.template_folder,
        "static_folder": app.static_folder,
        "files_in_current_dir": os.listdir('.'),
        "templates_exists": templates_present,
        "static_exists": static_present,
        "templates_files": glob.glob('templates/*') if templates_present else [],
        "static_files": glob.glob('static/**/*', recursive=True) if static_present else [],
        "session_file_exists": os.path.exists(SESSION_FILE),
    }
    return jsonify(debug_info)


@app.route('/session-info')
def session_info():
    """Check Telegram session status"""
    if not client:
        return jsonify({"error": "Client not initialized"})

    try:
        # This is a synchronous check: it only inspects the session file on
        # disk and does not contact Telegram.
        session_present = os.path.exists(SESSION_FILE)
        session_status = {
            "session_file_exists": session_present,
            "client_initialized": bool(client),
            "session_file_size": os.path.getsize(SESSION_FILE) if session_present else 0,
        }
        return jsonify(session_status)
    except Exception as e:
        return jsonify({"error": str(e)})


if __name__ == '__main__':
    print("Starting Hugging Face Uploader...")
    print("Configuration status:")
    print(f" API_ID: {'✅ Set' if API_ID else '❌ Missing'}")
    print(f" API_HASH: {'✅ Set' if API_HASH else '❌ Missing'}")
    print(f" HF_TOKEN: {'✅ Set' if HF_TOKEN else '❌ Missing'}")
    print(f" CHANNEL_USERNAME: {'✅ Set' if CHANNEL else '❌ Missing'}")
    print(f" DATASET_REPO: {'✅ Set' if REPO_ID else '❌ Missing'}")
    print("\nPaths:")
    print(f" Base directory: {basedir}")
    print(f" Template folder: {app.template_folder}")
    print(f" Static folder: {app.static_folder}")
    print("\nSession info:")
    print(f" Session file exists: {os.path.exists(SESSION_FILE)}")
    print("\n⚠️ IMPORTANT: This application requires a pre-authenticated Telegram session.")
    print(" You must create the session file locally first, then upload it to your Space.")
    print("\nTo configure, set environment variables in your Space settings.")
    print("Visit http://localhost:7860 to use the application.")
    # Use port 7860 for Hugging Face Spaces compatibility
    app.run(host='0.0.0.0', port=7860, debug=False)