Update app.py
Browse files
app.py
CHANGED
|
@@ -5,10 +5,12 @@ from telethon import TelegramClient
|
|
| 5 |
from telethon.errors import SessionPasswordNeededError, PhoneCodeInvalidError, AuthKeyError
|
| 6 |
from huggingface_hub import upload_file
|
| 7 |
from dotenv import load_dotenv
|
| 8 |
-
from fastapi import FastAPI, Form,
|
| 9 |
-
from fastapi.responses import HTMLResponse,
|
| 10 |
from fastapi.staticfiles import StaticFiles
|
| 11 |
-
import
|
|
|
|
|
|
|
| 12 |
|
| 13 |
# === Load secrets from .env ===
|
| 14 |
load_dotenv()
|
|
@@ -46,7 +48,6 @@ os.makedirs("downloads", exist_ok=True)
|
|
| 46 |
def upload_to_dataset(filepath):
|
| 47 |
if not HF_TOKEN or not REPO_ID:
|
| 48 |
return False, f"β Missing Hugging Face credentials"
|
| 49 |
-
|
| 50 |
try:
|
| 51 |
upload_file(
|
| 52 |
path_or_fileobj=filepath,
|
|
@@ -61,37 +62,34 @@ def upload_to_dataset(filepath):
|
|
| 61 |
logging.error(f"[!] Upload failed: {filepath} β {e}")
|
| 62 |
return False, f"β Upload failed: {os.path.basename(filepath)} β {e}"
|
| 63 |
|
| 64 |
-
# === Main file processing logic
|
| 65 |
async def process_filenames(name_input):
|
| 66 |
if not client:
|
| 67 |
return "β Error: Telegram client not initialized. Please check your API credentials."
|
| 68 |
-
|
| 69 |
if not CHANNEL:
|
| 70 |
return "β Error: Channel username not configured."
|
| 71 |
-
|
| 72 |
try:
|
| 73 |
-
# Check if client is already connected
|
| 74 |
if not client.is_connected():
|
| 75 |
await client.connect()
|
| 76 |
-
|
| 77 |
-
# Check if we're authorized
|
| 78 |
if not await client.is_user_authorized():
|
| 79 |
return "β Error: Telegram client not authorized. This application requires a pre-authenticated session file."
|
| 80 |
-
|
| 81 |
filenames = [name.strip().lower() for name in name_input.replace(",", "\n").splitlines() if name.strip()]
|
| 82 |
results = []
|
| 83 |
found = set()
|
| 84 |
|
| 85 |
-
# Use a more conservative approach to message iteration
|
| 86 |
try:
|
| 87 |
messages = []
|
| 88 |
-
async for msg in client.iter_messages(CHANNEL, limit=
|
| 89 |
messages.append(msg)
|
| 90 |
except Exception as e:
|
| 91 |
logging.error(f"Error iterating messages: {e}")
|
| 92 |
return f"β Error accessing channel messages: {str(e)}"
|
| 93 |
|
| 94 |
-
for
|
| 95 |
if msg.media and msg.file:
|
| 96 |
fname = msg.file.name or f"file_{msg.id}{msg.file.ext}"
|
| 97 |
|
|
@@ -118,65 +116,59 @@ async def process_filenames(name_input):
|
|
| 118 |
results.append(f"β Not found: {name}")
|
| 119 |
|
| 120 |
return "\n".join(results) if results else "β No files matched."
|
| 121 |
-
|
| 122 |
except AuthKeyError:
|
| 123 |
-
logging.error("Auth key error - session may be corrupted")
|
| 124 |
return "β Error: Session authentication failed. The session file may be corrupted or expired."
|
| 125 |
except SessionPasswordNeededError:
|
| 126 |
-
logging.error("Two-factor authentication required")
|
| 127 |
return "β Error: Two-factor authentication is enabled. This application requires a pre-authenticated session."
|
| 128 |
except EOFError as e:
|
| 129 |
-
|
| 130 |
-
return "β Error: Connection interrupted. This may be due to network issues or session problems."
|
| 131 |
except Exception as e:
|
| 132 |
-
logging.error(f"Error in process_filenames: {e}")
|
| 133 |
return f"β Error: {str(e)}"
|
| 134 |
|
| 135 |
-
|
| 136 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
with open("index.html", "r", encoding="utf-8") as f:
|
| 143 |
-
return HTMLResponse(content=f.read())
|
| 144 |
-
except FileNotFoundError:
|
| 145 |
-
raise HTTPException(status_code=404, detail="index.html not found")
|
| 146 |
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
"""Serve the JavaScript file"""
|
| 155 |
-
return FileResponse("script.js", media_type="application/javascript")
|
| 156 |
|
| 157 |
-
@app.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 158 |
async def upload(filenames: str = Form(...)):
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
# Check if credentials are configured
|
| 165 |
-
if not client:
|
| 166 |
-
raise HTTPException(status_code=500, detail="β Error: Application not configured. Please set up your environment variables with API credentials.")
|
| 167 |
-
|
| 168 |
-
# Process filenames using the async function
|
| 169 |
-
results = await process_filenames(filenames)
|
| 170 |
-
return {"results": results}
|
| 171 |
-
|
| 172 |
-
except Exception as e:
|
| 173 |
-
logging.error(f"Error in upload route: {e}")
|
| 174 |
-
raise HTTPException(status_code=500, detail=f"β Error: {str(e)}")
|
| 175 |
|
| 176 |
-
@app.get("/health")
|
| 177 |
async def health():
|
| 178 |
-
|
| 179 |
-
status = {
|
| 180 |
"status": "healthy",
|
| 181 |
"message": "Hugging Face Uploader is running",
|
| 182 |
"configured": {
|
|
@@ -184,79 +176,30 @@ async def health():
|
|
| 184 |
"huggingface": bool(HF_TOKEN and REPO_ID),
|
| 185 |
"channel": bool(CHANNEL)
|
| 186 |
},
|
| 187 |
-
"
|
| 188 |
-
"
|
| 189 |
-
"
|
| 190 |
-
"
|
| 191 |
-
"session_file_exists": os.path.exists("my_session.session")
|
| 192 |
}
|
| 193 |
}
|
| 194 |
-
return status
|
| 195 |
|
| 196 |
-
@app.get("/config")
|
| 197 |
async def config():
|
| 198 |
-
|
| 199 |
-
config_status = {
|
| 200 |
"API_ID": "β
Set" if API_ID else "β Missing",
|
| 201 |
-
"API_HASH": "β
Set" if API_HASH else "β Missing",
|
| 202 |
"HF_TOKEN": "β
Set" if HF_TOKEN else "β Missing",
|
| 203 |
"CHANNEL_USERNAME": "β
Set" if CHANNEL else "β Missing",
|
| 204 |
"DATASET_REPO": "β
Set" if REPO_ID else "β Missing"
|
| 205 |
}
|
| 206 |
-
return config_status
|
| 207 |
|
| 208 |
-
@app.get("/
|
| 209 |
-
async def debug():
|
| 210 |
-
"""Debug endpoint to check file structure"""
|
| 211 |
-
import glob
|
| 212 |
-
|
| 213 |
-
debug_info = {
|
| 214 |
-
"current_directory": os.getcwd(),
|
| 215 |
-
"files_in_current_dir": os.listdir('.'),
|
| 216 |
-
"html_exists": os.path.exists('index.html'),
|
| 217 |
-
"css_exists": os.path.exists('style.css'),
|
| 218 |
-
"js_exists": os.path.exists('script.js'),
|
| 219 |
-
"session_file_exists": os.path.exists('my_session.session'),
|
| 220 |
-
"downloads_folder_exists": os.path.exists('downloads'),
|
| 221 |
-
"log_file_exists": os.path.exists('upload.log')
|
| 222 |
-
}
|
| 223 |
-
|
| 224 |
-
return debug_info
|
| 225 |
-
|
| 226 |
-
@app.get("/session-info")
|
| 227 |
async def session_info():
|
| 228 |
-
"""Check Telegram session status"""
|
| 229 |
-
if not client:
|
| 230 |
-
return {"error": "Client not initialized"}
|
| 231 |
-
|
| 232 |
try:
|
| 233 |
-
|
| 234 |
"session_file_exists": os.path.exists('my_session.session'),
|
| 235 |
"client_initialized": bool(client),
|
| 236 |
"session_file_size": os.path.getsize('my_session.session') if os.path.exists('my_session.session') else 0
|
| 237 |
}
|
| 238 |
-
return session_status
|
| 239 |
except Exception as e:
|
| 240 |
return {"error": str(e)}
|
| 241 |
-
|
| 242 |
-
if __name__ == '__main__':
|
| 243 |
-
print("Starting Hugging Face Uploader with FastAPI...")
|
| 244 |
-
print("Configuration status:")
|
| 245 |
-
print(f" API_ID: {'β
Set' if API_ID else 'β Missing'}")
|
| 246 |
-
print(f" API_HASH: {'β
Set' if API_HASH else 'β Missing'}")
|
| 247 |
-
print(f" HF_TOKEN: {'β
Set' if HF_TOKEN else 'β Missing'}")
|
| 248 |
-
print(f" CHANNEL_USERNAME: {'β
Set' if CHANNEL else 'β Missing'}")
|
| 249 |
-
print(f" DATASET_REPO: {'β
Set' if REPO_ID else 'β Missing'}")
|
| 250 |
-
print(f"\nFile structure:")
|
| 251 |
-
print(f" index.html exists: {os.path.exists('index.html')}")
|
| 252 |
-
print(f" style.css exists: {os.path.exists('style.css')}")
|
| 253 |
-
print(f" script.js exists: {os.path.exists('script.js')}")
|
| 254 |
-
print(f" Session file exists: {os.path.exists('my_session.session')}")
|
| 255 |
-
print("\nβ οΈ IMPORTANT: This application requires a pre-authenticated Telegram session.")
|
| 256 |
-
print(" You must create the session file locally first, then upload it to your deployment.")
|
| 257 |
-
print("\nTo configure, set environment variables in your deployment environment.")
|
| 258 |
-
print("Visit http://localhost:7860 to use the application.")
|
| 259 |
-
|
| 260 |
-
# Use port 7860 for Hugging Face Spaces compatibility
|
| 261 |
-
uvicorn.run(app, host="0.0.0.0", port=7860)
|
| 262 |
-
|
|
|
|
| 5 |
from telethon.errors import SessionPasswordNeededError, PhoneCodeInvalidError, AuthKeyError
|
| 6 |
from huggingface_hub import upload_file
|
| 7 |
from dotenv import load_dotenv
|
| 8 |
+
from fastapi import FastAPI, Form, Request, status
|
| 9 |
+
from fastapi.responses import HTMLResponse, JSONResponse, PlainTextResponse
|
| 10 |
from fastapi.staticfiles import StaticFiles
|
| 11 |
+
from fastapi.templating import Jinja2Templates
|
| 12 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 13 |
+
import threading
|
| 14 |
|
| 15 |
# === Load secrets from .env ===
|
| 16 |
load_dotenv()
|
|
|
|
| 48 |
def upload_to_dataset(filepath):
|
| 49 |
if not HF_TOKEN or not REPO_ID:
|
| 50 |
return False, f"β Missing Hugging Face credentials"
|
|
|
|
| 51 |
try:
|
| 52 |
upload_file(
|
| 53 |
path_or_fileobj=filepath,
|
|
|
|
| 62 |
logging.error(f"[!] Upload failed: {filepath} β {e}")
|
| 63 |
return False, f"β Upload failed: {os.path.basename(filepath)} β {e}"
|
| 64 |
|
| 65 |
+
# === Main file processing logic ===
|
| 66 |
async def process_filenames(name_input):
|
| 67 |
if not client:
|
| 68 |
return "β Error: Telegram client not initialized. Please check your API credentials."
|
| 69 |
+
|
| 70 |
if not CHANNEL:
|
| 71 |
return "β Error: Channel username not configured."
|
| 72 |
+
|
| 73 |
try:
|
|
|
|
| 74 |
if not client.is_connected():
|
| 75 |
await client.connect()
|
| 76 |
+
|
|
|
|
| 77 |
if not await client.is_user_authorized():
|
| 78 |
return "β Error: Telegram client not authorized. This application requires a pre-authenticated session file."
|
| 79 |
+
|
| 80 |
filenames = [name.strip().lower() for name in name_input.replace(",", "\n").splitlines() if name.strip()]
|
| 81 |
results = []
|
| 82 |
found = set()
|
| 83 |
|
|
|
|
| 84 |
try:
|
| 85 |
messages = []
|
| 86 |
+
async for msg in client.iter_messages(CHANNEL, limit=30000):
|
| 87 |
messages.append(msg)
|
| 88 |
except Exception as e:
|
| 89 |
logging.error(f"Error iterating messages: {e}")
|
| 90 |
return f"β Error accessing channel messages: {str(e)}"
|
| 91 |
|
| 92 |
+
for msg in messages:
|
| 93 |
if msg.media and msg.file:
|
| 94 |
fname = msg.file.name or f"file_{msg.id}{msg.file.ext}"
|
| 95 |
|
|
|
|
| 116 |
results.append(f"β Not found: {name}")
|
| 117 |
|
| 118 |
return "\n".join(results) if results else "β No files matched."
|
| 119 |
+
|
| 120 |
except AuthKeyError:
|
|
|
|
| 121 |
return "β Error: Session authentication failed. The session file may be corrupted or expired."
|
| 122 |
except SessionPasswordNeededError:
|
|
|
|
| 123 |
return "β Error: Two-factor authentication is enabled. This application requires a pre-authenticated session."
|
| 124 |
except EOFError as e:
|
| 125 |
+
return f"β Error: Connection interrupted. {e}"
|
|
|
|
| 126 |
except Exception as e:
|
|
|
|
| 127 |
return f"β Error: {str(e)}"
|
| 128 |
|
| 129 |
+
def run_async_in_thread(coro):
    """Run ``coro`` to completion on a private event loop in a worker thread.

    The calling thread blocks until the coroutine finishes.

    Args:
        coro: The awaitable to drive; it is handed to
            ``loop.run_until_complete`` on a freshly created loop.

    Returns:
        Whatever ``coro`` returns.

    Raises:
        Any exception raised by ``coro`` is re-raised in the caller.

    NOTE(review): objects bound to a different event loop (for example a
    network client created elsewhere) may not work on this private loop;
    confirm against the library in use.
    """
    import concurrent.futures

    def run_in_thread():
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        try:
            return loop.run_until_complete(coro)
        finally:
            try:
                # Finalize async generators that were left suspended, so
                # their cleanup runs while the loop is still usable.
                loop.run_until_complete(loop.shutdown_asyncgens())
            finally:
                # Do not leave a closed loop installed as the thread's
                # current loop.
                asyncio.set_event_loop(None)
                loop.close()

    # One job only, so one worker is enough; the context manager joins
    # the thread before returning.
    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
        return executor.submit(run_in_thread).result()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
|
| 143 |
+
# === FastAPI App ===
# NOTE(review): basedir is the current user's home directory, and it is used
# both as the /static mount root and as the template root. Every file under
# it becomes downloadable via /static (possibly including .env or the
# Telegram session file, depending on where the app runs) -- confirm and
# narrow this to a dedicated assets directory.
basedir = os.path.abspath(os.path.expanduser("~"))
app = FastAPI()

# A wildcard origin must not be combined with credentialed requests: that
# would let any site issue cookie-bearing requests to this API. None of the
# routes here rely on cookies, so credentials are switched off.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

app.mount("/static", StaticFiles(directory=basedir), name="static")
templates = Jinja2Templates(directory=basedir)
|
|
|
|
|
|
|
| 157 |
|
| 158 |
+
@app.get("/", response_class=HTMLResponse)
async def index(request: Request):
    """Render ``index.html`` through the template engine as the landing page."""
    context = {"request": request}
    return templates.TemplateResponse("index.html", context)
|
| 161 |
+
|
| 162 |
+
@app.post("/upload", response_class=PlainTextResponse)
async def upload(filenames: str = Form(...)):
    """Process the submitted file names and return the textual report.

    Args:
        filenames: Form field holding the requested names; it is passed
            unchanged to ``process_filenames``.

    Returns:
        The string produced by ``process_filenames``, or a 500 plain-text
        response when the Telegram client is not available.
    """
    if not client:
        return PlainTextResponse("\u274c Error: App not configured.", status_code=500)

    # run_async_in_thread blocks its caller until the work is finished.
    # Calling it directly inside this coroutine would freeze the server's
    # event loop (and every other request) for that time, so the blocking
    # call is handed to the default executor and awaited instead.
    loop = asyncio.get_running_loop()
    results = await loop.run_in_executor(
        None, run_async_in_thread, process_filenames(filenames)
    )
    return results
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
|
| 169 |
+
@app.get("/health", response_class=JSONResponse)
|
| 170 |
async def health():
|
| 171 |
+
return {
|
|
|
|
| 172 |
"status": "healthy",
|
| 173 |
"message": "Hugging Face Uploader is running",
|
| 174 |
"configured": {
|
|
|
|
| 176 |
"huggingface": bool(HF_TOKEN and REPO_ID),
|
| 177 |
"channel": bool(CHANNEL)
|
| 178 |
},
|
| 179 |
+
"paths": {
|
| 180 |
+
"basedir": basedir,
|
| 181 |
+
"template_folder": basedir,
|
| 182 |
+
"static_folder": basedir
|
|
|
|
| 183 |
}
|
| 184 |
}
|
|
|
|
| 185 |
|
| 186 |
+
@app.get("/config", response_class=JSONResponse)
async def config():
    """Report which settings are present, without revealing their values.

    Returns:
        A dict mapping each setting name to a "Set" / "Missing" marker.
    """

    def flag(value):
        # Escapes keep the check mark / cross intact whatever encoding the
        # file is saved or served with.
        return "\u2705 Set" if value else "\u274c Missing"

    return {
        "API_ID": flag(API_ID),
        "API_HASH": flag(API_HASH),
        "HF_TOKEN": flag(HF_TOKEN),
        "CHANNEL_USERNAME": flag(CHANNEL),
        "DATASET_REPO": flag(REPO_ID),
    }
|
|
|
|
| 195 |
|
| 196 |
+
@app.get("/session-info", response_class=JSONResponse)
async def session_info():
    """Report whether the Telegram session file exists and how large it is.

    Returns:
        A dict with ``session_file_exists``, ``client_initialized`` and
        ``session_file_size`` (0 when the file is absent), or
        ``{"error": ...}`` if anything unexpected goes wrong.
    """
    session_path = 'my_session.session'
    try:
        # A single stat call answers both "exists?" and "how big?", so the
        # file cannot vanish between two separate checks.
        try:
            size = os.stat(session_path).st_size
        except OSError:
            size = None
        return {
            "session_file_exists": size is not None,
            "client_initialized": bool(client),
            "session_file_size": size if size is not None else 0,
        }
    except Exception as e:
        return {"error": str(e)}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|