Fred808 committed on
Commit
5e235e5
·
verified ·
1 Parent(s): f86af78

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -116
app.py CHANGED
@@ -5,10 +5,12 @@ from telethon import TelegramClient
5
  from telethon.errors import SessionPasswordNeededError, PhoneCodeInvalidError, AuthKeyError
6
  from huggingface_hub import upload_file
7
  from dotenv import load_dotenv
8
- from fastapi import FastAPI, Form, HTTPException
9
- from fastapi.responses import HTMLResponse, FileResponse
10
  from fastapi.staticfiles import StaticFiles
11
- import uvicorn
 
 
12
 
13
  # === Load secrets from .env ===
14
  load_dotenv()
@@ -46,7 +48,6 @@ os.makedirs("downloads", exist_ok=True)
46
  def upload_to_dataset(filepath):
47
  if not HF_TOKEN or not REPO_ID:
48
  return False, f"❌ Missing Hugging Face credentials"
49
-
50
  try:
51
  upload_file(
52
  path_or_fileobj=filepath,
@@ -61,37 +62,34 @@ def upload_to_dataset(filepath):
61
  logging.error(f"[!] Upload failed: {filepath} — {e}")
62
  return False, f"❌ Upload failed: {os.path.basename(filepath)} — {e}"
63
 
64
- # === Main file processing logic with improved error handling ===
65
  async def process_filenames(name_input):
66
  if not client:
67
  return "❌ Error: Telegram client not initialized. Please check your API credentials."
68
-
69
  if not CHANNEL:
70
  return "❌ Error: Channel username not configured."
71
-
72
  try:
73
- # Check if client is already connected
74
  if not client.is_connected():
75
  await client.connect()
76
-
77
- # Check if we're authorized
78
  if not await client.is_user_authorized():
79
  return "❌ Error: Telegram client not authorized. This application requires a pre-authenticated session file."
80
-
81
  filenames = [name.strip().lower() for name in name_input.replace(",", "\n").splitlines() if name.strip()]
82
  results = []
83
  found = set()
84
 
85
- # Use a more conservative approach to message iteration
86
  try:
87
  messages = []
88
- async for msg in client.iter_messages(CHANNEL, limit=300):
89
  messages.append(msg)
90
  except Exception as e:
91
  logging.error(f"Error iterating messages: {e}")
92
  return f"❌ Error accessing channel messages: {str(e)}"
93
 
94
- for i, msg in enumerate(messages):
95
  if msg.media and msg.file:
96
  fname = msg.file.name or f"file_{msg.id}{msg.file.ext}"
97
 
@@ -118,65 +116,59 @@ async def process_filenames(name_input):
118
  results.append(f"❌ Not found: {name}")
119
 
120
  return "\n".join(results) if results else "❌ No files matched."
121
-
122
  except AuthKeyError:
123
- logging.error("Auth key error - session may be corrupted")
124
  return "❌ Error: Session authentication failed. The session file may be corrupted or expired."
125
  except SessionPasswordNeededError:
126
- logging.error("Two-factor authentication required")
127
  return "❌ Error: Two-factor authentication is enabled. This application requires a pre-authenticated session."
128
  except EOFError as e:
129
- logging.error(f"EOF Error: {e}")
130
- return "❌ Error: Connection interrupted. This may be due to network issues or session problems."
131
  except Exception as e:
132
- logging.error(f"Error in process_filenames: {e}")
133
  return f"❌ Error: {str(e)}"
134
 
135
- # === FastAPI App ===
136
- app = FastAPI(title="Hugging Face Uploader", description="Upload files from Telegram to Hugging Face datasets")
 
 
 
 
 
 
137
 
138
- @app.get("/", response_class=HTMLResponse)
139
- async def index():
140
- """Serve the main HTML page"""
141
- try:
142
- with open("index.html", "r", encoding="utf-8") as f:
143
- return HTMLResponse(content=f.read())
144
- except FileNotFoundError:
145
- raise HTTPException(status_code=404, detail="index.html not found")
146
 
147
- @app.get("/style.css")
148
- async def get_css():
149
- """Serve the CSS file"""
150
- return FileResponse("style.css", media_type="text/css")
 
 
 
 
 
 
 
151
 
152
- @app.get("/script.js")
153
- async def get_js():
154
- """Serve the JavaScript file"""
155
- return FileResponse("script.js", media_type="application/javascript")
156
 
157
- @app.post("/upload")
 
 
 
 
158
  async def upload(filenames: str = Form(...)):
159
- """Handle file upload requests"""
160
- try:
161
- if not filenames.strip():
162
- raise HTTPException(status_code=400, detail="❌ Error: No filenames provided")
163
-
164
- # Check if credentials are configured
165
- if not client:
166
- raise HTTPException(status_code=500, detail="❌ Error: Application not configured. Please set up your environment variables with API credentials.")
167
-
168
- # Process filenames using the async function
169
- results = await process_filenames(filenames)
170
- return {"results": results}
171
-
172
- except Exception as e:
173
- logging.error(f"Error in upload route: {e}")
174
- raise HTTPException(status_code=500, detail=f"❌ Error: {str(e)}")
175
 
176
- @app.get("/health")
177
  async def health():
178
- """Health check endpoint"""
179
- status = {
180
  "status": "healthy",
181
  "message": "Hugging Face Uploader is running",
182
  "configured": {
@@ -184,79 +176,30 @@ async def health():
184
  "huggingface": bool(HF_TOKEN and REPO_ID),
185
  "channel": bool(CHANNEL)
186
  },
187
- "files": {
188
- "index_html_exists": os.path.exists("index.html"),
189
- "style_css_exists": os.path.exists("style.css"),
190
- "script_js_exists": os.path.exists("script.js"),
191
- "session_file_exists": os.path.exists("my_session.session")
192
  }
193
  }
194
- return status
195
 
196
- @app.get("/config")
197
  async def config():
198
- """Show configuration status"""
199
- config_status = {
200
  "API_ID": "✅ Set" if API_ID else "❌ Missing",
201
- "API_HASH": "✅ Set" if API_HASH else "❌ Missing",
202
  "HF_TOKEN": "✅ Set" if HF_TOKEN else "❌ Missing",
203
  "CHANNEL_USERNAME": "✅ Set" if CHANNEL else "❌ Missing",
204
  "DATASET_REPO": "✅ Set" if REPO_ID else "❌ Missing"
205
  }
206
- return config_status
207
 
208
- @app.get("/debug")
209
- async def debug():
210
- """Debug endpoint to check file structure"""
211
- import glob
212
-
213
- debug_info = {
214
- "current_directory": os.getcwd(),
215
- "files_in_current_dir": os.listdir('.'),
216
- "html_exists": os.path.exists('index.html'),
217
- "css_exists": os.path.exists('style.css'),
218
- "js_exists": os.path.exists('script.js'),
219
- "session_file_exists": os.path.exists('my_session.session'),
220
- "downloads_folder_exists": os.path.exists('downloads'),
221
- "log_file_exists": os.path.exists('upload.log')
222
- }
223
-
224
- return debug_info
225
-
226
- @app.get("/session-info")
227
  async def session_info():
228
- """Check Telegram session status"""
229
- if not client:
230
- return {"error": "Client not initialized"}
231
-
232
  try:
233
- session_status = {
234
  "session_file_exists": os.path.exists('my_session.session'),
235
  "client_initialized": bool(client),
236
  "session_file_size": os.path.getsize('my_session.session') if os.path.exists('my_session.session') else 0
237
  }
238
- return session_status
239
  except Exception as e:
240
  return {"error": str(e)}
241
-
242
- if __name__ == '__main__':
243
- print("Starting Hugging Face Uploader with FastAPI...")
244
- print("Configuration status:")
245
- print(f" API_ID: {'✅ Set' if API_ID else '❌ Missing'}")
246
- print(f" API_HASH: {'✅ Set' if API_HASH else '❌ Missing'}")
247
- print(f" HF_TOKEN: {'✅ Set' if HF_TOKEN else '❌ Missing'}")
248
- print(f" CHANNEL_USERNAME: {'✅ Set' if CHANNEL else '❌ Missing'}")
249
- print(f" DATASET_REPO: {'✅ Set' if REPO_ID else '❌ Missing'}")
250
- print(f"\nFile structure:")
251
- print(f" index.html exists: {os.path.exists('index.html')}")
252
- print(f" style.css exists: {os.path.exists('style.css')}")
253
- print(f" script.js exists: {os.path.exists('script.js')}")
254
- print(f" Session file exists: {os.path.exists('my_session.session')}")
255
- print("\n⚠️ IMPORTANT: This application requires a pre-authenticated Telegram session.")
256
- print(" You must create the session file locally first, then upload it to your deployment.")
257
- print("\nTo configure, set environment variables in your deployment environment.")
258
- print("Visit http://localhost:7860 to use the application.")
259
-
260
- # Use port 7860 for Hugging Face Spaces compatibility
261
- uvicorn.run(app, host="0.0.0.0", port=7860)
262
-
 
5
  from telethon.errors import SessionPasswordNeededError, PhoneCodeInvalidError, AuthKeyError
6
  from huggingface_hub import upload_file
7
  from dotenv import load_dotenv
8
+ from fastapi import FastAPI, Form, Request, status
9
+ from fastapi.responses import HTMLResponse, JSONResponse, PlainTextResponse
10
  from fastapi.staticfiles import StaticFiles
11
+ from fastapi.templating import Jinja2Templates
12
+ from fastapi.middleware.cors import CORSMiddleware
13
+ import threading
14
 
15
  # === Load secrets from .env ===
16
  load_dotenv()
 
48
  def upload_to_dataset(filepath):
49
  if not HF_TOKEN or not REPO_ID:
50
  return False, f"❌ Missing Hugging Face credentials"
 
51
  try:
52
  upload_file(
53
  path_or_fileobj=filepath,
 
62
  logging.error(f"[!] Upload failed: {filepath} — {e}")
63
  return False, f"❌ Upload failed: {os.path.basename(filepath)} — {e}"
64
 
65
+ # === Main file processing logic ===
66
  async def process_filenames(name_input):
67
  if not client:
68
  return "❌ Error: Telegram client not initialized. Please check your API credentials."
69
+
70
  if not CHANNEL:
71
  return "❌ Error: Channel username not configured."
72
+
73
  try:
 
74
  if not client.is_connected():
75
  await client.connect()
76
+
 
77
  if not await client.is_user_authorized():
78
  return "❌ Error: Telegram client not authorized. This application requires a pre-authenticated session file."
79
+
80
  filenames = [name.strip().lower() for name in name_input.replace(",", "\n").splitlines() if name.strip()]
81
  results = []
82
  found = set()
83
 
 
84
  try:
85
  messages = []
86
+ async for msg in client.iter_messages(CHANNEL, limit=30000):
87
  messages.append(msg)
88
  except Exception as e:
89
  logging.error(f"Error iterating messages: {e}")
90
  return f"❌ Error accessing channel messages: {str(e)}"
91
 
92
+ for msg in messages:
93
  if msg.media and msg.file:
94
  fname = msg.file.name or f"file_{msg.id}{msg.file.ext}"
95
 
 
116
  results.append(f"❌ Not found: {name}")
117
 
118
  return "\n".join(results) if results else "❌ No files matched."
119
+
120
  except AuthKeyError:
 
121
  return "❌ Error: Session authentication failed. The session file may be corrupted or expired."
122
  except SessionPasswordNeededError:
 
123
  return "❌ Error: Two-factor authentication is enabled. This application requires a pre-authenticated session."
124
  except EOFError as e:
125
+ return f"❌ Error: Connection interrupted. {e}"
 
126
  except Exception as e:
 
127
  return f"❌ Error: {str(e)}"
128
 
129
+ def run_async_in_thread(coro):
130
+ def run_in_thread():
131
+ loop = asyncio.new_event_loop()
132
+ asyncio.set_event_loop(loop)
133
+ try:
134
+ return loop.run_until_complete(coro)
135
+ finally:
136
+ loop.close()
137
 
138
+ import concurrent.futures
139
+ with concurrent.futures.ThreadPoolExecutor() as executor:
140
+ future = executor.submit(run_in_thread)
141
+ return future.result()
 
 
 
 
142
 
143
+ # === FastAPI App ===
144
+ basedir = os.path.abspath(os.path.expanduser("~"))
145
+ app = FastAPI()
146
+
147
+ app.add_middleware(
148
+ CORSMiddleware,
149
+ allow_origins=["*"],
150
+ allow_credentials=True,
151
+ allow_methods=["*"],
152
+ allow_headers=["*"],
153
+ )
154
 
155
+ app.mount("/static", StaticFiles(directory=basedir), name="static")
156
+ templates = Jinja2Templates(directory=basedir)
 
 
157
 
158
+ @app.get("/", response_class=HTMLResponse)
159
+ async def index(request: Request):
160
+ return templates.TemplateResponse("index.html", {"request": request})
161
+
162
+ @app.post("/upload", response_class=PlainTextResponse)
163
  async def upload(filenames: str = Form(...)):
164
+ if not client:
165
+ return PlainTextResponse("\u274c Error: App not configured.", status_code=500)
166
+ results = run_async_in_thread(process_filenames(filenames))
167
+ return results
 
 
 
 
 
 
 
 
 
 
 
 
168
 
169
+ @app.get("/health", response_class=JSONResponse)
170
  async def health():
171
+ return {
 
172
  "status": "healthy",
173
  "message": "Hugging Face Uploader is running",
174
  "configured": {
 
176
  "huggingface": bool(HF_TOKEN and REPO_ID),
177
  "channel": bool(CHANNEL)
178
  },
179
+ "paths": {
180
+ "basedir": basedir,
181
+ "template_folder": basedir,
182
+ "static_folder": basedir
 
183
  }
184
  }
 
185
 
186
+ @app.get("/config", response_class=JSONResponse)
187
  async def config():
188
+ return {
 
189
  "API_ID": "✅ Set" if API_ID else "❌ Missing",
190
+ "API_HASH": "✅ Set" if API_HASH else "❌ Missing",
191
  "HF_TOKEN": "✅ Set" if HF_TOKEN else "❌ Missing",
192
  "CHANNEL_USERNAME": "✅ Set" if CHANNEL else "❌ Missing",
193
  "DATASET_REPO": "✅ Set" if REPO_ID else "❌ Missing"
194
  }
 
195
 
196
+ @app.get("/session-info", response_class=JSONResponse)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197
  async def session_info():
 
 
 
 
198
  try:
199
+ return {
200
  "session_file_exists": os.path.exists('my_session.session'),
201
  "client_initialized": bool(client),
202
  "session_file_size": os.path.getsize('my_session.session') if os.path.exists('my_session.session') else 0
203
  }
 
204
  except Exception as e:
205
  return {"error": str(e)}