Fred808 committed on
Commit
3deb5a3
·
verified ·
1 Parent(s): 2d52f0f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +139 -64
app.py CHANGED
@@ -1,20 +1,21 @@
1
  import os
2
- import gradio as gr
3
  import asyncio
4
  import logging
5
  from telethon import TelegramClient
6
  from huggingface_hub import upload_file
7
  from dotenv import load_dotenv
 
 
8
 
9
  # === Load secrets from .env ===
10
  load_dotenv()
11
 
12
- API_ID = int(os.getenv("API_ID"))
13
  API_HASH = os.getenv("API_HASH")
14
  HF_TOKEN = os.getenv("HF_TOKEN")
15
  CHANNEL = os.getenv("CHANNEL_USERNAME")
16
  REPO_ID = os.getenv("DATASET_REPO")
17
- DATA_PATH = "AEffects"
18
 
19
  # === Logging setup ===
20
  logging.basicConfig(
@@ -26,14 +27,23 @@ logging.basicConfig(
26
  ]
27
  )
28
 
29
- # === Setup Telegram Client ===
30
- client = TelegramClient("my_session", API_ID, API_HASH)
 
 
 
 
 
 
31
 
32
  # === Ensure download folder exists ===
33
  os.makedirs("downloads", exist_ok=True)
34
 
35
  # === Upload wrapper ===
36
  def upload_to_dataset(filepath):
 
 
 
37
  try:
38
  upload_file(
39
  path_or_fileobj=filepath,
@@ -49,62 +59,127 @@ def upload_to_dataset(filepath):
49
  return False, f"❌ Upload failed: {os.path.basename(filepath)} β€” {e}"
50
 
51
  # === Main file processing logic ===
52
- async def process_filenames(name_input, progress=None):
53
- await client.start()
54
-
55
- filenames = [name.strip().lower() for name in name_input.replace(",", "\n").splitlines() if name.strip()]
56
- results = []
57
- found = set()
58
-
59
- messages = [msg async for msg in client.iter_messages(CHANNEL, limit=300)]
60
- total = len(messages)
61
-
62
- for i, msg in enumerate(messages):
63
- if progress:
64
- progress(i / total, desc=f"Scanning message {i+1}/{total}...")
65
-
66
- if msg.media and msg.file:
67
- fname = msg.file.name or f"file_{msg.id}{msg.file.ext}"
68
-
69
- for search in filenames:
70
- if search in fname.lower() and fname not in found:
71
- found.add(fname)
72
- path = f"downloads/{fname}"
73
-
74
- if not os.path.exists(path):
75
- await msg.download_media(file=path)
76
- success, msg_text = upload_to_dataset(path)
77
- results.append(msg_text)
78
- else:
79
- results.append(f"⏩ Already exists: {fname}")
80
- break
81
-
82
- matched_names = [r.split(":")[-1].strip().lower() for r in results if r.startswith("βœ…") or r.startswith("⏩")]
83
- for name in filenames:
84
- if not any(name in matched for matched in matched_names):
85
- results.append(f"❌ Not found: {name}")
86
-
87
- return "\n".join(results) if results else "❌ No files matched."
88
-
89
- # === Gradio async handler ===
90
- async def handle_upload(name_input):
91
- with gr.Progress() as p:
92
- return await process_filenames(name_input, p)
93
-
94
- # === Gradio UI ===
95
- def launch_ui():
96
- with gr.Blocks() as app:
97
- gr.Markdown("## Hugging Face Uploader")
98
- input_box = gr.Textbox(
99
- label="Enter filenames (comma or newline separated)",
100
- lines=6,
101
- placeholder="e.g.\nreport, summary.pdf\nmeeting_notes"
102
- )
103
- result_box = gr.Textbox(label="Upload Results", lines=12)
104
- btn = gr.Button("Start Upload")
105
- btn.click(fn=handle_upload, inputs=input_box, outputs=result_box)
106
- return app
107
-
108
- # === Launch ===
109
- if __name__ == "__main__":
110
- launch_ui().launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
 
2
  import asyncio
3
  import logging
4
  from telethon import TelegramClient
5
  from huggingface_hub import upload_file
6
  from dotenv import load_dotenv
7
+ from flask import Flask, request, render_template, jsonify
8
+ import threading
9
 
10
  # === Load secrets from .env ===
11
  load_dotenv()
12
 
13
+ API_ID = os.getenv("API_ID")
14
  API_HASH = os.getenv("API_HASH")
15
  HF_TOKEN = os.getenv("HF_TOKEN")
16
  CHANNEL = os.getenv("CHANNEL_USERNAME")
17
  REPO_ID = os.getenv("DATASET_REPO")
18
+ DATA_PATH = "telegram_uploads"
19
 
20
  # === Logging setup ===
21
  logging.basicConfig(
 
27
  ]
28
  )
29
 
30
# === Setup Telegram Client (only if credentials are available) ===
# `client` stays None when either credential is missing or when API_ID
# cannot be parsed as an integer; code that uses it tests for that first.
client = None
if API_ID and API_HASH:
    try:
        numeric_api_id = int(API_ID)
        client = TelegramClient("my_session", numeric_api_id, API_HASH)
    except ValueError as init_error:
        # `client` was never rebound, so it is still None here.
        logging.error(f"Failed to initialize Telegram client: {init_error}")
38
 
39
  # === Ensure download folder exists ===
40
  os.makedirs("downloads", exist_ok=True)
41
 
42
  # === Upload wrapper ===
43
  def upload_to_dataset(filepath):
44
+ if not HF_TOKEN or not REPO_ID:
45
+ return False, f"❌ Missing Hugging Face credentials"
46
+
47
  try:
48
  upload_file(
49
  path_or_fileobj=filepath,
 
59
  return False, f"❌ Upload failed: {os.path.basename(filepath)} β€” {e}"
60
 
61
  # === Main file processing logic ===
62
async def process_filenames(name_input):
    """Download channel files matching the requested names and upload them.

    Args:
        name_input: Search terms separated by commas and/or newlines.
            Matching is a case-insensitive substring test against each
            file name found in the last 300 messages of ``CHANNEL``.

    Returns:
        A newline-joined report with one line per handled file (the message
        returned by ``upload_to_dataset``, or an "Already exists" line when
        the file is already in ``downloads/``), followed by a "Not found"
        line for every search term that matched no file. Configuration
        problems and unexpected errors are reported as a single
        "❌ Error: ..." line instead of being raised.
    """
    if not client:
        return "❌ Error: Telegram client not initialized. Please check your API credentials."

    if not CHANNEL:
        return "❌ Error: Channel username not configured."

    # Normalise the search terms; dict.fromkeys drops duplicates but keeps
    # the order in which the user typed them.
    raw_terms = name_input.replace(",", "\n").splitlines()
    filenames = list(dict.fromkeys(term.strip().lower() for term in raw_terms if term.strip()))
    if not filenames:
        # Nothing to search for: same answer as before, without connecting.
        return "❌ No files matched."

    try:
        await client.start()

        results = []
        found = set()          # file names already handled in this run
        matched_terms = set()  # search terms that hit at least one file

        messages = [msg async for msg in client.iter_messages(CHANNEL, limit=300)]

        for msg in messages:
            if not (msg.media and msg.file):
                continue

            fallback_name = f"file_{msg.id}{msg.file.ext}"
            raw_name = msg.file.name or fallback_name
            # The name is supplied by whoever posted the file; keep only the
            # final path component so it cannot escape the downloads folder.
            fname = os.path.basename(raw_name.replace("\\", "/"))
            if fname in ("", ".", ".."):
                fname = fallback_name

            lowered = fname.lower()
            hits = [term for term in filenames if term in lowered]
            if not hits:
                continue
            # Record matches directly rather than re-parsing result strings,
            # so a failed upload is not additionally reported as "Not found".
            matched_terms.update(hits)

            if fname in found:
                continue
            found.add(fname)
            path = f"downloads/{fname}"

            if os.path.exists(path):
                results.append(f"⏩ Already exists: {fname}")
                continue

            await msg.download_media(file=path)
            _success, msg_text = upload_to_dataset(path)
            results.append(msg_text)

        results.extend(
            f"❌ Not found: {name}" for name in filenames if name not in matched_terms
        )

        return "\n".join(results) if results else "❌ No files matched."

    except Exception as e:
        # Boundary handler: the caller expects a printable string, not a raise.
        logging.exception("Error in process_filenames: %s", e)
        return f"❌ Error: {str(e)}"
106
+
107
def run_async_in_thread(coro):
    """Run an awaitable to completion on a worker thread and return its result.

    The awaitable is driven by ``asyncio.run`` inside a one-off worker
    thread, so this can be called from synchronous code regardless of
    whether the calling thread has an event loop. ``asyncio.run`` also
    cancels leftover tasks and finalises async generators before closing
    the loop, which a bare ``loop.close()`` does not.

    Args:
        coro: The coroutine object (or other awaitable) to execute.

    Returns:
        Whatever the awaitable returns.

    Raises:
        Any exception raised by the awaitable is re-raised in the caller.

    NOTE(review): every call uses a fresh event loop. Objects that bind to
    the first loop they run on (presumably the module-level Telegram client
    once connected) may refuse to work on later calls -- confirm.
    """
    import concurrent.futures

    async def _await_it():
        # Wrapping keeps support for any awaitable, not only coroutines.
        return await coro

    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
        future = executor.submit(asyncio.run, _await_it())
        return future.result()
121
+
122
app = Flask(__name__)


@app.route('/')
def index():
    """Serve the landing page rendered from the ``index.html`` template."""
    landing_page = render_template('index.html')
    return landing_page
127
+
128
@app.route('/upload', methods=['POST'])
def upload():
    """Handle the upload form: look up the requested files and report results.

    Reads the ``filenames`` form field, rejects an empty value with HTTP 400,
    answers HTTP 500 when the Telegram client is not configured or when an
    unexpected error occurs, and otherwise returns the text report produced
    by ``process_filenames``.
    """
    try:
        requested = request.form.get('filenames', '').strip()
        if not requested:
            return "❌ Error: No filenames provided", 400

        # Without a client there is nothing to search; fail fast.
        if not client:
            return "❌ Error: Application not configured. Please set up your .env file with API credentials.", 500

        # The coroutine is executed on a separate thread with its own loop.
        pending = process_filenames(requested)
        return run_async_in_thread(pending)

    except Exception as route_error:
        logging.error(f"Error in upload route: {route_error}")
        return f"❌ Error: {str(route_error)}", 500
146
+
147
@app.route('/health')
def health():
    """Return a JSON health report, including which settings are configured."""
    configured = {
        "telegram": bool(client),
        "huggingface": bool(HF_TOKEN and REPO_ID),
        "channel": bool(CHANNEL),
    }
    payload = {
        "status": "healthy",
        "message": "Hugging Face Uploader is running",
        "configured": configured,
    }
    return jsonify(payload)
159
+
160
@app.route('/config')
def config():
    """Show configuration status.

    Returns a JSON object mapping each expected environment variable name to
    "✅ Set" or "❌ Missing". Only presence is reported; the values themselves
    are never included in the response.
    """
    settings = {
        "API_ID": API_ID,
        "API_HASH": API_HASH,
        "HF_TOKEN": HF_TOKEN,
        "CHANNEL_USERNAME": CHANNEL,
        "DATASET_REPO": REPO_ID,
    }
    config_status = {
        name: "✅ Set" if value else "❌ Missing"
        for name, value in settings.items()
    }

    return jsonify(config_status)
172
+
173
if __name__ == '__main__':
    # Startup banner: report which settings are present (never their values).
    print("Starting Hugging Face Uploader...")
    print("Configuration status:")
    for setting_name, setting_value in (
        ("API_ID", API_ID),
        ("API_HASH", API_HASH),
        ("HF_TOKEN", HF_TOKEN),
        ("CHANNEL_USERNAME", CHANNEL),
        ("DATASET_REPO", REPO_ID),
    ):
        print(f"  {setting_name}: {'✅ Set' if setting_value else '❌ Missing'}")
    print("\nTo configure, copy .env.example to .env and fill in your credentials.")
    print("Visit http://localhost:5000 to use the application.")

    # The server listens on every interface, so the interactive debugger
    # (which allows arbitrary code execution) must not be on by default.
    # Opt in explicitly with FLASK_DEBUG=1 for local development.
    debug_enabled = os.getenv("FLASK_DEBUG", "").strip().lower() in ("1", "true", "yes", "on")

    app.run(host='0.0.0.0', port=5000, debug=debug_enabled)
185
+