Fred808 committed on
Commit
5511284
·
verified ·
1 Parent(s): ebf8a80

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -26
app.py CHANGED
@@ -1,24 +1,54 @@
1
  import os
2
  import gradio as gr
3
  import asyncio
 
4
  from telethon import TelegramClient
5
- from huggingface_hub import HfApi
6
  from dotenv import load_dotenv
7
 
 
8
  load_dotenv()
9
 
10
- api_id = int(os.getenv("API_ID"))
11
- api_hash = os.getenv("API_HASH")
12
- hf_token = os.getenv("HF_TOKEN")
13
- channel = os.getenv("CHANNEL_USERNAME")
14
- dataset_repo = os.getenv("DATASET_REPO")
15
-
16
- client = TelegramClient("my_session", api_id, api_hash)
17
- hf_api = HfApi()
18
- hf_api.set_access_token(hf_token)
19
-
 
 
 
 
 
 
 
 
 
 
 
20
  os.makedirs("downloads", exist_ok=True)
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  async def process_filenames(name_input, progress=None):
23
  await client.start()
24
 
@@ -26,7 +56,7 @@ async def process_filenames(name_input, progress=None):
26
  results = []
27
  found = set()
28
 
29
- messages = [msg async for msg in client.iter_messages(channel, limit=300)]
30
  total = len(messages)
31
 
32
  for i, msg in enumerate(messages):
@@ -43,15 +73,8 @@ async def process_filenames(name_input, progress=None):
43
 
44
  if not os.path.exists(path):
45
  await msg.download_media(file=path)
46
- hf_api.upload_file(
47
- path_or_fileobj=path,
48
- path_in_repo=os.path.basename(path),
49
- repo_id=dataset_repo,
50
- repo_type="dataset",
51
- token=hf_token # Pass token here
52
- )
53
-
54
- results.append(f"✅ Uploaded: {fname}")
55
  else:
56
  results.append(f"⏩ Already exists: {fname}")
57
  break
@@ -63,17 +86,23 @@ async def process_filenames(name_input, progress=None):
63
 
64
  return "\n".join(results) if results else "❌ No files matched."
65
 
 
66
  def handle_upload(name_input):
67
  with gr.Progress() as p:
68
  return asyncio.run(process_filenames(name_input, p))
69
 
 
70
  def launch_ui():
71
  with gr.Blocks() as app:
72
- gr.Markdown("## 📥 Telegram File Fetcher → 📤 HF Dataset Uploader")
73
- input_box = gr.Textbox(label="Enter filenames (comma or newline separated)", lines=6)
74
- result_box = gr.Textbox(label="Results", lines=10)
75
- go = gr.Button("Start Upload")
76
- go.click(fn=handle_upload, inputs=input_box, outputs=result_box)
 
 
 
 
77
  return app
78
 
79
  if __name__ == "__main__":
 
1
  import os
2
  import gradio as gr
3
  import asyncio
4
+ import logging
5
  from telethon import TelegramClient
6
+ from huggingface_hub import upload_file
7
  from dotenv import load_dotenv
8
 
9
+ # === Load secrets from .env ===
10
  load_dotenv()
11
 
12
+ API_ID = int(os.getenv("API_ID"))
13
+ API_HASH = os.getenv("API_HASH")
14
+ HF_TOKEN = os.getenv("HF_TOKEN")
15
+ CHANNEL = os.getenv("CHANNEL_USERNAME")
16
+ REPO_ID = os.getenv("DATASET_REPO")
17
+ DATA_PATH = "telegram_uploads"
18
+
19
+ # === Logging setup ===
20
+ logging.basicConfig(
21
+ level=logging.INFO,
22
+ format="%(asctime)s — %(levelname)s — %(message)s",
23
+ handlers=[
24
+ logging.FileHandler("upload.log"),
25
+ logging.StreamHandler()
26
+ ]
27
+ )
28
+
29
+ # === Setup Telegram Client ===
30
+ client = TelegramClient("my_session", API_ID, API_HASH)
31
+
32
+ # === Ensure download folder exists ===
33
  os.makedirs("downloads", exist_ok=True)
34
 
35
+ # === Upload wrapper ===
36
+ def upload_to_dataset(filepath):
37
+ try:
38
+ upload_file(
39
+ path_or_fileobj=filepath,
40
+ path_in_repo=f"{DATA_PATH}/{os.path.basename(filepath)}",
41
+ repo_id=REPO_ID,
42
+ repo_type="dataset",
43
+ token=HF_TOKEN
44
+ )
45
+ logging.info(f"[↑] Uploaded: {filepath}")
46
+ return True, f"✅ Uploaded: {os.path.basename(filepath)}"
47
+ except Exception as e:
48
+ logging.error(f"[!] Upload failed: {filepath} — {e}")
49
+ return False, f"❌ Upload failed: {os.path.basename(filepath)} — {e}"
50
+
51
+ # === Main file processing logic ===
52
  async def process_filenames(name_input, progress=None):
53
  await client.start()
54
 
 
56
  results = []
57
  found = set()
58
 
59
+ messages = [msg async for msg in client.iter_messages(CHANNEL, limit=300)]
60
  total = len(messages)
61
 
62
  for i, msg in enumerate(messages):
 
73
 
74
  if not os.path.exists(path):
75
  await msg.download_media(file=path)
76
+ success, msg_text = upload_to_dataset(path)
77
+ results.append(msg_text)
 
 
 
 
 
 
 
78
  else:
79
  results.append(f"⏩ Already exists: {fname}")
80
  break
 
86
 
87
  return "\n".join(results) if results else "❌ No files matched."
88
 
89
+ # === Gradio UI handler ===
90
  def handle_upload(name_input):
91
  with gr.Progress() as p:
92
  return asyncio.run(process_filenames(name_input, p))
93
 
94
+ # === Gradio UI ===
95
  def launch_ui():
96
  with gr.Blocks() as app:
97
+ gr.Markdown("## 📥 Telegram File Fetcher → 📤 Hugging Face Dataset Uploader")
98
+ input_box = gr.Textbox(
99
+ label="Enter filenames (comma or newline separated)",
100
+ lines=6,
101
+ placeholder="e.g.\nreport, summary.pdf\nmeeting_notes"
102
+ )
103
+ result_box = gr.Textbox(label="Upload Results", lines=12)
104
+ btn = gr.Button("Start Upload")
105
+ btn.click(fn=handle_upload, inputs=input_box, outputs=result_box)
106
  return app
107
 
108
  if __name__ == "__main__":