Fred808 committed on
Commit
302f5b9
·
verified ·
1 Parent(s): 0984835

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -0
app.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import asyncio
import os
import threading

import gradio as gr
from dotenv import load_dotenv
from huggingface_hub import HfApi
from telethon import TelegramClient
7
+
8
# Pull configuration from a local .env file (if present) into os.environ.
load_dotenv()

# Telegram API credentials. API_ID must be an integer, so fail with a clear
# message instead of the opaque TypeError that int(None) would raise.
_raw_api_id = os.getenv("API_ID")
if not _raw_api_id:
    raise RuntimeError("API_ID environment variable is not set")
api_id = int(_raw_api_id)
api_hash = os.getenv("API_HASH")

# Hugging Face token, source Telegram channel and destination dataset repo.
hf_token = os.getenv("HF_TOKEN")
channel = os.getenv("CHANNEL_USERNAME")
dataset_repo = os.getenv("DATASET_REPO")

# Session state is persisted under the name "my_session".
client = TelegramClient("my_session", api_id, api_hash)

# The token is passed to the constructor; HfApi.set_access_token() was
# deprecated and later removed from huggingface_hub.
hf_api = HfApi(token=hf_token)

# Local staging directory for files fetched from Telegram.
os.makedirs("downloads", exist_ok=True)
21
+
22
async def process_filenames(name_input, progress=None):
    """Fetch matching files from the Telegram channel and upload them.

    The 300 most recent messages of ``channel`` are scanned. Every attached
    file whose name contains one of the search terms (case-insensitive
    substring match) is downloaded into ``downloads/`` and uploaded to the
    ``dataset_repo`` dataset under its bare file name. Files that already
    exist locally are reported but neither downloaded nor uploaded again.

    Args:
        name_input: Search terms separated by commas and/or newlines.
        progress: Optional callable invoked as ``progress(fraction, desc=...)``
            before each message is examined.

    Returns:
        A newline-joined report: one line per handled file followed by one
        line per search term that matched nothing. When ``name_input``
        contains no terms, the single line "❌ No files matched." is
        returned without contacting Telegram.
    """
    # dict.fromkeys() removes duplicate terms while keeping the input order,
    # so a repeated term is reported only once.
    terms = list(dict.fromkeys(
        part.strip().lower()
        for part in name_input.replace(",", "\n").splitlines()
        if part.strip()
    ))
    if not terms:
        return "❌ No files matched."

    await client.start()

    report = []
    seen_files = set()
    matched_terms = set()

    messages = [msg async for msg in client.iter_messages(channel, limit=300)]
    total = len(messages)

    for index, msg in enumerate(messages):
        # Compare against None explicitly: the progress object may define
        # __len__, which makes a plain truthiness test unreliable.
        if progress is not None:
            progress(index / total, desc=f"Scanning message {index + 1}/{total}...")

        if not (msg.media and msg.file):
            continue

        raw_name = msg.file.name or f"file_{msg.id}{msg.file.ext or ''}"
        # The name comes from a remote message: keep only its final path
        # component so it cannot escape the downloads/ directory.
        fname = os.path.basename(raw_name.replace("\\", "/"))
        if fname in ("", ".", ".."):
            fname = f"file_{msg.id}"

        if fname in seen_files:
            continue

        lowered = fname.lower()
        hits = [term for term in terms if term in lowered]
        if not hits:
            continue

        seen_files.add(fname)
        # Record matches directly instead of re-parsing the report lines.
        matched_terms.update(hits)

        path = f"downloads/{fname}"
        if os.path.exists(path):
            report.append(f"⏩ Already exists: {fname}")
            continue

        await msg.download_media(file=path)
        hf_api.upload_file(
            path_or_fileobj=path,
            path_in_repo=fname,
            repo_id=dataset_repo,
            repo_type="dataset",
        )
        report.append(f"✅ Uploaded: {fname}")

    report.extend(
        f"❌ Not found: {term}" for term in terms if term not in matched_terms
    )
    return "\n".join(report)
63
+
64
# Telethon requires every call on a client to run on the same event loop, and
# asyncio.run() would create (and close) a fresh loop per click. Keep one
# long-lived loop instead, with a lock so that only one handler thread drives
# it at a time.
_telegram_loop = asyncio.new_event_loop()
_telegram_loop_lock = threading.Lock()


def handle_upload(name_input, progress=gr.Progress()):
    """Synchronous Gradio handler that runs ``process_filenames``.

    Args:
        name_input: Raw text from the filename textbox.
        progress: Progress tracker. It is declared as a default argument
            because that is how Gradio injects a tracker into an event
            handler; ``gr.Progress`` is not a context manager, so it cannot
            be used in a ``with`` statement.

    Returns:
        The textual report produced by ``process_filenames``.
    """
    with _telegram_loop_lock:
        return _telegram_loop.run_until_complete(
            process_filenames(name_input, progress)
        )
67
+
68
def launch_ui():
    """Build the Gradio interface and return it without launching.

    The layout has a multi-line textbox for the search terms, a results
    textbox and a button that sends the input through ``handle_upload``.

    Returns:
        The assembled ``gr.Blocks`` application.
    """
    with gr.Blocks() as app:
        # Heading emoji restored from their mis-encoded (mojibake) form.
        gr.Markdown("## 📥 Telegram File Fetcher → 📤 HF Dataset Uploader")
        input_box = gr.Textbox(
            label="Enter filenames (comma or newline separated)",
            lines=6,
        )
        result_box = gr.Textbox(label="Results", lines=10)
        go = gr.Button("Start Upload")
        go.click(fn=handle_upload, inputs=input_box, outputs=result_box)
    return app
76
+
77
if __name__ == "__main__":
    # The client is deliberately not connected here. asyncio.run() would
    # connect it on a temporary event loop that is closed as soon as the call
    # returns, leaving the client tied to a dead loop. process_filenames()
    # calls client.start() itself when the first request arrives.
    launch_ui().launch(server_name="0.0.0.0", server_port=7860)