ScottzillaSystems committed on
Commit
eb7710c
·
verified ·
1 Parent(s): 6ff0a8c

Add GitHub to HF Spaces importer app

Browse files
Files changed (1) hide show
  1. app.py +316 -0
app.py ADDED
@@ -0,0 +1,316 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import subprocess
3
+ import tempfile
4
+ import shutil
5
+ import os
6
+ import re
7
+ from huggingface_hub import HfApi
8
+
9
+
10
def validate_github_url(url):
    """Validate a GitHub repository URL and normalize it to its clone root.

    Accepts ``http(s)://github.com/<owner>/<repo>`` optionally followed by a
    ``.git`` suffix, a trailing slash, or extra path segments (for example
    ``/tree/main``). Anything after the repository name is dropped because the
    result is passed to ``git clone``, which only understands the repo root.

    Args:
        url: Raw URL text as typed by the user. ``None`` or empty is allowed.

    Returns:
        ``"<scheme>://github.com/<owner>/<repo>"`` when the input is a GitHub
        repository URL, otherwise ``None``.
    """
    if not url:
        return None

    # The repo group is lazy so that an optional ".git" suffix is split off,
    # while names that merely contain ".git" (e.g. "user.github.io") are kept
    # intact by backtracking.
    match = re.fullmatch(
        r"(https?)://github\.com/([\w.-]+)/([\w.-]+?)(?:\.git)?(?:/.*)?",
        url.strip(),
    )
    if match is None:
        return None

    scheme, owner, repo = match.groups()
    return f"{scheme}://github.com/{owner}/{repo}"
20
+
21
+
22
def extract_repo_name(github_url):
    """Extract the owner and repository name from a GitHub URL.

    The URL is split on ``/``; the fourth and fifth components (after the
    scheme, the empty segment and the host) are taken as owner and repo.

    Args:
        github_url: URL of the form ``https://github.com/<owner>/<repo>``,
            optionally with a trailing slash, ``.git`` suffix or extra path.

    Returns:
        ``(owner, repo)`` on success, or ``(None, None)`` when the URL has
        fewer than five ``/``-separated components.
    """
    parts = github_url.rstrip("/").split("/")
    if len(parts) < 5:
        return None, None

    owner, repo = parts[3], parts[4]
    # Strip only a trailing ".git"; removing it anywhere in the name would
    # corrupt repositories such as "user.github.io".
    suffix = ".git"
    if repo.endswith(suffix):
        repo = repo[: -len(suffix)]
    return owner, repo
28
+
29
+
30
def detect_sdk(tmpdir):
    """Guess the Hugging Face Space SDK for a checked-out project.

    Checks are applied in this order and the first hit wins:

    1. A top-level ``Dockerfile`` (any letter case) -> ``"docker"``.
    2. Any ``.py`` file in the tree that contains a Gradio or Streamlit
       import statement -> ``"gradio"`` / ``"streamlit"``.
    3. A top-level ``requirements.txt`` mentioning gradio or streamlit.
    4. A top-level ``index.html`` (any letter case) -> ``"static"``.
    5. Otherwise ``"gradio"``.

    Args:
        tmpdir: Path of the directory that holds the project files.

    Returns:
        One of ``"docker"``, ``"gradio"``, ``"streamlit"`` or ``"static"``.
    """
    top_level = {name.lower() for name in os.listdir(tmpdir)}

    if "dockerfile" in top_level:
        return "docker"

    for root, dirs, fnames in os.walk(tmpdir):
        # Visit directories and files in sorted order so the result does not
        # depend on the order in which the OS happens to list entries.
        dirs.sort()
        for fname in sorted(fnames):
            if not fname.endswith(".py"):
                continue
            try:
                with open(
                    os.path.join(root, fname), "r",
                    encoding="utf-8", errors="replace",
                ) as fh:
                    content = fh.read()
            except OSError:
                # Unreadable file (permissions, broken symlink): skip it.
                continue
            if "import gradio" in content or "from gradio" in content:
                return "gradio"
            if "import streamlit" in content or "from streamlit" in content:
                return "streamlit"

    try:
        with open(
            os.path.join(tmpdir, "requirements.txt"), "r",
            encoding="utf-8", errors="replace",
        ) as fh:
            reqs = fh.read().lower()
    except OSError:
        # Missing or unreadable requirements file: fall through.
        reqs = ""
    if "gradio" in reqs:
        return "gradio"
    if "streamlit" in reqs:
        return "streamlit"

    if "index.html" in top_level:
        return "static"

    return "gradio"  # Default
69
+
70
+
71
def list_files_preview(tmpdir, max_files=50):
    """Render a plain-text tree of the files under ``tmpdir``.

    Hidden directories (names starting with ``.``) are not descended into.
    Each nesting level adds one space of indentation; directories are shown
    with a folder icon and files with a page icon plus their size.

    Args:
        tmpdir: Root directory to list.
        max_files: Maximum number of file entries to show. When more files
            exist, the listing stops and a truncation notice is appended.

    Returns:
        The tree as a newline-joined string, or ``"(empty repository)"`` when
        nothing was listed.
    """
    lines = []
    count = 0
    for root, dirs, files in os.walk(tmpdir):
        # Prune hidden directories in place and sort for a stable order.
        dirs[:] = sorted(d for d in dirs if not d.startswith("."))

        # Depth relative to the root. relpath is used instead of string
        # replacement so a trailing separator on tmpdir, or tmpdir's text
        # recurring deeper in the path, cannot skew the level.
        rel = os.path.relpath(root, tmpdir)
        level = 0 if rel == os.curdir else rel.count(os.sep) + 1

        if level > 0:
            lines.append(f"{' ' * level}📁 {os.path.basename(root)}/")

        file_indent = " " * (level + 1)
        for f in sorted(files):
            if count >= max_files:
                lines.append(f"\n... and more files (showing first {max_files})")
                return "\n".join(lines)
            try:
                size = os.path.getsize(os.path.join(root, f))
            except OSError:
                # e.g. a dangling symlink; still list the entry.
                size_str = "?"
            else:
                size_str = f"{size / 1024:.1f}KB" if size >= 1024 else f"{size}B"
            lines.append(f"{file_indent}📄 {f} ({size_str})")
            count += 1
    return "\n".join(lines) if lines else "(empty repository)"
94
+
95
+
96
def import_github_to_hf(github_url, hf_space_id, sdk_choice, hf_token, private, branch):
    """Clone a GitHub repo and push it to a Hugging Face Space.

    This is a generator: it yields progress updates as the import advances so
    the UI can show each step as it happens.

    Args:
        github_url: URL of the GitHub repository to import.
        hf_space_id: Target Space ID (``user/name``). When empty, it is built
            from the token owner's username and the repository name.
        sdk_choice: ``"auto-detect"`` or an explicit SDK name that is passed
            through to ``create_repo`` as ``space_sdk``.
        hf_token: Hugging Face access token used for all Hub calls.
        private: Whether the Space is created as private.
        branch: Optional branch to clone; empty means the default branch.

    Yields:
        ``(status_markdown, files_markdown)`` tuples. ``files_markdown`` is an
        empty string until the clone has succeeded, and again for the timeout
        and unexpected-error messages.
    """
    # Validate inputs
    if not github_url or not github_url.strip():
        yield "❌ Please enter a GitHub repository URL.", ""
        return

    github_url = validate_github_url(github_url)
    if github_url is None:
        yield "❌ Invalid GitHub URL. Expected format: `https://github.com/owner/repo`", ""
        return

    if not hf_token or not hf_token.strip():
        yield "❌ Please enter your Hugging Face token.", ""
        return
    token = hf_token.strip()

    owner, repo_name = extract_repo_name(github_url)
    if not owner or not repo_name:
        yield "❌ Could not parse owner/repo from URL.", ""
        return

    # Default space name: <token owner>/<repo name>
    if not hf_space_id or not hf_space_id.strip():
        try:
            api = HfApi(token=token)
            user_info = api.whoami()
            username = user_info.get("name", user_info.get("user", "user"))
            hf_space_id = f"{username}/{repo_name}"
        except Exception as e:
            yield f"❌ Could not determine your HF username: {e}", ""
            return

    hf_space_id = hf_space_id.strip()

    yield f"🔄 **Step 1/4:** Cloning `{github_url}`...", ""

    # Clone
    tmpdir = tempfile.mkdtemp()
    try:
        clone_cmd = ["git", "clone", "--depth=1"]
        if branch and branch.strip():
            clone_cmd += ["-b", branch.strip()]
        clone_cmd += [github_url, tmpdir]

        # GIT_TERMINAL_PROMPT=0 makes git fail immediately instead of waiting
        # for credentials (e.g. private or non-existent repositories), which
        # would otherwise stall until the timeout expires.
        clone_env = dict(os.environ, GIT_TERMINAL_PROMPT="0")

        result = subprocess.run(
            clone_cmd,
            capture_output=True, text=True, timeout=120,
            env=clone_env,
        )
        if result.returncode != 0:
            yield f"❌ Git clone failed:\n```\n{result.stderr}\n```", ""
            return

        # Remove .git directory so repository history is not uploaded
        git_dir = os.path.join(tmpdir, ".git")
        if os.path.exists(git_dir):
            shutil.rmtree(git_dir)

        # File preview, shown alongside every later status update
        file_tree = list_files_preview(tmpdir)
        files_md = f"```\n{file_tree}\n```"

        yield "✅ **Step 1/4:** Cloned successfully.\n🔄 **Step 2/4:** Detecting SDK...", files_md

        # Detect or use chosen SDK
        if sdk_choice == "auto-detect":
            sdk_to_use = detect_sdk(tmpdir)
            sdk_msg = f"Auto-detected: **{sdk_to_use}**"
        else:
            sdk_to_use = sdk_choice
            sdk_msg = f"Using selected: **{sdk_choice}**"

        yield (
            f"✅ **Step 1/4:** Cloned successfully.\n"
            f"✅ **Step 2/4:** {sdk_msg}\n"
            f"🔄 **Step 3/4:** Creating Space `{hf_space_id}`...",
            files_md,
        )

        # Create Space (exist_ok=True: an already existing Space is reused)
        api = HfApi(token=token)
        try:
            api.create_repo(
                repo_id=hf_space_id,
                repo_type="space",
                space_sdk=sdk_to_use,
                private=private,
                exist_ok=True,
            )
        except Exception as e:
            yield f"❌ Failed to create Space: {e}", files_md
            return

        yield (
            f"✅ **Step 1/4:** Cloned successfully.\n"
            f"✅ **Step 2/4:** {sdk_msg}\n"
            f"✅ **Step 3/4:** Space created.\n"
            f"🔄 **Step 4/4:** Uploading files to `{hf_space_id}`...",
            files_md,
        )

        # Upload
        # NOTE(review): how these patterns match nested paths (e.g. a `.env`
        # inside a subdirectory) is decided by huggingface_hub — confirm
        # against its documentation before relying on them for secrets.
        try:
            api.upload_folder(
                folder_path=tmpdir,
                repo_id=hf_space_id,
                repo_type="space",
                commit_message=f"Import from {github_url}",
                ignore_patterns=[
                    "*.pyc", "__pycache__/", ".git/", ".gitignore",
                    ".env", ".env.*", "*.log", ".DS_Store", "node_modules/",
                ],
            )
        except Exception as e:
            yield f"❌ Failed to upload files: {e}", files_md
            return

        space_url = f"https://huggingface.co/spaces/{hf_space_id}"

        yield (
            f"## ✅ Import Complete!\n\n"
            f"| Detail | Value |\n"
            f"|--------|-------|\n"
            f"| **Source** | [{github_url}]({github_url}) |\n"
            f"| **Space** | [{hf_space_id}]({space_url}) |\n"
            f"| **SDK** | {sdk_to_use} |\n"
            f"| **Visibility** | {'Private 🔒' if private else 'Public 🌍'} |\n\n"
            f"🔗 **[Open your Space →]({space_url})**",
            files_md,
        )

    except subprocess.TimeoutExpired:
        yield "❌ Git clone timed out after 120 seconds. The repository may be too large.", ""
    except Exception as e:
        yield f"❌ Unexpected error: {e}", ""
    finally:
        # Always discard the working copy, including on early returns above.
        if os.path.exists(tmpdir):
            shutil.rmtree(tmpdir, ignore_errors=True)
233
+
234
+
235
+ # ─── Gradio UI ───────────────────────────────────────────
236
+
237
# Build the single-page UI: input fields, an import button and two output
# panes (status on the left, file tree on the right).
with gr.Blocks(
    title="🚀 GitHub → HF Spaces Importer",
    theme=gr.themes.Soft(),
) as demo:
    gr.Markdown("""
    # 🚀 GitHub → Hugging Face Spaces Importer

    Import any public GitHub repository directly into a Hugging Face Space.
    The tool clones the repo, auto-detects the SDK, creates the Space, and uploads all files.
    """)

    # Row 1: source repository and optional branch
    with gr.Row():
        with gr.Column(scale=2):
            github_url_input = gr.Textbox(
                label="GitHub Repository URL",
                placeholder="https://github.com/owner/repo",
                info="Public GitHub repository URL",
            )
        with gr.Column(scale=1):
            branch_input = gr.Textbox(
                label="Branch (optional)",
                placeholder="main",
                info="Leave empty for default branch",
            )

    # Row 2: target Space and SDK selection
    with gr.Row():
        with gr.Column(scale=2):
            space_id_input = gr.Textbox(
                label="HF Space ID (optional)",
                placeholder="your-username/space-name",
                info="Leave empty to auto-generate from repo name",
            )
        with gr.Column(scale=1):
            sdk_dropdown = gr.Dropdown(
                choices=["auto-detect", "gradio", "streamlit", "docker", "static"],
                value="auto-detect",
                label="Space SDK",
                info="Auto-detect scans for framework imports",
            )

    # Row 3: credentials and visibility
    with gr.Row():
        with gr.Column(scale=2):
            token_input = gr.Textbox(
                label="Hugging Face Token",
                type="password",
                placeholder="hf_...",
                info="Needs write access. Get one at huggingface.co/settings/tokens",
            )
        with gr.Column(scale=1):
            private_checkbox = gr.Checkbox(
                label="Private Space",
                value=False,
            )

    import_btn = gr.Button("🚀 Import to Hugging Face Spaces", variant="primary", size="lg")

    # Output panes updated by each progress tuple from import_github_to_hf
    with gr.Row():
        with gr.Column(scale=2):
            status_output = gr.Markdown(label="Status")
        with gr.Column(scale=1):
            files_output = gr.Markdown(label="Repository Files")

    gr.Markdown("""
    ---
    ### ℹ️ Notes
    - Only **public** GitHub repositories are supported (no authentication for GitHub).
    - Your HF token needs **write** permissions to create Spaces.
    - Large repositories may take longer to clone and upload.
    - The `.git` directory and common non-essential files (`.env`, `node_modules`, etc.) are excluded.
    - SDK auto-detection checks for Gradio/Streamlit imports, Dockerfiles, and `index.html`.
    """)

    # The input order must match import_github_to_hf's parameter order.
    import_btn.click(
        fn=import_github_to_hf,
        inputs=[github_url_input, space_id_input, sdk_dropdown, token_input, private_checkbox, branch_input],
        outputs=[status_output, files_output],
    )

if __name__ == "__main__":
    demo.launch()