Spaces:
Running
Running
fetch all files
Browse files
app.py
CHANGED
|
@@ -6838,6 +6838,133 @@ Type: Transformers.js Application
|
|
| 6838 |
|
| 6839 |
return combined
|
| 6840 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6841 |
def fetch_hf_space_content(username: str, project_name: str) -> str:
|
| 6842 |
"""Fetch content from a Hugging Face Space"""
|
| 6843 |
try:
|
|
@@ -6853,70 +6980,53 @@ def fetch_hf_space_content(username: str, project_name: str) -> str:
|
|
| 6853 |
files = fetch_transformers_js_files(api, username, project_name)
|
| 6854 |
return combine_transformers_js_files(files, username, project_name)
|
| 6855 |
|
| 6856 |
-
#
|
| 6857 |
sdk = space_info.sdk
|
| 6858 |
-
|
| 6859 |
|
| 6860 |
-
|
| 6861 |
-
|
| 6862 |
-
|
| 6863 |
-
elif sdk == "gradio":
|
| 6864 |
-
file_patterns = ["app.py", "main.py", "gradio_app.py"]
|
| 6865 |
-
elif sdk == "streamlit":
|
| 6866 |
-
file_patterns = ["streamlit_app.py", "src/streamlit_app.py", "app.py", "src/app.py", "main.py", "src/main.py", "Home.py", "src/Home.py", "π _Home.py", "src/π _Home.py", "1_π _Home.py", "src/1_π _Home.py"]
|
| 6867 |
else:
|
| 6868 |
-
#
|
| 6869 |
-
|
| 6870 |
-
|
| 6871 |
-
|
| 6872 |
-
|
| 6873 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6874 |
content = api.hf_hub_download(
|
| 6875 |
repo_id=f"{username}/{project_name}",
|
| 6876 |
-
filename=
|
| 6877 |
repo_type="space"
|
| 6878 |
)
|
| 6879 |
-
main_file = file
|
| 6880 |
-
break
|
| 6881 |
-
except:
|
| 6882 |
-
continue
|
| 6883 |
-
|
| 6884 |
-
# If still no main file found, try to list repository files and find Python files
|
| 6885 |
-
if not main_file and sdk in ["streamlit", "gradio"]:
|
| 6886 |
-
try:
|
| 6887 |
-
from huggingface_hub import list_repo_files
|
| 6888 |
-
files = list_repo_files(repo_id=f"{username}/{project_name}", repo_type="space")
|
| 6889 |
|
| 6890 |
-
#
|
| 6891 |
-
|
| 6892 |
-
|
| 6893 |
|
| 6894 |
-
|
| 6895 |
-
try:
|
| 6896 |
-
content = api.hf_hub_download(
|
| 6897 |
-
repo_id=f"{username}/{project_name}",
|
| 6898 |
-
filename=py_file,
|
| 6899 |
-
repo_type="space"
|
| 6900 |
-
)
|
| 6901 |
-
main_file = py_file
|
| 6902 |
-
break
|
| 6903 |
-
except:
|
| 6904 |
-
continue
|
| 6905 |
-
except:
|
| 6906 |
-
pass
|
| 6907 |
-
|
| 6908 |
-
if main_file:
|
| 6909 |
-
content = api.hf_hub_download(
|
| 6910 |
-
repo_id=f"{username}/{project_name}",
|
| 6911 |
-
filename=main_file,
|
| 6912 |
-
repo_type="space"
|
| 6913 |
-
)
|
| 6914 |
-
|
| 6915 |
-
# Read the file content
|
| 6916 |
-
with open(content, 'r', encoding='utf-8') as f:
|
| 6917 |
-
file_content = f.read()
|
| 6918 |
-
|
| 6919 |
-
return f"""IMPORTED PROJECT FROM HUGGING FACE SPACE
|
| 6920 |
==============================================
|
| 6921 |
|
| 6922 |
Space: {username}/{project_name}
|
|
@@ -6924,15 +7034,15 @@ SDK: {sdk}
|
|
| 6924 |
Main File: {main_file}
|
| 6925 |
|
| 6926 |
{file_content}"""
|
| 6927 |
-
|
| 6928 |
-
|
| 6929 |
-
|
| 6930 |
-
|
| 6931 |
-
|
| 6932 |
-
|
| 6933 |
-
|
| 6934 |
-
|
| 6935 |
-
|
| 6936 |
|
| 6937 |
except Exception as e:
|
| 6938 |
return f"Error fetching space content: {str(e)}"
|
|
|
|
| 6838 |
|
| 6839 |
return combined
|
| 6840 |
|
| 6841 |
+
def fetch_all_space_files(api, username: str, project_name: str, sdk: str) -> dict:
    """Fetch the relevant files of a Hugging Face Space as text.

    The repository listing is filtered, ordered (SDK priority files first,
    then remaining ``.py`` files, then remaining JS/CSS/JSON/HTML/YAML files),
    capped at 20 entries, and each selected file is downloaded through
    ``api.hf_hub_download``.

    Args:
        api: Client object exposing ``hf_hub_download(repo_id=..., filename=...,
            repo_type=...)`` that returns a local file path. Presumably a
            ``huggingface_hub.HfApi`` instance -- confirm against the caller.
            When it also exposes ``list_repo_files`` that method is used for
            the listing, so listing and download go through the same client;
            otherwise the module-level ``huggingface_hub.list_repo_files`` is
            used.
        username: Owner part of the Space id.
        project_name: Name part of the Space id.
        sdk: Space SDK name; ``"gradio"``, ``"streamlit"`` and ``"static"``
            select a list of priority files, any other value selects none.

    Returns:
        A dict mapping repository path to file content. Files that are not
        valid UTF-8 map to a ``"[Binary file: ...]"`` placeholder and files
        whose download or read failed map to an ``"[Error loading ...]"``
        placeholder. An empty dict is returned when the listing or the file
        selection fails, which callers use as the signal to fall back to
        single-file loading.
    """
    import os  # function-scope import, like the huggingface_hub import below

    repo_id = f"{username}/{project_name}"
    max_files = 20  # keep the combined output to a manageable size

    keep_txt = ("requirements.txt", "packages.txt")
    skipped_suffixes = (".md", ".log", ".pyc")
    extra_suffixes = (".js", ".css", ".json", ".html", ".yml", ".yaml")
    priority_by_sdk = {
        "gradio": ["app.py", "main.py", "gradio_app.py", "requirements.txt", "packages.txt"],
        "streamlit": ["streamlit_app.py", "app.py", "main.py", "requirements.txt", "packages.txt"],
        "static": ["index.html", "index.js", "style.css", "script.js"],
    }

    try:
        # Prefer the client's own listing so it shares configuration with the
        # downloads below; fall back to the module-level helper otherwise.
        list_files = getattr(api, "list_repo_files", None)
        if list_files is None:
            from huggingface_hub import list_repo_files as list_files
        all_files = list_files(repo_id=repo_id, repo_type="space")

        # Drop hidden files, docs, logs, bytecode and non-dependency .txt files.
        relevant = [
            path
            for path in all_files
            if not (
                path.startswith(".")
                or path.endswith(skipped_suffixes)
                or (path.endswith(".txt") and path not in keep_txt)
                or "__pycache__" in path
            )
        ]

        # Priority files keep the order of the priority list, not the listing.
        available = set(relevant)
        selected = [name for name in priority_by_sdk.get(sdk, []) if name in available]
        taken = set(selected)
        remaining = [path for path in relevant if path not in taken]

        selected.extend(path for path in remaining if path.endswith(".py"))
        selected.extend(path for path in remaining if path.endswith(extra_suffixes))
        selected = selected[:max_files]
    except Exception:
        # Listing/selection failed: an empty result triggers the caller's fallback.
        return {}

    files = {}
    for file_name in selected:
        try:
            local_path = api.hf_hub_download(
                repo_id=repo_id,
                filename=file_name,
                repo_type="space",
            )
            try:
                with open(local_path, "r", encoding="utf-8") as handle:
                    files[file_name] = handle.read()
            except UnicodeDecodeError:
                # Not valid UTF-8, so decoding the same bytes again cannot
                # succeed; record a placeholder with the file size instead.
                size = os.path.getsize(local_path)
                files[file_name] = f"[Binary file: {file_name} - {size} bytes]"
        except Exception as e:
            # Best effort: one failing file must not abort the whole import.
            files[file_name] = f"[Error loading {file_name}: {str(e)}]"

    return files
|
| 6927 |
+
|
| 6928 |
+
def format_multi_file_space(files: dict, username: str, project_name: str, sdk: str) -> str:
    """Render the files of a Space as one text document.

    The output starts with a header naming the Space, its SDK and the number
    of files, followed by one ``=== <name> ===`` section per file. Well-known
    entry-point files come first in a fixed order; all other files follow in
    sorted name order.

    Args:
        files: Mapping of file name to file content.
        username: Owner part of the Space id, shown in the header.
        project_name: Name part of the Space id, shown in the header.
        sdk: SDK name, shown in the header.

    Returns:
        The formatted document, or an empty string when ``files`` is empty.
    """
    if not files:
        return ""

    header = f"""IMPORTED PROJECT FROM HUGGING FACE SPACE
==============================================

Space: {username}/{project_name}
SDK: {sdk}
Files: {len(files)} files loaded

"""

    # Main files are shown first, in this fixed order, when present.
    priority_order = ("app.py", "main.py", "streamlit_app.py", "gradio_app.py", "index.html", "requirements.txt")
    main_files = [name for name in priority_order if name in files]
    other_files = [name for name in sorted(files) if name not in main_files]

    # Build all sections in one pass and join once instead of repeated "+=".
    sections = [
        f"=== {name} ===\n{files[name]}\n\n"
        for name in main_files + other_files
    ]
    return header + "".join(sections)
|
| 6967 |
+
|
| 6968 |
def fetch_hf_space_content(username: str, project_name: str) -> str:
|
| 6969 |
"""Fetch content from a Hugging Face Space"""
|
| 6970 |
try:
|
|
|
|
| 6980 |
files = fetch_transformers_js_files(api, username, project_name)
|
| 6981 |
return combine_transformers_js_files(files, username, project_name)
|
| 6982 |
|
| 6983 |
+
# Use the new multi-file loading approach for all space types
|
| 6984 |
sdk = space_info.sdk
|
| 6985 |
+
files = fetch_all_space_files(api, username, project_name, sdk)
|
| 6986 |
|
| 6987 |
+
if files:
|
| 6988 |
+
# Use the multi-file format
|
| 6989 |
+
return format_multi_file_space(files, username, project_name, sdk)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6990 |
else:
|
| 6991 |
+
# Fallback to single file loading for compatibility
|
| 6992 |
+
main_file = None
|
| 6993 |
+
|
| 6994 |
+
# Define file patterns to try based on SDK
|
| 6995 |
+
if sdk == "static":
|
| 6996 |
+
file_patterns = ["index.html"]
|
| 6997 |
+
elif sdk == "gradio":
|
| 6998 |
+
file_patterns = ["app.py", "main.py", "gradio_app.py"]
|
| 6999 |
+
elif sdk == "streamlit":
|
| 7000 |
+
file_patterns = ["streamlit_app.py", "src/streamlit_app.py", "app.py", "src/app.py", "main.py", "src/main.py", "Home.py", "src/Home.py", "π _Home.py", "src/π _Home.py", "1_π _Home.py", "src/1_π _Home.py"]
|
| 7001 |
+
else:
|
| 7002 |
+
# Try common files for unknown SDKs
|
| 7003 |
+
file_patterns = ["app.py", "src/app.py", "index.html", "streamlit_app.py", "src/streamlit_app.py", "main.py", "src/main.py", "Home.py", "src/Home.py"]
|
| 7004 |
+
|
| 7005 |
+
# Try to find and download the main file
|
| 7006 |
+
for file in file_patterns:
|
| 7007 |
+
try:
|
| 7008 |
+
content = api.hf_hub_download(
|
| 7009 |
+
repo_id=f"{username}/{project_name}",
|
| 7010 |
+
filename=file,
|
| 7011 |
+
repo_type="space"
|
| 7012 |
+
)
|
| 7013 |
+
main_file = file
|
| 7014 |
+
break
|
| 7015 |
+
except:
|
| 7016 |
+
continue
|
| 7017 |
+
|
| 7018 |
+
if main_file:
|
| 7019 |
content = api.hf_hub_download(
|
| 7020 |
repo_id=f"{username}/{project_name}",
|
| 7021 |
+
filename=main_file,
|
| 7022 |
repo_type="space"
|
| 7023 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7024 |
|
| 7025 |
+
# Read the file content
|
| 7026 |
+
with open(content, 'r', encoding='utf-8') as f:
|
| 7027 |
+
file_content = f.read()
|
| 7028 |
|
| 7029 |
+
return f"""IMPORTED PROJECT FROM HUGGING FACE SPACE
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7030 |
==============================================
|
| 7031 |
|
| 7032 |
Space: {username}/{project_name}
|
|
|
|
| 7034 |
Main File: {main_file}
|
| 7035 |
|
| 7036 |
{file_content}"""
|
| 7037 |
+
else:
|
| 7038 |
+
# Try to get more information about available files for debugging
|
| 7039 |
+
try:
|
| 7040 |
+
from huggingface_hub import list_repo_files
|
| 7041 |
+
files_list = list_repo_files(repo_id=f"{username}/{project_name}", repo_type="space")
|
| 7042 |
+
available_files = [f for f in files_list if not f.startswith('.') and not f.endswith('.md')]
|
| 7043 |
+
return f"Error: Could not find main file in space {username}/{project_name}.\n\nSDK: {sdk}\nAvailable files: {', '.join(available_files[:10])}{'...' if len(available_files) > 10 else ''}\n\nTried looking for: {', '.join(file_patterns)}"
|
| 7044 |
+
except:
|
| 7045 |
+
return f"Error: Could not find main file in space {username}/{project_name}. Expected files for {sdk} SDK: {', '.join(file_patterns) if 'file_patterns' in locals() else 'standard files'}"
|
| 7046 |
|
| 7047 |
except Exception as e:
|
| 7048 |
return f"Error fetching space content: {str(e)}"
|