Spaces:
Build error
Build error
Commit ·
a6b9e44
1
Parent(s): f651327
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,37 +1,67 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
-
import
|
| 3 |
-
import
|
| 4 |
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
search_results = gh.search_repos(repo_type='public', q=f"{file_name} file://*.zip")[:5] # change to [10] to show top 10 results if you wish
|
| 8 |
-
|
| 9 |
-
zip_files = []
|
| 10 |
-
for res in search_results:
|
| 11 |
-
try:
|
| 12 |
-
repo = gh.get_repository(res['owner']['login'], res['name'])
|
| 13 |
-
for release in repo.get_releases():
|
| 14 |
-
for asset in release.get('assets'):
|
| 15 |
-
if 'application/zip' in asset['content_type'] and f"{os.path.basename(file_name)}" in asset['name'].lower():
|
| 16 |
-
file_size = float(asset['downloads'].split('/')[-1].split('.')[0]) / (1 << 20)
|
| 17 |
-
zip_files.append({'repo': res, 'size': file_size})
|
| 18 |
-
break
|
| 19 |
-
repos_by_size = sorted(zip_files, key=lambda x:x['size'], reverse=True)
|
| 20 |
-
except Exception as e:
|
| 21 |
-
print("Error while searching files in repository %s: %s" % (res['full_name'], str(e)))
|
| 22 |
-
return repos_by_size
|
| 23 |
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
+
from huggingface_hub import Repository
|
| 3 |
+
from github import Github
|
| 4 |
|
| 5 |
+
# Set up the Streamlit app
|
| 6 |
+
# Page-level configuration: the title is "GitHub File Search" and the icon is given as the ":mag_right:" shortcode.
st.set_page_config(page_title="GitHub File Search", page_icon=":mag_right:")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
+
# Define a function to get the 10 repositories with the largest file of the given name
|
| 9 |
+
def get_repositories_with_largest_file(filename):
    """Find repositories containing ``filename`` and report the largest copy in each.

    Searches GitHub repositories for ``filename``, inspects the first 10
    search results, and for each repository records the size of the largest
    file whose base name equals ``filename``.

    Args:
        filename: Exact file name (including extension) to look for.

    Returns:
        A list of dicts with the keys ``"name"``, ``"description"``,
        ``"size"`` (bytes) and ``"url"``, ordered from the largest file to
        the smallest. Repositories that contain no file with that name are
        left out, so the list may be empty.

    Raises:
        github.GithubException: If the GitHub API rejects a request for a
            reason other than a missing or empty repository.
    """
    # Imported locally because only ``Github`` is imported at module level.
    from github import GithubException

    # One client for the whole search instead of a new one per repository.
    # NOTE(review): the client is anonymous, so GitHub's unauthenticated rate
    # limits apply -- confirm whether a token should be supplied.
    client = Github()

    # ``huggingface_hub.Repository`` has no ``search`` method; repository
    # search is done through PyGithub instead.
    candidates = client.search_repositories(query=filename)[:10]

    repo_info = []
    for repo in candidates:
        try:
            # A recursive tree lists every path of the default branch in a
            # single request.
            tree = repo.get_git_tree(repo.default_branch, recursive=True)
        except GithubException as exc:
            # 404: branch/repository not reachable; 409: repository is empty.
            # Anything else (e.g. rate limiting) is a real failure.
            if exc.status not in (404, 409):
                raise
            continue

        # Only blobs are files; compare against the last path component so
        # files in sub-directories are found as well.
        matching_sizes = [
            entry.size
            for entry in tree.tree
            if entry.type == "blob"
            and entry.size is not None
            and entry.path.rsplit("/", 1)[-1] == filename
        ]
        if not matching_sizes:
            continue

        repo_info.append({
            "name": repo.name,
            "description": repo.description,
            "size": max(matching_sizes),
            "url": repo.html_url,
        })

    # Largest file first, matching how the results are presented to the user.
    repo_info.sort(key=lambda info: info["size"], reverse=True)
    return repo_info
|
| 36 |
+
|
| 37 |
+
# Define the Streamlit app
|
| 38 |
+
def app():
    """Render the search page and show the repositories found for a file name.

    Asks the user for a file name. Input without a "." is rejected with an
    error message. Otherwise the result of
    ``get_repositories_with_largest_file`` is listed on the page and offered
    as a plain-text download; when nothing is found a warning is shown
    instead and no download is offered.
    """
    st.title("GitHub File Search")
    st.write("Enter a file name to search for on GitHub:")
    filename = st.text_input("File name")

    # Nothing entered yet: show only the input widgets.
    if not filename:
        return

    if "." not in filename:
        st.error(f"{filename} is not a valid file name. Please enter a valid file name with the file extension.")
        return

    repo_info = get_repositories_with_largest_file(filename)
    if not repo_info:
        st.warning(f"No repositories found with the file name {filename}.")
        return

    st.write(f"Top 10 repositories with the largest {filename}:")
    report_entries = []
    for index, repo in enumerate(repo_info, start=1):
        # A repository may have no description; avoid printing "None".
        description = repo['description'] or "No description provided."
        st.write(f"{index}. [{repo['name']}]({repo['url']}) - {repo['size']} bytes")
        st.write(f" {description}")
        report_entries.append(
            f"{index}. {repo['name']} - {repo['size']} bytes\n{repo['url']}\n{description}"
        )

    # The download mirrors what is listed on the page, one entry per block.
    st.download_button(
        label="Download results as text file",
        data='\n\n'.join(report_entries),
        file_name=f"GitHub File Search Results - {filename}.txt",
        mime="text/plain",
    )
|
| 64 |
+
|
| 65 |
+
# Run the Streamlit app
|
| 66 |
+
if __name__ == "__main__":
    # Build the UI only when this file is executed as the main script.
    app()
|