Not-Grim-Refer committed on
Commit
a6b9e44
·
1 Parent(s): f651327

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -34
app.py CHANGED
@@ -1,37 +1,67 @@
1
  import streamlit as st
2
- import github
3
- import os
4
 
5
def find_large_files(file_name):
    """Find zip release assets matching *file_name* in public GitHub repositories.

    Searches GitHub repositories for ``file_name``, walks the releases of the
    first five hits and records, for each release, the first asset that is a
    zip archive and whose name contains the base name of ``file_name``.

    Args:
        file_name: File name (optionally with a leading path) to look for.

    Returns:
        A list of ``{'repo': Repository, 'size': float}`` dicts, where
        ``size`` is the asset size in MiB, sorted largest first. The list is
        empty when nothing matches.
    """
    gh = github.Github()
    # Asset names are lowered before comparison, so lower the needle as well.
    wanted = os.path.basename(file_name).lower()

    zip_files = []
    # Change the slice to [:10] to inspect the top 10 search results instead.
    for repo in gh.search_repositories(query=file_name)[:5]:
        try:
            for release in repo.get_releases():
                for asset in release.get_assets():
                    is_zip = 'application/zip' in (asset.content_type or '')
                    if is_zip and wanted in asset.name.lower():
                        # GitHub reports asset sizes in bytes; convert to MiB.
                        zip_files.append({'repo': repo, 'size': asset.size / (1 << 20)})
                        # Only the first matching asset of a release is kept.
                        break
        except Exception as e:
            # Deliberate best effort: one failing repository must not abort
            # the whole search, so report it and move on to the next hit.
            print("Error while searching files in repository %s: %s" % (repo.full_name, str(e)))
    # Always return a list, even when no repository produced a match.
    return sorted(zip_files, key=lambda x: x['size'], reverse=True)
23
 
24
# Search UI: ask for a zip file name and list the matching release assets.
st.write("Enter filename including extension: ")
file_input = st.text_input("File Name", "test.txt")
if ".zip" not in file_input:
    st.write("Please enter a valid file name.")
else:
    st.write("Searching...")
    repos = find_large_files(file_input)
    if not repos:
        st.write("No matching files found.")
    else:
        # st.table takes the data itself; a list of dicts yields one column
        # per key. Sizes come back from find_large_files in MiB.
        st.table([
            {"Repo URL": r["repo"].url, "Size": f"{r['size']:.2f} MiB"}
            for r in repos
        ])
        # NOTE(review): the link uses STREAMLIT_USERNAME as a Google Drive
        # file id - confirm this is intended. It is skipped when the variable
        # is unset instead of raising KeyError.
        drive_id = os.environ.get('STREAMLIT_USERNAME')
        if drive_id:
            st.markdown(
                "[Download Results](https://drive.google.com/uc?id="
                + drive_id
                + "&export=download)"
            )
        st.button("Download")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ from huggingface_hub import Repository
3
+ from github import Github
4
 
5
# Configure the browser tab (title and icon) before anything else is rendered.
st.set_page_config(
    page_title="GitHub File Search",
    page_icon=":mag_right:",
)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
# Define a function to get the 10 repositories with the largest file of the given name
def get_repositories_with_largest_file(filename):
    """Return GitHub repositories that contain a file called *filename*.

    Runs a GitHub repository search for ``filename``, inspects the first ten
    hits and, for each one, looks through the file tree of its default branch
    for files whose base name equals ``filename``.

    Args:
        filename: Exact file name (including extension) to look for.

    Returns:
        A list of dicts with the keys ``"name"``, ``"description"`` (may be
        ``None``), ``"size"`` (bytes of the largest matching file) and
        ``"url"``, ordered from largest to smallest file. Repositories
        without a matching file are left out.
    """
    # Local import: only this function needs the exception type.
    from github import GithubException

    # One client for all requests instead of a new one per repository.
    client = Github()

    repo_info = []
    # Search hits are already Repository objects, so no extra get_repo() call
    # is needed; slicing limits the scan to the first ten hits.
    for repo in client.search_repositories(query=filename)[:10]:
        try:
            # NOTE(review): GitHub may truncate very large recursive trees -
            # confirm whether that matters for the repositories searched.
            tree = repo.get_git_tree(repo.default_branch, recursive=True).tree
        except GithubException:
            # Skip repositories whose tree cannot be read (assumed to be
            # empty or inaccessible ones - TODO confirm).
            continue

        # Only blobs are files; compare on the last path component.
        sizes = [
            entry.size or 0
            for entry in tree
            if entry.type == "blob" and entry.path.rsplit("/", 1)[-1] == filename
        ]
        if not sizes:
            continue

        repo_info.append({
            "name": repo.name,
            "description": repo.description,
            # max() also handles zero-byte files, which a "> 0" test would drop.
            "size": max(sizes),
            "url": repo.html_url,
        })

    # Largest file first, as the "largest file" contract implies.
    repo_info.sort(key=lambda info: info["size"], reverse=True)
    return repo_info
36
+
37
# Define the Streamlit app
def app():
    """Render the search page: input box, result list and download button.

    Nothing is searched until a file name has been entered. A name without
    a "." is rejected with an error message. When the search returns
    repositories they are listed and offered as a text-file download;
    otherwise a warning is shown and no download is offered.
    """
    st.title("GitHub File Search")
    st.write("Enter a file name to search for on GitHub:")
    filename = st.text_input("File name")

    # Guard clauses keep the happy path flat.
    if not filename:
        return
    if "." not in filename:
        st.error(f"{filename} is not a valid file name. Please enter a valid file name with the file extension.")
        return

    repo_info = get_repositories_with_largest_file(filename)
    if not repo_info:
        st.warning(f"No repositories found with the file name {filename}.")
        return

    st.write(f"Top 10 repositories with the largest {filename}:")
    report = []
    for index, repo in enumerate(repo_info, start=1):
        # A repository description can be missing; avoid printing "None".
        description = repo['description'] or "No description provided."
        st.write(f"{index}. [{repo['name']}]({repo['url']}) - {repo['size']} bytes")
        st.write(f" {description}")
        report.append(f"{index}. {repo['name']} - {repo['size']} bytes\n{repo['url']}\n{description}")

    # Only reached when there are results, so the file is never empty.
    st.download_button(
        label="Download results as text file",
        data='\n\n'.join(report),
        file_name=f"GitHub File Search Results - {filename}.txt",
        mime="text/plain",
    )


# Run the Streamlit app
if __name__ == "__main__":
    app()