Spaces:
Running
Running
Commit
·
5c4ad21
verified
·
0
Parent(s):
Super-squash branch 'main' using huggingface_hub
Browse files- .gitattributes +37 -0
- README.md +13 -0
- app.py +107 -0
- requirements.txt +3 -0
.gitattributes
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
danbooru_all_tags.json filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
danbooru_id_url.json filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Danbooru Images
|
| 3 |
+
emoji: 🚀
|
| 4 |
+
colorFrom: red
|
| 5 |
+
colorTo: blue
|
| 6 |
+
sdk: streamlit
|
| 7 |
+
sdk_version: 1.35.0
|
| 8 |
+
app_file: app.py
|
| 9 |
+
pinned: false
|
| 10 |
+
license: mit
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import time
|
| 4 |
+
import json
|
| 5 |
+
import plotly.graph_objects as go
|
| 6 |
+
|
| 7 |
+
# Use the wide page layout so the image grid can span the full browser width.
st.set_page_config(layout="wide")
|
| 8 |
+
|
| 9 |
+
@st.cache_resource
|
| 10 |
+
def load_and_preprocess_data():
    """Load the post table from Parquet and precompute the supported sort orders.

    The Parquet path is taken from the ``PARQUET_FILE`` environment variable.
    Rows are sorted by ``post_id`` descending, each ``tags`` value is converted
    to a ``set`` (so tag filters can use set operations), and ``post_id``
    becomes the index.

    Returns:
        tuple: ``(df, sorted_indices)`` where ``df`` is the preprocessed frame
        and ``sorted_indices`` maps each sort-option label to the frame's index
        arranged in that order.

    Raises:
        RuntimeError: If ``PARQUET_FILE`` is unset or empty.
    """
    # Imported here because the file's import block does not bring in `os`.
    import os

    start_time = time.time()

    parquet_path = os.getenv('PARQUET_FILE')
    if not parquet_path:
        # Fail with an actionable message instead of an opaque reader error.
        raise RuntimeError(
            "The PARQUET_FILE environment variable must point to the Parquet dataset."
        )

    df = pd.read_parquet(parquet_path)
    df = df.sort_values(by='post_id', ascending=False)
    df["tags"] = df["tags"].apply(set)
    df = df.set_index('post_id')

    # The frame is already in descending post_id order, so its index doubles
    # as the "descending" ordering and its reverse as the "ascending" one.
    sorted_indices = {
        'Post ID (Descending)': df.index,
        'Post ID (Ascending)': df.index[::-1],
        'Clip Score': df['clip_aesthetic'].sort_values(ascending=False).index,
        'Siglip Score': df['clip_aesthetic_2_5'].sort_values(ascending=False).index,
    }
    print(f"Data loaded and preprocessed: {time.time() - start_time:.2f} seconds")
    return df, sorted_indices
|
| 25 |
+
|
| 26 |
+
st.title('Danbooru Images')
data, sorted_indices = load_and_preprocess_data()

# Sidebar: score-range filters, page selection and sort order.
st.sidebar.header('Filter Options')
st.sidebar.write('Adjust the filter options to refine the results.')
score_range = st.sidebar.slider(
    'Select clip score range',
    min_value=0.0,
    max_value=10.0,
    value=(0.0, 10.0),
    step=0.1,
    help='Filter images based on their CLIP score range.',
)
score_range_v2 = st.sidebar.slider(
    'Select siglip score range',
    min_value=0.0,
    max_value=10.0,
    value=(6.0, 10.0),
    step=0.1,
    help='Filter images based on their SigLIP score range.',
)
page_number = st.sidebar.number_input(
    'Page',
    min_value=1,
    value=1,
    step=1,
    help='Navigate through the pages of filtered results.',
)
sort_option = st.sidebar.selectbox(
    'Sort by (slow)',
    options=[
        'Post ID (Descending)',
        'Post ID (Ascending)',
        'Clip Score',
        'Siglip Score',
    ],
    help='Select sorting option for the results.',
)
|
| 36 |
+
|
| 37 |
+
# Tag query: whitespace-separated tokens; a leading "-" marks a tag to exclude.
user_input_tags = st.text_input('Enter tags (space-separated)', help='Filter images based on tags. Use "-" to exclude tags.')
# str.split() with no arguments already drops surrounding whitespace and never
# yields empty tokens, so no further stripping is required.
tag_tokens = user_input_tags.split()
selected_tags = {tag for tag in tag_tokens if not tag.startswith('-')}
# A lone "-" carries no tag name; skip it so it does not add an empty-string
# tag that would needlessly trigger the per-row tag filter.
undesired_tags = {tag[1:] for tag in tag_tokens if tag.startswith('-') and len(tag) > 1}
print(f"Selected tags: {selected_tags}, Undesired tags: {undesired_tags}")
|
| 42 |
+
|
| 43 |
+
# Function to filter data based on user input
|
| 44 |
+
def filter_data(df, score_range, score_range_v2, selected_tags, sort_option,
                undesired_tags=None, sorted_indices=None):
    """Filter posts by score ranges and tags, and apply the requested ordering.

    Args:
        df: Frame with ``clip_aesthetic``, ``clip_aesthetic_2_5`` and ``tags``
            columns; each ``tags`` value must support set operations.
        score_range: ``(low, high)`` inclusive bounds for ``clip_aesthetic``.
        score_range_v2: ``(low, high)`` inclusive bounds for
            ``clip_aesthetic_2_5``.
        selected_tags: Set of tags every returned row must contain.
        sort_option: Label of the ordering to apply. ``"Post ID (Descending)"``
            keeps the order ``df`` already has.
        undesired_tags: Set of tags no returned row may contain. When omitted,
            the module-level ``undesired_tags`` is used (empty set if absent).
        sorted_indices: Mapping of sort label to a precomputed index ordering.
            When omitted, the module-level ``sorted_indices`` is used.

    Returns:
        The filtered (and, if requested, reordered) subset of ``df``.

    Raises:
        KeyError: If ``sort_option`` is not a key of ``sorted_indices``.
    """
    # The parameters shadow the module-level names, so look the fallbacks up
    # explicitly; this keeps existing five-argument calls working unchanged.
    if undesired_tags is None:
        undesired_tags = globals().get('undesired_tags') or set()
    if sorted_indices is None:
        sorted_indices = globals().get('sorted_indices') or {}

    start_time = time.time()

    filtered_data = df[
        (df['clip_aesthetic'] >= score_range[0]) &
        (df['clip_aesthetic'] <= score_range[1]) &
        (df['clip_aesthetic_2_5'] >= score_range_v2[0]) &
        (df['clip_aesthetic_2_5'] <= score_range_v2[1])
    ]
    print(f"Data filtered based on scores: {time.time() - start_time:.2f} seconds")

    if sort_option != "Post ID (Descending)":
        # Keep only the surviving labels from the precomputed ordering, then
        # reindex so rows follow that ordering.
        sorted_index = sorted_indices[sort_option]
        sorted_index = sorted_index[sorted_index.isin(filtered_data.index)]
        filtered_data = filtered_data.loc[sorted_index]
        print(f"Applying indcies: {time.time() - start_time:.2f} seconds")

    if selected_tags or undesired_tags:
        def matches(tags):
            # Row qualifies when it has every wanted tag and none of the unwanted ones.
            return selected_tags.issubset(tags) and undesired_tags.isdisjoint(tags)

        filtered_data = filtered_data[filtered_data['tags'].apply(matches)]

    print(f"Data filtered: {time.time() - start_time:.2f} seconds")
    return filtered_data
|
| 66 |
+
|
| 67 |
+
# Apply the current filters and report how many posts matched.
filtered_data = filter_data(data, score_range, score_range_v2, selected_tags, sort_option)
st.sidebar.write(f"Total filtered images: {len(filtered_data)}")

# Pagination: slice out the posts belonging to the requested page.
items_per_page = 30
start_idx = items_per_page * (page_number - 1)
end_idx = start_idx + items_per_page
current_data = filtered_data.iloc[start_idx:end_idx]

# Image grid: one st.columns row per chunk of `columns_per_row` posts.
columns_per_row = 5
for chunk_start in range(0, len(current_data), columns_per_row):
    chunk = current_data.iloc[chunk_start:chunk_start + columns_per_row]
    cols = st.columns(columns_per_row)
    # zip stops at the shorter side, so a short final chunk leaves the
    # remaining columns empty.
    for col, (_, row_data) in zip(cols, chunk.iterrows()):
        caption = f"ID: {row_data.name}, CLIP: {row_data['clip_aesthetic']:.2f}, SigLIP: {row_data['clip_aesthetic_2_5']:.2f}"
        with col:
            st.image(row_data['large_file_url'], caption=caption, use_column_width=True)
|
| 86 |
+
|
| 87 |
+
def histogram_slider(df, column1, column2):
    """Render overlaid histograms of two columns of ``df`` in the sidebar.

    At most 5000 randomly sampled rows are plotted.

    Args:
        df: Frame containing both columns.
        column1: Name of the first column to plot.
        column2: Name of the second column to plot.
    """
    sample_size = min(5000, len(df))
    sample_data = df.sample(sample_size)

    fig = go.Figure()
    for column in (column1, column2):
        trace = go.Histogram(x=sample_data[column], nbinsx=50, name=column, opacity=0.75)
        fig.add_trace(trace)

    # Compact layout: overlaid bars, no margins, horizontal legend under the plot.
    layout_options = dict(
        barmode='overlay',
        bargap=0.1,
        height=200,
        xaxis=dict(showticklabels=True),
        yaxis=dict(showticklabels=True),
        margin=dict(l=0, r=0, t=0, b=0),
        legend=dict(orientation='h', yanchor='bottom', y=-0.4, xanchor='center', x=0.5),
    )
    fig.update_layout(**layout_options)

    st.sidebar.plotly_chart(fig, use_container_width=True, config={'displayModeBar': False})
|
| 103 |
+
|
| 104 |
+
# Score-distribution histogram for the filtered posts, with render timing.
start_time = time.time()
histogram_slider(filtered_data, 'clip_aesthetic', 'clip_aesthetic_2_5')
elapsed = time.time() - start_time
print(f"Histogram displayed: {elapsed:.2f} seconds")
|
requirements.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
pyarrow>=16.0.0
|
| 2 |
+
pandas>=2.2.2
|
| 3 |
+
plotly
|