NeoPy committed on
Commit
ce0153c
·
verified ·
1 Parent(s): 3723064

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +146 -125
app.py CHANGED
@@ -5,145 +5,166 @@ import unicodedata
5
  import re
6
  import ast
7
  import requests
 
 
8
 
9
- database_url = "https://raw.githubusercontent.com/R3gm/database_zip_files/main/archive/database.csv"
10
- database_path = "database.csv"
11
- description = "This app digs through Hugging Face’s public zip files hunting for RVC models… and occasionally brings back random stuff that has nothing to do with them. Don’t worry though—the best RVC matches are always shown first, because we like to pretend we’re organized."
 
12
 
 
 
 
 
 
 
 
13
 
14
- def clean_file_url(val):
15
- # If missing
16
- if pd.isna(val):
17
- return ""
18
-
19
- # If it's already a list (e.g. from JSON/df directly)
20
- if isinstance(val, list):
21
- return ", ".join(map(str, val))
22
-
23
- # If it's a string like '["a","b"]'
24
- if isinstance(val, str) and val.strip().startswith("[") and val.strip().endswith("]"):
25
- try:
26
- parsed = ast.literal_eval(val)
27
- if isinstance(parsed, list):
28
- return ", ".join(map(str, parsed))
29
- except Exception:
30
- return val # fallback: leave as-is
31
-
32
- # Otherwise, return as-is
33
- return str(val)
34
-
35
 
36
- def normalize(text: str) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  if pd.isna(text):
38
  return ""
39
- # Convert to lowercase
40
  text = text.lower()
41
- # Remove accents
42
- text = ''.join(
43
- c for c in unicodedata.normalize('NFD', text)
44
- if unicodedata.category(c) != 'Mn'
45
- )
46
- # Replace separators with space
47
  return re.sub(r"[+()\-_/.]", " ", text)
48
 
49
-
50
- def search_files(query: str):
51
- if not query.strip():
52
- return pd.DataFrame([{"Result": "Empty query"}])
53
-
54
- keywords = normalize(query).split()
55
-
56
- whole_conditions = " AND ".join([
57
- f"(FILENAME_NORM LIKE '% {k} %' OR FILENAME_NORM LIKE '{k} %' OR FILENAME_NORM LIKE '% {k}' OR FILENAME_NORM = '{k}')"
58
- for k in keywords
59
- ])
60
- partial_conditions = " AND ".join([f"FILENAME_NORM LIKE '%{k}%'" for k in keywords])
61
-
62
- sql = f"""
63
- SELECT *,
64
- CASE WHEN {whole_conditions} THEN 1 ELSE 0 END AS whole_match
65
- FROM files
66
- WHERE {partial_conditions}
67
- ORDER BY whole_match DESC, orig_index ASC;
68
  """
69
-
70
- df = pd.read_sql(sql, conn)
71
-
72
- if df.empty:
73
- return "<p>No matches found</p>"
74
-
75
- df_subset = df.head(250) # limit 250 results
76
-
77
- rows = []
78
- for i, row in enumerate(df_subset.itertuples(index=False)):
79
- filename = row.FILENAME
80
- url = row.PARSED_URL
81
- model_id = row.MODEL_ID
82
-
83
- rows.append(f"""
84
- <tr>
85
- <td>{filename}</td>
86
- <td>
87
- <input type="text" value="{url}" id="copytext{i}" readonly
88
- style="width:300px; padding:4px; border-radius:6px; border:1px solid #666;
89
- background-color:var(--block-background-fill);
90
- color:var(--body-text-color);" />
91
- <button style="margin-left:5px; padding:4px 8px; border-radius:6px;
92
- background-color:var(--button-primary-background-fill);
93
- color:var(--button-primary-text-color);
94
- border:none; cursor:pointer;"
95
- onclick="navigator.clipboard.writeText(document.getElementById('copytext{i}').value)">
96
- Copy
97
- </button>
98
- </td>
99
- <td>{model_id}</td>
100
- </tr>
101
- """)
102
-
103
- html = f"""
104
- <table border=1 style="border-collapse:collapse; width:100%; text-align:left;">
105
- <thead>
106
- <tr>
107
- <th style="padding:6px;">Filename</th>
108
- <th style="padding:6px;">File URL</th>
109
- <th style="padding:6px;">Repo ID</th>
110
- </tr>
111
- </thead>
112
- <tbody>
113
- {''.join(rows)}
114
- </tbody>
115
- </table>
116
  """
 
 
 
 
 
 
 
 
 
 
117
 
118
- return html
119
-
120
-
121
- response = requests.get(database_url, stream=True)
122
- with open(database_path, "wb") as f:
123
- for chunk in response.iter_content(chunk_size=8192):
124
- f.write(chunk)
125
-
126
- df = pd.read_csv(database_path)
127
-
128
- df["FILENAME_NORM"] = df["FILENAME"].apply(normalize)
129
- df["PARSED_URL"] = df["PARSED_URL"].apply(clean_file_url)
130
-
131
- df = df.reset_index(drop=True)
132
- df["orig_index"] = df.index
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
 
134
- # Connect to SQLite
135
- conn = sqlite3.connect(":memory:", check_same_thread=False)
136
- df.to_sql("files", conn, index=False, if_exists="replace")
137
 
138
- with gr.Blocks() as demo:
139
- gr.Markdown("## 🔍 RVC Voice Finder")
140
- query_input = gr.Textbox(label="Search here", placeholder="Hatsune Miku")
141
- button_query = gr.Button("Search")
142
- output = gr.HTML(label="Search Results")
143
- gr.Markdown(description)
144
 
145
- query_input.submit(search_files, inputs=query_input, outputs=output)
146
- button_query.click(search_files, inputs=query_input, outputs=output)
147
 
148
  if __name__ == "__main__":
149
- demo.launch(debug=True, show_error=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  import re
6
  import ast
7
  import requests
8
+ from pathlib import Path
9
+ from typing import Optional
10
 
11
+ # --- Constants ---
12
+ DATABASE_URL = "https://raw.githubusercontent.com/R3gm/database_zip_files/main/archive/database.csv"
13
+ DATABASE_PATH = Path("database.csv")
14
+ DB_CONNECTION = None
15
 
16
+ # --- UI Configuration ---
17
+ APP_TITLE = "## 🔍 RVC Voice Finder"
18
+ APP_DESCRIPTION = (
19
+ "This app digs through Hugging Face’s public zip files hunting for RVC models… "
20
+ "and occasionally brings back random stuff that has nothing to do with them. "
21
+ "Don’t worry though—the best matches are always shown first."
22
+ )
23
 
24
+ # --- Function Definitions ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
+ def setup_database() -> Optional[sqlite3.Connection]:
27
+ """
28
+ Downloads the database, preprocesses it, and loads it into an in-memory
29
+ SQLite FTS5 table for fast text searching.
30
+ """
31
+ print("Setting up the database...")
32
+ try:
33
+ # Download the database file
34
+ print(f"Downloading data from {DATABASE_URL}...")
35
+ response = requests.get(DATABASE_URL, stream=True, timeout=30)
36
+ response.raise_for_status()
37
+ with open(DATABASE_PATH, "wb") as f:
38
+ for chunk in response.iter_content(chunk_size=8192):
39
+ f.write(chunk)
40
+ print("Download complete.")
41
+
42
+ except requests.exceptions.RequestException as e:
43
+ print(f"Error downloading the database: {e}")
44
+ # Use local file if it exists, otherwise fail
45
+ if not DATABASE_PATH.exists():
46
+ raise FileNotFoundError(f"Failed to download and local copy not found at {DATABASE_PATH}") from e
47
+ print("Using existing local database file.")
48
+
49
+ # Load and preprocess the data with pandas
50
+ df = pd.read_csv(DATABASE_PATH)
51
+ df.rename(columns={"FILENAME": "Filename", "PARSED_URL": "URL", "MODEL_ID": "Repo ID"}, inplace=True)
52
+ df['normalized_filename'] = df['Filename'].apply(normalize_text)
53
+ df['URL'] = df['URL'].apply(clean_file_url)
54
+ df = df.reset_index().rename(columns={'index': 'rowid'}) # Use original index as rowid
55
+
56
+ # Connect to an in-memory SQLite database
57
+ conn = sqlite3.connect(":memory:", check_same_thread=False)
58
+
59
+ # Load the main data into a standard table
60
+ df.to_sql("models", conn, index=False, if_exists="replace")
61
+
62
+ # Create and populate the FTS5 virtual table for fast searching
63
+ conn.execute("""
64
+ CREATE VIRTUAL TABLE models_fts USING fts5(
65
+ Filename,
66
+ normalized_filename,
67
+ URL,
68
+ 'Repo ID',
69
+ content='models',
70
+ content_rowid='rowid'
71
+ );
72
+ """)
73
+ conn.execute("""
74
+ INSERT INTO models_fts(rowid, Filename, normalized_filename, URL, "Repo ID")
75
+ SELECT rowid, Filename, normalized_filename, URL, "Repo ID" FROM models;
76
+ """)
77
+ print("Database setup complete and loaded into memory.")
78
+ return conn
79
+
80
+ def normalize_text(text: str) -> str:
81
+ """
82
+ Cleans and standardizes text for searching by lowercasing,
83
+ removing accents, and replacing separators with spaces.
84
+ """
85
  if pd.isna(text):
86
  return ""
 
87
  text = text.lower()
88
+ text = ''.join(c for c in unicodedata.normalize('NFD', text) if unicodedata.category(c) != 'Mn')
 
 
 
 
 
89
  return re.sub(r"[+()\-_/.]", " ", text)
90
 
91
+ def clean_file_url(val) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  """
93
+ Cleans the URL column, handling lists stored as strings.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  """
95
+ if pd.isna(val):
96
+ return ""
97
+ if isinstance(val, str) and val.strip().startswith("["):
98
+ try:
99
+ # Safely evaluate string representation of a list
100
+ parsed_list = ast.literal_eval(val)
101
+ return ", ".join(map(str, parsed_list)) if isinstance(parsed_list, list) else val
102
+ except (ValueError, SyntaxError):
103
+ return val # Return original string if parsing fails
104
+ return str(val)
105
 
106
+ def search_models(query: str) -> Optional[pd.DataFrame]:
107
+ """
108
+ Searches the FTS table for models matching the query.
109
+ """
110
+ if not query.strip() or DB_CONNECTION is None:
111
+ return None
112
+
113
+ # Sanitize query and prepare for FTS by joining with "AND"
114
+ keywords = normalize_text(query).split()
115
+ fts_query = " AND ".join(keywords)
116
+
117
+ if not fts_query:
118
+ return None
119
+
120
+ # Use FTS MATCH operator for efficient search
121
+ sql_query = f"""
122
+ SELECT Filename, URL, "Repo ID"
123
+ FROM models_fts
124
+ WHERE normalized_filename MATCH ?
125
+ ORDER BY rank
126
+ LIMIT 250;
127
+ """
128
+
129
+ try:
130
+ df_results = pd.read_sql_query(sql_query, DB_CONNECTION, params=(fts_query,))
131
+ except sqlite3.OperationalError as e:
132
+ # This can happen if FTS query syntax is invalid
133
+ gr.Warning(f"Search error: {e}")
134
+ return None
135
 
136
+ if df_results.empty:
137
+ gr.Info("No matches found for your query.")
138
+ return None
139
 
140
+ return df_results
 
 
 
 
 
141
 
142
+ # --- Main Execution & Gradio App ---
 
143
 
144
  if __name__ == "__main__":
145
+ DB_CONNECTION = setup_database()
146
+
147
+ with gr.Blocks() as demo:
148
+ gr.Markdown(APP_TITLE)
149
+
150
+ with gr.Row():
151
+ query_input = gr.Textbox(
152
+ label="Search here",
153
+ placeholder="e.g., Hatsune Miku",
154
+ scale=4,
155
+ )
156
+ search_button = gr.Button("Search", variant="primary", scale=1)
157
+
158
+ output_df = gr.DataFrame(
159
+ label="Search Results",
160
+ interactive=False,
161
+ headers=["Filename", "URL", "Repo ID"]
162
+ )
163
+
164
+ gr.Markdown(APP_DESCRIPTION)
165
+
166
+ # Event listeners
167
+ query_input.submit(search_models, inputs=query_input, outputs=output_df)
168
+ search_button.click(search_models, inputs=query_input, outputs=output_df)
169
+
170
+ demo.launch(debug=True, show_error=True)