Model_Voice_Finder

Sleeping

App Files Files Community

NeoPy committed on Sep 9, 2025

Commit

ce0153c

verified ·

1 Parent(s): 3723064

Update app.py

Browse files

Files changed (1) hide show

app.py +146 -125

app.py CHANGED Viewed

@@ -5,145 +5,166 @@ import unicodedata
 import re
 import ast
 import requests
-database_url = "https://raw.githubusercontent.com/R3gm/database_zip_files/main/archive/database.csv"
-database_path = "database.csv"
-description = "This app digs through Hugging Face’s public zip files hunting for RVC models… and occasionally brings back random stuff that has nothing to do with them. Don’t worry though—the best RVC matches are always shown first, because we like to pretend we’re organized."
-def clean_file_url(val):
-    # If missing
-    if pd.isna(val):
-        return ""
-    # If it's already a list (e.g. from JSON/df directly)
-    if isinstance(val, list):
-        return ", ".join(map(str, val))
-    # If it's a string like '["a","b"]'
-    if isinstance(val, str) and val.strip().startswith("[") and val.strip().endswith("]"):
-        try:
-            parsed = ast.literal_eval(val)
-            if isinstance(parsed, list):
-                return ", ".join(map(str, parsed))
-        except Exception:
-            return val  # fallback: leave as-is
-    # Otherwise, return as-is
-    return str(val)
-def normalize(text: str) -> str:
     if pd.isna(text):
         return ""
-    # Convert to lowercase
     text = text.lower()
-    # Remove accents
-    text = ''.join(
-        c for c in unicodedata.normalize('NFD', text)
-        if unicodedata.category(c) != 'Mn'
-    )
-    # Replace separators with space
     return re.sub(r"[+()\-_/.]", " ", text)
-def search_files(query: str):
-    if not query.strip():
-        return pd.DataFrame([{"Result": "Empty query"}])
-    keywords = normalize(query).split()
-    whole_conditions = " AND ".join([
-        f"(FILENAME_NORM LIKE '% {k} %' OR FILENAME_NORM LIKE '{k} %' OR FILENAME_NORM LIKE '% {k}' OR FILENAME_NORM = '{k}')"
-        for k in keywords
-    ])
-    partial_conditions = " AND ".join([f"FILENAME_NORM LIKE '%{k}%'" for k in keywords])
-    sql = f"""
-    SELECT *,
-           CASE WHEN {whole_conditions} THEN 1 ELSE 0 END AS whole_match
-    FROM files
-    WHERE {partial_conditions}
-    ORDER BY whole_match DESC, orig_index ASC;
     """
-    df = pd.read_sql(sql, conn)
-    if df.empty:
-        return "<p>No matches found</p>"
-    df_subset = df.head(250)  # limit 250 results
-    rows = []
-    for i, row in enumerate(df_subset.itertuples(index=False)):
-        filename = row.FILENAME
-        url = row.PARSED_URL
-        model_id = row.MODEL_ID
-        rows.append(f"""
-        <tr>
-            <td>{filename}</td>
-            <td>
-                <input type="text" value="{url}" id="copytext{i}" readonly
-                      style="width:300px; padding:4px; border-radius:6px; border:1px solid #666;
-                            background-color:var(--block-background-fill);
-                            color:var(--body-text-color);" />
-                <button style="margin-left:5px; padding:4px 8px; border-radius:6px;
-                              background-color:var(--button-primary-background-fill);
-                              color:var(--button-primary-text-color);
-                              border:none; cursor:pointer;"
-                        onclick="navigator.clipboard.writeText(document.getElementById('copytext{i}').value)">
-                    Copy
-                </button>
-            </td>
-            <td>{model_id}</td>
-        </tr>
-        """)
-    html = f"""
-    <table border=1 style="border-collapse:collapse; width:100%; text-align:left;">
-    <thead>
-        <tr>
-            <th style="padding:6px;">Filename</th>
-            <th style="padding:6px;">File URL</th>
-            <th style="padding:6px;">Repo ID</th>
-        </tr>
-    </thead>
-    <tbody>
-    {''.join(rows)}
-    </tbody>
-    </table>
     """
-    return html
-response = requests.get(database_url, stream=True)
-with open(database_path, "wb") as f:
-    for chunk in response.iter_content(chunk_size=8192):
-        f.write(chunk)
-df = pd.read_csv(database_path)
-df["FILENAME_NORM"] = df["FILENAME"].apply(normalize)
-df["PARSED_URL"] = df["PARSED_URL"].apply(clean_file_url)
-df = df.reset_index(drop=True)
-df["orig_index"] = df.index
-# Connect to SQLite
-conn = sqlite3.connect(":memory:", check_same_thread=False)
-df.to_sql("files", conn, index=False, if_exists="replace")
-with gr.Blocks() as demo:
-    gr.Markdown("## 🔍 RVC Voice Finder")
-    query_input = gr.Textbox(label="Search here", placeholder="Hatsune Miku")
-    button_query = gr.Button("Search")
-    output = gr.HTML(label="Search Results")
-    gr.Markdown(description)
-    query_input.submit(search_files, inputs=query_input, outputs=output)
-    button_query.click(search_files, inputs=query_input, outputs=output)
 if __name__ == "__main__":
-    demo.launch(debug=True, show_error=True)

 import re
 import ast
 import requests
+from pathlib import Path
+from typing import Optional
+# --- Constants ---
+# Remote CSV that setup_database() downloads at startup.
+DATABASE_URL = "https://raw.githubusercontent.com/R3gm/database_zip_files/main/archive/database.csv"
+# Local file the download is written to and that pandas later reads back.
+DATABASE_PATH = Path("database.csv")
+# Module-level SQLite connection. It stays None until the __main__ block assigns
+# the result of setup_database(); search_models() returns None while it is unset.
+DB_CONNECTION = None
+# --- UI Configuration ---
+# Markdown heading rendered at the top of the page.
+APP_TITLE = "## 🔍 RVC Voice Finder"
+# Markdown blurb rendered below the results table.
+APP_DESCRIPTION = (
+    "This app digs through Hugging Face’s public zip files hunting for RVC models… "
+    "and occasionally brings back random stuff that has nothing to do with them. "
+    "Don’t worry though—the best matches are always shown first."
+)
+# --- Function Definitions ---
+def setup_database() -> Optional[sqlite3.Connection]:
+    """
+    Downloads the database, preprocesses it, and loads it into an in-memory
+    SQLite FTS5 table for fast text searching.
+    """
+    print("Setting up the database...")
+    try:
+        # Download the database file
+        print(f"Downloading data from {DATABASE_URL}...")
+        response = requests.get(DATABASE_URL, stream=True, timeout=30)
+        response.raise_for_status()
+        with open(DATABASE_PATH, "wb") as f:
+            for chunk in response.iter_content(chunk_size=8192):
+                f.write(chunk)
+        print("Download complete.")
+    except requests.exceptions.RequestException as e:
+        print(f"Error downloading the database: {e}")
+        # Use local file if it exists, otherwise fail
+        if not DATABASE_PATH.exists():
+            raise FileNotFoundError(f"Failed to download and local copy not found at {DATABASE_PATH}") from e
+        print("Using existing local database file.")
+    # Load and preprocess the data with pandas
+    df = pd.read_csv(DATABASE_PATH)
+    df.rename(columns={"FILENAME": "Filename", "PARSED_URL": "URL", "MODEL_ID": "Repo ID"}, inplace=True)
+    df['normalized_filename'] = df['Filename'].apply(normalize_text)
+    df['URL'] = df['URL'].apply(clean_file_url)
+    df = df.reset_index().rename(columns={'index': 'rowid'}) # Use original index as rowid
+    # Connect to an in-memory SQLite database
+    conn = sqlite3.connect(":memory:", check_same_thread=False)
+    # Load the main data into a standard table
+    df.to_sql("models", conn, index=False, if_exists="replace")
+    # Create and populate the FTS5 virtual table for fast searching
+    conn.execute("""
+        CREATE VIRTUAL TABLE models_fts USING fts5(
+            Filename,
+            normalized_filename,
+            URL,
+            'Repo ID',
+            content='models',
+            content_rowid='rowid'
+        );
+    """)
+    conn.execute("""
+        INSERT INTO models_fts(rowid, Filename, normalized_filename, URL, "Repo ID")
+        SELECT rowid, Filename, normalized_filename, URL, "Repo ID" FROM models;
+    """)
+    print("Database setup complete and loaded into memory.")
+    return conn
+def normalize_text(text: str) -> str:
+    """
+    Cleans and standardizes text for searching by lowercasing,
+    removing accents, and replacing separators with spaces.
+    """
     if pd.isna(text):
         return ""
     text = text.lower()
+    text = ''.join(c for c in unicodedata.normalize('NFD', text) if unicodedata.category(c) != 'Mn')
     return re.sub(r"[+()\-_/.]", " ", text)
+def clean_file_url(val) -> str:
     """
+    Cleans the URL column, handling lists stored as strings.
     """
+    if pd.isna(val):
+        return ""
+    if isinstance(val, str) and val.strip().startswith("["):
+        try:
+            # Safely evaluate string representation of a list
+            parsed_list = ast.literal_eval(val)
+            return ", ".join(map(str, parsed_list)) if isinstance(parsed_list, list) else val
+        except (ValueError, SyntaxError):
+            return val  # Return original string if parsing fails
+    return str(val)
+def search_models(query: str) -> Optional[pd.DataFrame]:
+    """
+    Searches the FTS table for models matching the query.
+    """
+    if not query.strip() or DB_CONNECTION is None:
+        return None
+    # Sanitize query and prepare for FTS by joining with "AND"
+    keywords = normalize_text(query).split()
+    fts_query = " AND ".join(keywords)
+    if not fts_query:
+        return None
+    # Use FTS MATCH operator for efficient search
+    sql_query = f"""
+        SELECT Filename, URL, "Repo ID"
+        FROM models_fts
+        WHERE normalized_filename MATCH ?
+        ORDER BY rank
+        LIMIT 250;
+    """
+    try:
+        df_results = pd.read_sql_query(sql_query, DB_CONNECTION, params=(fts_query,))
+    except sqlite3.OperationalError as e:
+        # This can happen if FTS query syntax is invalid
+        gr.Warning(f"Search error: {e}")
+        return None
+    if df_results.empty:
+        gr.Info("No matches found for your query.")
+        return None
+    return df_results
+# --- Main Execution & Gradio App ---
+# Everything below runs only when the file is executed as a script.
 if __name__ == "__main__":
+    # Build the search index once at startup; search_models() reads it
+    # through the module-level DB_CONNECTION.
+    DB_CONNECTION = setup_database()
+    with gr.Blocks() as demo:
+        gr.Markdown(APP_TITLE)
+        # Query box and search button share one row.
+        with gr.Row():
+            query_input = gr.Textbox(
+                label="Search here",
+                placeholder="e.g., Hatsune Miku",
+                scale=4,
+            )
+            search_button = gr.Button("Search", variant="primary", scale=1)
+        # Read-only table; the headers match the columns search_models() selects.
+        output_df = gr.DataFrame(
+            label="Search Results",
+            interactive=False,
+            headers=["Filename", "URL", "Repo ID"]
+        )
+        gr.Markdown(APP_DESCRIPTION)
+        # Event listeners: the textbox's submit event and the button's click
+        # event both run the same search and fill the same table.
+        query_input.submit(search_models, inputs=query_input, outputs=output_df)
+        search_button.click(search_models, inputs=query_input, outputs=output_df)
+    demo.launch(debug=True, show_error=True)