File size: 5,622 Bytes
699386e
 
 
 
 
 
 
ce0153c
 
699386e
ce0153c
 
 
 
699386e
ce0153c
 
 
 
 
 
 
699386e
ce0153c
699386e
ce0153c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
699386e
 
 
ce0153c
699386e
 
ce0153c
699386e
ce0153c
699386e
ce0153c
 
 
 
 
 
 
 
 
 
699386e
ce0153c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
699386e
ce0153c
 
 
699386e
ce0153c
699386e
ce0153c
699386e
 
ce0153c
 
 
 
 
 
 
 
 
 
 
 
 
c9693af
9035de0
 
ce0153c
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
import pandas as pd
import sqlite3
import gradio as gr
import unicodedata
import re
import ast
import requests
from pathlib import Path
from typing import Optional

# --- Constants ---
# Remote CSV holding the model index, and where its local copy is stored.
DATABASE_URL: str = "https://raw.githubusercontent.com/R3gm/database_zip_files/main/archive/database.csv"
DATABASE_PATH: Path = Path("database.csv")
# Shared SQLite connection; stays None until the script entry point assigns it.
DB_CONNECTION: Optional[sqlite3.Connection] = None

# --- UI Configuration ---
# Markdown snippets rendered above and below the search widgets.
APP_TITLE: str = "## 🔍 RVC Voice Finder"
APP_DESCRIPTION: str = " ".join(
    (
        "This app digs through Hugging Face’s public zip files hunting for RVC models…",
        "and occasionally brings back random stuff that has nothing to do with them.",
        "Don’t worry though—the best matches are always shown first.",
    )
)

# --- Function Definitions ---

def _download_database(url: str, destination: Path) -> None:
    """Stream *url* to disk, replacing *destination* only after a complete download."""
    # Write to a sibling ".part" file first so that an interrupted transfer
    # never truncates a previously downloaded, still-usable local copy.
    partial_path = destination.with_name(destination.name + ".part")
    try:
        # The context manager releases the streamed connection even on error.
        with requests.get(url, stream=True, timeout=30) as response:
            response.raise_for_status()
            with open(partial_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        partial_path.replace(destination)
    finally:
        # Only present here if the download or the final swap failed.
        if partial_path.exists():
            partial_path.unlink()


def setup_database() -> Optional[sqlite3.Connection]:
    """
    Downloads the database, preprocesses it, and loads it into an in-memory
    SQLite FTS5 table for fast text searching.

    If the download fails, an existing local copy at DATABASE_PATH is used
    instead.

    Returns:
        An open in-memory SQLite connection holding the ``models`` table and
        the ``models_fts`` full-text index built over it.

    Raises:
        FileNotFoundError: If the download fails and no local copy exists.
    """
    print("Setting up the database...")
    try:
        # Download the database file
        print(f"Downloading data from {DATABASE_URL}...")
        _download_database(DATABASE_URL, DATABASE_PATH)
        print("Download complete.")

    except requests.exceptions.RequestException as e:
        print(f"Error downloading the database: {e}")
        # Use local file if it exists, otherwise fail
        if not DATABASE_PATH.exists():
            raise FileNotFoundError(f"Failed to download and local copy not found at {DATABASE_PATH}") from e
        print("Using existing local database file.")

    # Load and preprocess the data with pandas
    df = pd.read_csv(DATABASE_PATH)
    df = df.rename(columns={"FILENAME": "Filename", "PARSED_URL": "URL", "MODEL_ID": "Repo ID"})
    df['normalized_filename'] = df['Filename'].apply(normalize_text)
    df['URL'] = df['URL'].apply(clean_file_url)
    df = df.reset_index().rename(columns={'index': 'rowid'})  # Use original index as rowid

    # Connect to an in-memory SQLite database. check_same_thread=False lets
    # the connection be used from threads other than the one creating it.
    conn = sqlite3.connect(":memory:", check_same_thread=False)
    try:
        # Load the main data into a standard table
        df.to_sql("models", conn, index=False, if_exists="replace")

        # pandas writes "rowid" as an ordinary, unindexed column. The FTS
        # table below is declared with content_rowid='rowid', so index that
        # column to keep per-row lookups into "models" from scanning the table.
        conn.execute('CREATE INDEX IF NOT EXISTS idx_models_rowid ON models("rowid");')

        # Create and populate the FTS5 virtual table for fast searching
        conn.execute("""
            CREATE VIRTUAL TABLE models_fts USING fts5(
                Filename,
                normalized_filename,
                URL,
                'Repo ID',
                content='models',
                content_rowid='rowid'
            );
        """)
        conn.execute("""
            INSERT INTO models_fts(rowid, Filename, normalized_filename, URL, "Repo ID")
            SELECT rowid, Filename, normalized_filename, URL, "Repo ID" FROM models;
        """)
        # Close the transaction opened by the INSERT instead of leaving it pending.
        conn.commit()
    except Exception:
        # Do not leak the connection if building the tables fails.
        conn.close()
        raise

    print("Database setup complete and loaded into memory.")
    return conn

def normalize_text(text: str) -> str:
    """
    Cleans and standardizes text for searching by lowercasing,
    removing accents, and replacing separators with spaces.

    Missing values (None, NaN, pandas NA) yield an empty string. Any other
    non-string value is converted with ``str()`` first, so a numeric cell
    does not raise.

    Returns:
        The normalized text. Each separator character (``+ ( ) - _ / .``)
        becomes one space; runs of spaces are not collapsed.
    """
    if pd.isna(text):
        return ""
    lowered = str(text).lower()
    # NFD splits accented letters into base letter + combining mark; dropping
    # the marks (category "Mn") leaves the unaccented base letters.
    decomposed = unicodedata.normalize("NFD", lowered)
    unaccented = "".join(ch for ch in decomposed if unicodedata.category(ch) != "Mn")
    return re.sub(r"[+()\-_/.]", " ", unaccented)

def clean_file_url(val) -> str:
    """
    Cleans the URL column, handling lists stored as strings.

    Missing values yield an empty string. A string that looks like a Python
    list literal (starts with "[" after stripping whitespace) is parsed and
    its items are joined with ", ". If parsing fails, the original string is
    returned unchanged. Any other value is returned as ``str(val)``.
    """
    if pd.isna(val):
        return ""
    if not isinstance(val, str):
        return str(val)

    candidate = val.strip()
    if not candidate.startswith("["):
        return val

    try:
        # literal_eval only accepts literals, so no code is executed here.
        parsed = ast.literal_eval(candidate)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Besides ValueError/SyntaxError, malformed input can also trigger
        # TypeError (e.g. an unhashable dict key), MemoryError or
        # RecursionError; one bad cell must not abort the whole load.
        return val

    if isinstance(parsed, list):
        return ", ".join(map(str, parsed))
    return val

def _fts5_term(keyword: str) -> Optional[str]:
    """Turn one keyword into a quoted FTS5 term, or None if it has nothing searchable."""
    # A trailing "*" is FTS5's prefix-query marker; keep it outside the quotes.
    is_prefix = keyword.endswith("*")
    core = keyword.rstrip("*")
    # Skip keywords made only of punctuation so they cannot veto the AND query.
    if not any(ch.isalnum() for ch in core):
        return None
    # Inside an FTS5 string, a literal double quote is written as two quotes.
    quoted = '"' + core.replace('"', '""') + '"'
    return quoted + "*" if is_prefix else quoted


def search_models(query: str) -> Optional[pd.DataFrame]:
    """
    Searches the FTS table for models matching the query.

    The query is normalized and split on whitespace. Every keyword must match
    the ``normalized_filename`` column (terms are combined with AND). A
    keyword ending in ``*`` is treated as a prefix search.

    Args:
        query: Free-text search string typed by the user.

    Returns:
        A DataFrame with the columns ``Filename``, ``URL`` and ``Repo ID``
        holding at most 250 rows ordered by FTS rank, or None when the query
        is empty, the database is not loaded, the search fails, or nothing
        matches.
    """
    if not query or not query.strip() or DB_CONNECTION is None:
        return None

    # Quote each keyword so punctuation in user input (apostrophes, colons,
    # quotes, ...) is treated as text rather than as FTS5 query syntax.
    terms = [_fts5_term(keyword) for keyword in normalize_text(query).split()]
    fts_query = " AND ".join(term for term in terms if term is not None)

    if not fts_query:
        return None

    # Use FTS MATCH operator for efficient search
    sql_query = """
        SELECT Filename, URL, "Repo ID"
        FROM models_fts
        WHERE normalized_filename MATCH ?
        ORDER BY rank
        LIMIT 250;
    """

    try:
        # Executed directly on the connection so that SQLite errors surface
        # as sqlite3.Error and are handled by the except clause below.
        cursor = DB_CONNECTION.execute(sql_query, (fts_query,))
        rows = cursor.fetchall()
    except sqlite3.Error as e:
        # This can happen if FTS query syntax is invalid
        gr.Warning(f"Search error: {e}")
        return None

    if not rows:
        gr.Info("No matches found for your query.")
        return None

    column_names = [column[0] for column in cursor.description]
    return pd.DataFrame(rows, columns=column_names)

# --- Main Execution & Gradio App ---

if __name__ == "__main__":
    # Build the in-memory search index once, before the UI starts serving.
    DB_CONNECTION = setup_database()

    def render_results(query: str) -> str:
        """Run the search and render the matches as an HTML table.

        The output component is gr.HTML, which displays a string, while
        search_models returns a DataFrame or None; this adapter bridges them.
        """
        results = search_models(query)
        if results is None:
            return ""
        # to_html escapes cell contents by default, so filenames and URLs
        # from the dataset are shown as text rather than interpreted as markup.
        return results.to_html(index=False)

    with gr.Blocks() as demo:
        gr.Markdown(APP_TITLE)

        with gr.Row():
            query_input = gr.Textbox(
                label="Search here",
                placeholder="e.g., Hatsune Miku",
                scale=4,
            )
            search_button = gr.Button("Search", variant="primary", scale=1)

        output_df = gr.HTML(label="Search Results")

        gr.Markdown(APP_DESCRIPTION)

        # Event listeners: pressing Enter in the textbox and clicking the
        # button both run the same search.
        query_input.submit(render_results, inputs=query_input, outputs=output_df)
        search_button.click(render_results, inputs=query_input, outputs=output_df)

    demo.launch(debug=True, show_error=True)