Spaces:

DINGOLANI
/

testautosearch

Sleeping

App Files Files Community

DINGOLANI committed on Jan 28, 2025

Commit

fd80bbd

verified ·

1 Parent(s): 17a81d1

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -44

app.py CHANGED Viewed

@@ -1,60 +1,37 @@
 import gradio as gr
-print("Gradio Version:", gr.__version__)
-from sentence_transformers import SentenceTransformer
 import pandas as pd
-from rapidfuzz import process  # For fuzzy matching
-# Pre-download the model during the build process
-model_name = "sentence-transformers/paraphrase-MiniLM-L3-v2"
-model = SentenceTransformer(model_name, cache_folder="./models")
-# Load your data from the CSV file
-data_file = "train_1.csv"  # Replace with your CSV file name
 try:
-    df = pd.read_csv(data_file, nrows=1000)  # Load only the first 1000 rows for testing
 except FileNotFoundError:
-    df = pd.DataFrame({"text": ["Sample data 1", "Sample data 2", "Sample data 3"]})  # Fallback sample data
-# Assuming your CSV has a column named 'text' with the data
-if "text" in df.columns:
-    data = df['text'].dropna().tolist()
-else:
-    data = ["Sample data 1", "Sample data 2", "Sample data 3"]  # Fallback if no 'text' column
-# Precompute embeddings
-embeddings = model.encode(data, convert_to_tensor=True)
-# Autocomplete with typo-tolerance using rapidfuzz
 def autocomplete(query):
     if not query.strip():
         return []  # Return empty if query is blank
-    matches = process.extract(query, data, scorer=process.WRatio, limit=5)  # Get top 5 fuzzy matches
-    return [match[0] for match in matches]  # Return the matched strings
-# Semantic search function
-def semantic_search(query):
-    if not query.strip():
-        return []  # Return empty if query is blank
-    query_embedding = model.encode(query, convert_to_tensor=True)
-    results = util.semantic_search(query_embedding, embeddings, top_k=5)
-    return [data[result['corpus_id']] for result in results[0]]
-# Define Gradio interface
 with gr.Blocks() as demo:
-    gr.Markdown("### Typo-Tolerant Autocomplete & Semantic Search")
-    with gr.Row():
-        query = gr.Textbox(label="Start typing for autocomplete", live=True)
-        autocomplete_output = gr.Textbox(label="Autocomplete Suggestions")
-    with gr.Row():
-        semantic_query = gr.Textbox(label="Enter your query for semantic search")
-        semantic_search_output = gr.Textbox(label="Semantic Search Results")
-    # Real-time autocomplete
     query.change(fn=autocomplete, inputs=query, outputs=autocomplete_output)
-    # Semantic search triggered on submit
-    semantic_query.submit(fn=semantic_search, inputs=semantic_query, outputs=semantic_search_output)
 demo.launch()

 import gradio as gr
+from sentence_transformers import SentenceTransformer, util
 import pandas as pd
+from rapidfuzz import process
+# Instantiate the sentence-transformers model at import time. NOTE(review): `model` is not referenced again in this version of the file - confirm the load is still needed.
+model_name = "sentence-transformers/all-MiniLM-L6-v2"
+model = SentenceTransformer(model_name)
+# CSV file that the autocomplete candidates are read from
+data_file = "train_1.csv"  # Replace with your actual file
 # Read the candidate strings used for autocomplete. Only the first 1000 rows
 # are loaded; if the CSV file is missing, a small built-in sample is used.
 try:
     df = pd.read_csv(data_file, nrows=1000)  # Limit rows for testing
 except FileNotFoundError:
     df = pd.DataFrame({"text": ["Sample data 1", "Sample data 2", "Sample data 3"]})  # Fallback data
 # A CSV without a 'text' column would otherwise raise KeyError at startup,
 # so fall back to the sample strings in that case as well.
 if "text" in df.columns:
     # Drop missing cells and cast to str so the matcher only ever sees strings.
     data = df["text"].dropna().astype(str).tolist()
 else:
     data = ["Sample data 1", "Sample data 2", "Sample data 3"]
+# Autocomplete function
 def autocomplete(query):
     """Return up to five fuzzy matches for *query*, one per line.

     The candidates come from the module-level ``data`` list. A missing or
     whitespace-only query yields an empty string, so the function always
     returns text suitable for a Textbox output.
     """
     if not query or not query.strip():
         return ""  # Nothing typed yet: clear the suggestions box
     # No explicit scorer: process.extract already defaults to WRatio, and
     # WRatio lives in rapidfuzz.fuzz rather than rapidfuzz.process.
     matches = process.extract(query, data, limit=5)
     # Each match is a (choice, score, index) tuple; keep only the choice text.
     return "\n".join(match[0] for match in matches)
+# Gradio interface
 # Build the UI: a text input plus a five-line box that shows the suggestions.
 with gr.Blocks() as demo:
     gr.Markdown("### Typo-Tolerant Autocomplete")
     # `live` is an option of gr.Interface, not of gr.Textbox, so it is not
     # passed here; the .change listener below already fires on every edit.
     query = gr.Textbox(label="Start typing for autocomplete")
     autocomplete_output = gr.Textbox(label="Autocomplete Suggestions", lines=5)
     # Re-run autocomplete whenever the input text changes
     query.change(fn=autocomplete, inputs=query, outputs=autocomplete_output)
 demo.launch()