DINGOLANI committed on
Commit
fd80bbd
·
verified ·
1 Parent(s): 17a81d1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -44
app.py CHANGED
@@ -1,60 +1,37 @@
1
  import gradio as gr
2
- print("Gradio Version:", gr.__version__)
3
- from sentence_transformers import SentenceTransformer
4
  import pandas as pd
5
- from rapidfuzz import process # For fuzzy matching
6
 
7
- # Load the model when the app starts; downloaded weights are cached under ./models
8
- model_name = "sentence-transformers/paraphrase-MiniLM-L3-v2"
9
- model = SentenceTransformer(model_name, cache_folder="./models")
10
 
11
# Load the searchable strings from the CSV file, falling back to sample data.
FALLBACK_TEXTS = ["Sample data 1", "Sample data 2", "Sample data 3"]


def load_texts(csv_path, column="text", nrows=1000):
    """Return the non-null values of *column* from the CSV at *csv_path*.

    Args:
        csv_path: Path of the CSV file to read.
        column: Name of the column holding the strings to search.
        nrows: Maximum number of rows to read (kept small for testing).

    Returns:
        A list of the column's non-null values, or a copy of
        ``FALLBACK_TEXTS`` when the file is missing, is empty, or has no
        such column.
    """
    try:
        frame = pd.read_csv(csv_path, nrows=nrows)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # A missing file and a zero-byte file are treated alike: use samples.
        return list(FALLBACK_TEXTS)
    if column not in frame.columns:
        return list(FALLBACK_TEXTS)
    return frame[column].dropna().tolist()


data_file = "train_1.csv"  # Replace with your CSV file name
data = load_texts(data_file)
23
 
24
- # Precompute embeddings
25
- embeddings = model.encode(data, convert_to_tensor=True)
26
-
27
# Autocomplete with typo-tolerance using rapidfuzz
def autocomplete(query):
    """Return up to five entries of ``data`` that fuzzily match *query*.

    Args:
        query: Text typed by the user; may be empty or ``None``.

    Returns:
        A list of matched strings, best match first. Blank or missing
        input yields an empty list.
    """
    if not query or not query.strip():
        return []  # Nothing typed yet, so there is nothing to suggest
    # WRatio is defined in rapidfuzz.fuzz; the original looked it up on
    # the `process` module instead.
    from rapidfuzz import fuzz

    matches = process.extract(query, data, scorer=fuzz.WRatio, limit=5)
    # Each match is a (choice, score, index) tuple; keep only the choice.
    return [choice for choice, _score, _index in matches]
33
-
34
# Semantic search function
def semantic_search(query):
    """Return the five entries of ``data`` closest in meaning to *query*.

    The query is embedded with the module-level ``model`` and compared
    against the precomputed ``embeddings``.

    Args:
        query: Search text entered by the user; may be empty or ``None``.

    Returns:
        A list of up to five strings from ``data``. Blank or missing
        input yields an empty list.
    """
    if not query or not query.strip():
        return []  # Nothing to search for
    # `util` is not imported at the top of this file, so bring it into
    # scope here rather than failing with a NameError on first use.
    from sentence_transformers import util

    query_embedding = model.encode(query, convert_to_tensor=True)
    hits = util.semantic_search(query_embedding, embeddings, top_k=5)
    # One query was submitted, so only the first result list is relevant.
    return [data[hit["corpus_id"]] for hit in hits[0]]
41
 
42
# Define the Gradio interface: one row for autocomplete, one for semantic search.
with gr.Blocks() as demo:
    gr.Markdown("### Typo-Tolerant Autocomplete & Semantic Search")

    with gr.Row():
        # `live` is a gr.Interface option, not a Textbox argument, so it is
        # not passed here; the `.change` listener below provides the
        # real-time behaviour.
        query = gr.Textbox(label="Start typing for autocomplete")
        autocomplete_output = gr.Textbox(label="Autocomplete Suggestions")

    with gr.Row():
        semantic_query = gr.Textbox(label="Enter your query for semantic search")
        semantic_search_output = gr.Textbox(label="Semantic Search Results")

    # Real-time autocomplete: re-run whenever the input text changes.
    query.change(fn=autocomplete, inputs=query, outputs=autocomplete_output)

    # Semantic search runs only when the user submits the query.
    semantic_query.submit(
        fn=semantic_search,
        inputs=semantic_query,
        outputs=semantic_search_output,
    )

demo.launch()
 
1
  import gradio as gr
2
+ from sentence_transformers import SentenceTransformer, util
 
3
  import pandas as pd
4
+ from rapidfuzz import process
5
 
6
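+ # Load the embedding model at startup. NOTE(review): nothing else in this version of app.py uses `model`; confirm it is still needed.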
7
+ model_name = "sentence-transformers/all-MiniLM-L6-v2"
8
+ model = SentenceTransformer(model_name)
9
 
10
# Load the strings to match against, falling back to sample data.
FALLBACK_TEXTS = ["Sample data 1", "Sample data 2", "Sample data 3"]


def load_texts(csv_path, column="text", nrows=1000):
    """Return the non-null values of *column* from the CSV at *csv_path*.

    Args:
        csv_path: Path of the CSV file to read.
        column: Name of the column holding the strings to match against.
        nrows: Maximum number of rows to read (kept small for testing).

    Returns:
        A list of the column's non-null values, or a copy of
        ``FALLBACK_TEXTS`` when the file is missing, is empty, or has no
        such column.
    """
    try:
        frame = pd.read_csv(csv_path, nrows=nrows)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # A missing file and a zero-byte file are treated alike: use samples.
        return list(FALLBACK_TEXTS)
    if column not in frame.columns:
        # Without this guard a CSV lacking the column raises KeyError.
        return list(FALLBACK_TEXTS)
    return frame[column].dropna().tolist()


data_file = "train_1.csv"  # Replace with your actual file
data = load_texts(data_file)
 
 
 
 
18
 
19
# Autocomplete function
def autocomplete(query):
    """Return up to five fuzzy matches for *query*, one per line.

    Candidates come from the module-level ``data`` list and are ranked
    with rapidfuzz's WRatio scorer.

    Args:
        query: Text typed by the user; may be empty or ``None``.

    Returns:
        A newline-separated string of matches, best match first. Blank or
        missing input yields an empty string, so every path returns ``str``.
    """
    if not query or not query.strip():
        return ""  # Nothing typed yet, so show no suggestions
    # WRatio is defined in rapidfuzz.fuzz; the original looked it up on
    # the `process` module instead.
    from rapidfuzz import fuzz

    matches = process.extract(query, data, scorer=fuzz.WRatio, limit=5)
    # Each match is a (choice, score, index) tuple; keep only the choice.
    return "\n".join(choice for choice, _score, _index in matches)
 
 
 
 
 
 
 
 
25
 
26
# Gradio interface: an input box whose edits refresh the suggestions box.
with gr.Blocks() as demo:
    gr.Markdown("### Typo-Tolerant Autocomplete")

    # `live` is a gr.Interface option, not a Textbox argument, so it is not
    # passed here; the `.change` listener below provides the real-time
    # behaviour.
    query = gr.Textbox(label="Start typing for autocomplete")
    autocomplete_output = gr.Textbox(label="Autocomplete Suggestions", lines=5)

    # Re-run the fuzzy lookup whenever the input text changes.
    query.change(fn=autocomplete, inputs=query, outputs=autocomplete_output)

demo.launch()