tiffany101 committed on
Commit
d8eb6b3
·
verified ·
1 Parent(s): 5f41904

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -23
app.py CHANGED
@@ -4,9 +4,9 @@ from sentence_transformers import SentenceTransformer
4
  import gradio as gr
5
  import os
6
 
7
- # =============================
8
- # Download ChromaDB from Hugging Face dataset
9
- # =============================
10
  persist_dir = "chromadb"
11
  os.makedirs(persist_dir, exist_ok=True)
12
  db_path = os.path.join(persist_dir, "chroma.sqlite3")
@@ -14,48 +14,62 @@ db_path = os.path.join(persist_dir, "chroma.sqlite3")
14
  if not os.path.exists(db_path):
15
  print("Downloading ChromaDB from Hugging Face Dataset...")
16
  db_path = hf_hub_download(
17
- repo_id="tiffany101/my-chromadb", # your dataset repo
18
  filename="chroma.sqlite3",
19
- repo_type="dataset" # ensure it's recognized as a dataset
20
  )
21
- print("Download complete:", db_path)
 
22
 
23
- # =============================
24
- # Load or create Chroma collection
25
- # =============================
26
  client = PersistentClient(path=persist_dir)
 
27
 
 
28
  try:
29
  collection = client.get_collection("my_collection")
30
- print("Loaded existing collection: my_collection")
31
  except Exception as e:
32
- print("Collection not found creating new one...")
33
  collection = client.create_collection("my_collection")
34
 
35
- # =============================
36
- # Load embedding model
37
- # =============================
38
- model = SentenceTransformer("all-MiniLM-L6-v2")
 
 
 
 
 
 
 
 
 
 
 
39
 
40
- # =============================
41
- # Semantic Search Function
42
- # =============================
43
  def semantic_search(query):
44
  query_embedding = model.encode([query])
45
  results = collection.query(query_embeddings=query_embedding.tolist(), n_results=3)
46
- if len(results["documents"][0]) == 0:
47
  return "No matching documents found in the ChromaDB."
48
  return "\n\n".join(results["documents"][0])
49
 
50
- # =============================
51
- # Gradio App
52
- # =============================
53
  demo = gr.Interface(
54
  fn=semantic_search,
55
  inputs=gr.Textbox(label="Enter your search query"),
56
  outputs=gr.Textbox(label="Top Matches"),
57
  title="Semantic Search Engine",
58
- description="Search over your dataset using semantic similarity."
59
  )
60
 
61
  if __name__ == "__main__":
 
4
  import gradio as gr
5
  import os
6
 
7
+ # ==========================
8
+ # Step 1 — Download ChromaDB
9
+ # ==========================
10
  persist_dir = "chromadb"
11
  os.makedirs(persist_dir, exist_ok=True)
12
  db_path = os.path.join(persist_dir, "chroma.sqlite3")
 
14
  if not os.path.exists(db_path):
15
  print("Downloading ChromaDB from Hugging Face Dataset...")
16
  db_path = hf_hub_download(
17
+ repo_id="tiffany101/my-chromadb", # Your dataset repo
18
  filename="chroma.sqlite3",
19
+ repo_type="dataset"
20
  )
21
+ os.replace(db_path, os.path.join(persist_dir, "chroma.sqlite3"))
22
+ print("Download complete!")
23
 
24
+ # ==========================
25
+ # Step 2 — Load Chroma client
26
+ # ==========================
27
  client = PersistentClient(path=persist_dir)
28
+ model = SentenceTransformer("all-MiniLM-L6-v2")
29
 
30
+ # Try to load the existing collection; otherwise create a new one seeded with fallback sample data
31
  try:
32
  collection = client.get_collection("my_collection")
33
+ print("Loaded existing ChromaDB collection: my_collection")
34
  except Exception as e:
35
+ print("Collection not found, creating fallback collection...")
36
  collection = client.create_collection("my_collection")
37
 
38
+ # Add minimal fallback data so the app still works
39
+ sample_texts = [
40
+ "The Eiffel Tower is a famous landmark in Paris.",
41
+ "Machine learning helps computers learn from data.",
42
+ "The stock market rose today amid strong earnings reports.",
43
+ "The football team won the championship game.",
44
+ "Scientists discovered a new planet outside our solar system."
45
+ ]
46
+ sample_embeddings = model.encode(sample_texts)
47
+ collection.add(
48
+ documents=sample_texts,
49
+ embeddings=sample_embeddings.tolist(),
50
+ ids=[str(i) for i in range(len(sample_texts))]
51
+ )
52
+ print("Added fallback data.")
53
 
54
+ # ==========================
55
+ # Step 3 — Define search
56
+ # ==========================
57
  def semantic_search(query):
58
  query_embedding = model.encode([query])
59
  results = collection.query(query_embeddings=query_embedding.tolist(), n_results=3)
60
+ if not results["documents"] or len(results["documents"][0]) == 0:
61
  return "No matching documents found in the ChromaDB."
62
  return "\n\n".join(results["documents"][0])
63
 
64
+ # ==========================
65
+ # Step 4 — Launch Gradio app
66
+ # ==========================
67
  demo = gr.Interface(
68
  fn=semantic_search,
69
  inputs=gr.Textbox(label="Enter your search query"),
70
  outputs=gr.Textbox(label="Top Matches"),
71
  title="Semantic Search Engine",
72
+ description="Search across your Chroma database using semantic similarity."
73
  )
74
 
75
  if __name__ == "__main__":