tiffany101 committed on
Commit
88a99f5
·
verified ·
1 Parent(s): d8eb6b3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -16
app.py CHANGED
@@ -3,23 +3,24 @@ from chromadb import PersistentClient
3
  from sentence_transformers import SentenceTransformer
4
  import gradio as gr
5
  import os
 
6
 
7
  # ==========================
8
  # Step 1 — Download ChromaDB
9
  # ==========================
10
  persist_dir = "chromadb"
11
  os.makedirs(persist_dir, exist_ok=True)
12
- db_path = os.path.join(persist_dir, "chroma.sqlite3")
13
 
14
- if not os.path.exists(db_path):
15
  print("Downloading ChromaDB from Hugging Face Dataset...")
16
- db_path = hf_hub_download(
17
- repo_id="tiffany101/my-chromadb", # Your dataset repo
18
  filename="chroma.sqlite3",
19
  repo_type="dataset"
20
  )
21
- os.replace(db_path, os.path.join(persist_dir, "chroma.sqlite3"))
22
- print("Download complete!")
23
 
24
  # ==========================
25
  # Step 2 — Load Chroma client
@@ -27,15 +28,15 @@ if not os.path.exists(db_path):
27
  client = PersistentClient(path=persist_dir)
28
  model = SentenceTransformer("all-MiniLM-L6-v2")
29
 
30
- # Try to load existing collection, otherwise rebuild
31
  try:
32
  collection = client.get_collection("my_collection")
33
- print("Loaded existing ChromaDB collection: my_collection")
34
- except Exception as e:
35
- print("Collection not found, creating fallback collection...")
36
  collection = client.create_collection("my_collection")
37
 
38
- # Add minimal fallback data so the app still works
39
  sample_texts = [
40
  "The Eiffel Tower is a famous landmark in Paris.",
41
  "Machine learning helps computers learn from data.",
@@ -43,20 +44,19 @@ except Exception as e:
43
  "The football team won the championship game.",
44
  "Scientists discovered a new planet outside our solar system."
45
  ]
46
- sample_embeddings = model.encode(sample_texts)
47
  collection.add(
48
  documents=sample_texts,
49
- embeddings=sample_embeddings.tolist(),
50
  ids=[str(i) for i in range(len(sample_texts))]
51
  )
52
- print("Added fallback data.")
53
 
54
  # ==========================
55
  # Step 3 — Define search
56
  # ==========================
57
  def semantic_search(query):
58
- query_embedding = model.encode([query])
59
- results = collection.query(query_embeddings=query_embedding.tolist(), n_results=3)
60
  if not results["documents"] or len(results["documents"][0]) == 0:
61
  return "No matching documents found in the ChromaDB."
62
  return "\n\n".join(results["documents"][0])
 
3
  from sentence_transformers import SentenceTransformer
4
  import gradio as gr
5
  import os
6
+ import shutil
7
 
8
  # ==========================
9
  # Step 1 — Download ChromaDB
10
  # ==========================
11
  persist_dir = "chromadb"
12
  os.makedirs(persist_dir, exist_ok=True)
13
+ local_db_path = os.path.join(persist_dir, "chroma.sqlite3")
14
 
15
+ if not os.path.exists(local_db_path):
16
  print("Downloading ChromaDB from Hugging Face Dataset...")
17
+ downloaded_db = hf_hub_download(
18
+ repo_id="tiffany101/my-chromadb", # your dataset repo
19
  filename="chroma.sqlite3",
20
  repo_type="dataset"
21
  )
22
+ shutil.copy(downloaded_db, local_db_path)
23
+ print(f"Copied DB to {local_db_path}")
24
 
25
  # ==========================
26
  # Step 2 — Load Chroma client
 
28
  client = PersistentClient(path=persist_dir)
29
  model = SentenceTransformer("all-MiniLM-L6-v2")
30
 
31
+ # Try to load or create collection
32
  try:
33
  collection = client.get_collection("my_collection")
34
+ print("Loaded existing collection")
35
+ except Exception:
36
+ print("Collection not found, creating fallback...")
37
  collection = client.create_collection("my_collection")
38
 
39
+ # Add fallback data for demo
40
  sample_texts = [
41
  "The Eiffel Tower is a famous landmark in Paris.",
42
  "Machine learning helps computers learn from data.",
 
44
  "The football team won the championship game.",
45
  "Scientists discovered a new planet outside our solar system."
46
  ]
47
+ embeddings = model.encode(sample_texts)
48
  collection.add(
49
  documents=sample_texts,
50
+ embeddings=embeddings.tolist(),
51
  ids=[str(i) for i in range(len(sample_texts))]
52
  )
 
53
 
54
  # ==========================
55
  # Step 3 — Define search
56
  # ==========================
57
  def semantic_search(query):
58
+ query_emb = model.encode([query])
59
+ results = collection.query(query_embeddings=query_emb.tolist(), n_results=3)
60
  if not results["documents"] or len(results["documents"][0]) == 0:
61
  return "No matching documents found in the ChromaDB."
62
  return "\n\n".join(results["documents"][0])