Spaces:

tiffany101
/

week14_interactive

Sleeping

App Files Community

tiffany101 committed on Nov 25, 2025

Commit

88a99f5

verified ·

1 Parent(s): d8eb6b3

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -16

app.py CHANGED Viewed

@@ -3,23 +3,24 @@ from chromadb import PersistentClient
 from sentence_transformers import SentenceTransformer
 import gradio as gr
 import os
 # ==========================
 # Step 1 — Download ChromaDB
 # ==========================
 persist_dir = "chromadb"
 os.makedirs(persist_dir, exist_ok=True)
-db_path = os.path.join(persist_dir, "chroma.sqlite3")
-if not os.path.exists(db_path):
     print("Downloading ChromaDB from Hugging Face Dataset...")
-    db_path = hf_hub_download(
-        repo_id="tiffany101/my-chromadb",   # Your dataset repo
         filename="chroma.sqlite3",
         repo_type="dataset"
     )
-    os.replace(db_path, os.path.join(persist_dir, "chroma.sqlite3"))
-    print("Download complete!")
 # ==========================
 # Step 2 — Load Chroma client
@@ -27,15 +28,15 @@ if not os.path.exists(db_path):
 client = PersistentClient(path=persist_dir)
 model = SentenceTransformer("all-MiniLM-L6-v2")
-# Try to load existing collection, otherwise rebuild
 try:
     collection = client.get_collection("my_collection")
-    print("Loaded existing ChromaDB collection: my_collection")
-except Exception as e:
-    print("Collection not found, creating fallback collection...")
     collection = client.create_collection("my_collection")
-    # Add minimal fallback data so the app still works
     sample_texts = [
         "The Eiffel Tower is a famous landmark in Paris.",
         "Machine learning helps computers learn from data.",
@@ -43,20 +44,19 @@ except Exception as e:
         "The football team won the championship game.",
         "Scientists discovered a new planet outside our solar system."
     ]
-    sample_embeddings = model.encode(sample_texts)
     collection.add(
         documents=sample_texts,
-        embeddings=sample_embeddings.tolist(),
         ids=[str(i) for i in range(len(sample_texts))]
     )
-    print("Added fallback data.")
 # ==========================
 # Step 3 — Define search
 # ==========================
 def semantic_search(query):
-    query_embedding = model.encode([query])
-    results = collection.query(query_embeddings=query_embedding.tolist(), n_results=3)
     if not results["documents"] or len(results["documents"][0]) == 0:
         return "No matching documents found in the ChromaDB."
     return "\n\n".join(results["documents"][0])

 from sentence_transformers import SentenceTransformer
 import gradio as gr
 import os
+import shutil
 # ==========================
 # Step 1 — Download ChromaDB
 # ==========================
 persist_dir = "chromadb"
 os.makedirs(persist_dir, exist_ok=True)
+local_db_path = os.path.join(persist_dir, "chroma.sqlite3")
+if not os.path.exists(local_db_path):
     print("Downloading ChromaDB from Hugging Face Dataset...")
+    downloaded_db = hf_hub_download(
+        repo_id="tiffany101/my-chromadb",  # your dataset repo
         filename="chroma.sqlite3",
         repo_type="dataset"
     )
+    shutil.copy(downloaded_db, local_db_path)
+    print(f"Copied DB to {local_db_path}")
 # ==========================
 # Step 2 — Load Chroma client
 client = PersistentClient(path=persist_dir)
 model = SentenceTransformer("all-MiniLM-L6-v2")
+# Try to load or create collection
 try:
     collection = client.get_collection("my_collection")
+    print("Loaded existing collection")
+except Exception:
+    print("Collection not found, creating fallback...")
     collection = client.create_collection("my_collection")
+    # Add fallback data for demo
     sample_texts = [
         "The Eiffel Tower is a famous landmark in Paris.",
         "Machine learning helps computers learn from data.",
         "The football team won the championship game.",
         "Scientists discovered a new planet outside our solar system."
     ]
+    embeddings = model.encode(sample_texts)
     collection.add(
         documents=sample_texts,
+        embeddings=embeddings.tolist(),
         ids=[str(i) for i in range(len(sample_texts))]
     )
 # ==========================
 # Step 3 — Define search
 # ==========================
 def semantic_search(query):
+    query_emb = model.encode([query])
+    results = collection.query(query_embeddings=query_emb.tolist(), n_results=3)
     if not results["documents"] or len(results["documents"][0]) == 0:
         return "No matching documents found in the ChromaDB."
     return "\n\n".join(results["documents"][0])