Spaces:

tiffany101
/

week14_interactive

Sleeping

App Files Files Community

tiffany101 committed on Nov 25, 2025

Commit

20ef8f2

verified ·

1 Parent(s): 8ae4dd9

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -40

app.py CHANGED Viewed

@@ -3,64 +3,50 @@ from chromadb import PersistentClient
 from sentence_transformers import SentenceTransformer
 import gradio as gr
 import os
 import shutil
 # ==========================
-# Step 1 — Download ChromaDB
 # ==========================
 persist_dir = "chromadb"
 os.makedirs(persist_dir, exist_ok=True)
-local_db_path = os.path.join(persist_dir, "chroma.sqlite3")
-if not os.path.exists(local_db_path):
-    print("📥 Downloading ChromaDB from Hugging Face Dataset...")
-    downloaded_db = hf_hub_download(
-        repo_id="tiffany101/my-chromadb",  # your dataset repo
-        filename="chroma.sqlite3",
         repo_type="dataset"
     )
-    shutil.copy(downloaded_db, local_db_path)
-    print(f"✅ Copied DB to {local_db_path}")
-else:
-    print("✅ Found local ChromaDB file, skipping download.")
 # ==========================
 # Step 2 — Load Chroma client
 # ==========================
 print("🚀 Initializing Chroma client...")
 client = PersistentClient(path=persist_dir)
-model = SentenceTransformer("all-MiniLM-L6-v2")
-# ==========================
-# Debug — List all collections
-# ==========================
-print("📊 Checking available collections...")
 collections = client.list_collections()
-if collections:
-    print("✅ Found the following collections:")
-    for c in collections:
-        print(f"  • {c.name}")
-else:
-    print("⚠️ No collections found in this database!")
-# Try to load or create collection
 try:
     collection = client.get_collection("my_collection")
     print("✅ Loaded existing collection: my_collection")
-    # ✅ Check how many documents are stored
-    print("🧩 Checking how many documents are stored...")
-    count = len(collection.get()["ids"])
-    print(f"✅ Collection contains {count} documents.")
-except Exception as e:
-    print(f"⚠️ Collection 'my_collection' not found ({e}), creating fallback...")
     collection = client.create_collection("my_collection")
-    # Add fallback data for demo
     sample_texts = [
-        "The Eiffel Tower is a famous landmark in Paris.",
-        "Machine learning helps computers learn from data.",
         "The stock market rose today amid strong earnings reports.",
         "The football team won the championship game.",
         "Scientists discovered a new planet outside our solar system."
@@ -71,23 +57,34 @@ except Exception as e:
         embeddings=embeddings.tolist(),
         ids=[str(i) for i in range(len(sample_texts))]
     )
-    print("✅ Added fallback demo data to new collection.")
 # ==========================
-# Step 3 — Define search
 # ==========================
 def semantic_search(query):
-    print(f"🔍 Received query: {query}")
     query_emb = model.encode([query])
     results = collection.query(query_embeddings=query_emb.tolist(), n_results=3)
     if not results["documents"] or len(results["documents"][0]) == 0:
-        print("⚠️ No matching documents found.")
         return "No matching documents found in the ChromaDB."
-    print(f"✅ Found {len(results['documents'][0])} results.")
     return "\n\n".join(results["documents"][0])
 # ==========================
-# Step 4 — Launch Gradio app
 # ==========================
 demo = gr.Interface(
     fn=semantic_search,

 from sentence_transformers import SentenceTransformer
 import gradio as gr
 import os
+import zipfile
 import shutil
 # ==========================
+# Step 1 — Download and unzip ChromaDB
 # ==========================
 persist_dir = "chromadb"
 os.makedirs(persist_dir, exist_ok=True)
+if not os.path.exists(os.path.join(persist_dir, "chroma.sqlite3")):
+    print("📥 Downloading ChromaDB zip from Hugging Face...")
+    db_zip_path = hf_hub_download(
+        repo_id="tiffany101/my-chromadb",   # your dataset repo
+        filename="chromadb.zip",
         repo_type="dataset"
     )
+    print("✅ Download complete, extracting...")
+    with zipfile.ZipFile(db_zip_path, "r") as zip_ref:
+        zip_ref.extractall(persist_dir)
+    print("✅ Extracted ChromaDB to:", persist_dir)
 # ==========================
 # Step 2 — Load Chroma client
 # ==========================
 print("🚀 Initializing Chroma client...")
 client = PersistentClient(path=persist_dir)
+# List collections for debugging
 collections = client.list_collections()
+print("📊 Collections found:", [c.name for c in collections])
+# Load or create fallback collection
 try:
     collection = client.get_collection("my_collection")
     print("✅ Loaded existing collection: my_collection")
+except Exception:
+    print("⚠️ my_collection not found, creating demo fallback...")
     collection = client.create_collection("my_collection")
+    # Add sample fallback data
+    model = SentenceTransformer("all-MiniLM-L6-v2")
     sample_texts = [
+        "The Eiffel Tower is one of the most famous landmarks in Paris.",
+        "Machine learning enables computers to learn from data.",
         "The stock market rose today amid strong earnings reports.",
         "The football team won the championship game.",
         "Scientists discovered a new planet outside our solar system."
         embeddings=embeddings.tolist(),
         ids=[str(i) for i in range(len(sample_texts))]
     )
 # ==========================
+# Step 3 — Verify collection size
+# ==========================
+print("🧩 Checking how many documents are stored...")
+try:
+    count = len(collection.get()["ids"])
+    print(f"✅ Collection contains {count} documents.")
+except Exception as e:
+    print("⚠️ Could not fetch count:", e)
+# ==========================
+# Step 4 — Load embedding model
+# ==========================
+model = SentenceTransformer("all-MiniLM-L6-v2")
+# ==========================
+# Step 5 — Define semantic search
 # ==========================
 def semantic_search(query):
     """Look up the stored documents that best match *query*.

     The query text is embedded with the module-level ``model`` and passed to
     the module-level ``collection`` asking for three results.

     Returns the matched documents joined by blank lines, or a fixed
     "no match" message when the query yields no documents.
     """
     query_vector = model.encode([query]).tolist()
     hits = collection.query(query_embeddings=query_vector, n_results=3)
     documents = hits["documents"]
     # Results are nested per query; only one query is sent, so use index 0.
     if documents and len(documents[0]) > 0:
         return "\n\n".join(documents[0])
     return "No matching documents found in the ChromaDB."
 # ==========================
+# Step 6 — Launch Gradio app
 # ==========================
 demo = gr.Interface(
     fn=semantic_search,