Spaces:

tiffany101
/

week14_interactive

Sleeping

App Files Files Community

tiffany101 committed on Nov 25, 2025

Commit

d8eb6b3

verified ·

1 Parent(s): 5f41904

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -23

app.py CHANGED Viewed

@@ -4,9 +4,9 @@ from sentence_transformers import SentenceTransformer
 import gradio as gr
 import os
-# =============================
-# Download ChromaDB from Hugging Face dataset
-# =============================
 persist_dir = "chromadb"
 os.makedirs(persist_dir, exist_ok=True)
 db_path = os.path.join(persist_dir, "chroma.sqlite3")
@@ -14,48 +14,62 @@ db_path = os.path.join(persist_dir, "chroma.sqlite3")
 if not os.path.exists(db_path):
     print("Downloading ChromaDB from Hugging Face Dataset...")
     db_path = hf_hub_download(
-        repo_id="tiffany101/my-chromadb",   # your dataset repo
         filename="chroma.sqlite3",
-        repo_type="dataset"                 # ensure it's recognized as a dataset
     )
-    print("Download complete:", db_path)
-# =============================
-# Load or create Chroma collection
-# =============================
 client = PersistentClient(path=persist_dir)
 try:
     collection = client.get_collection("my_collection")
-    print("Loaded existing collection: my_collection")
 except Exception as e:
-    print("Collection not found — creating new one...")
     collection = client.create_collection("my_collection")
-# =============================
-# Load embedding model
-# =============================
-model = SentenceTransformer("all-MiniLM-L6-v2")
-# =============================
-# Semantic Search Function
-# =============================
 def semantic_search(query):
     query_embedding = model.encode([query])
     results = collection.query(query_embeddings=query_embedding.tolist(), n_results=3)
-    if len(results["documents"][0]) == 0:
         return "No matching documents found in the ChromaDB."
     return "\n\n".join(results["documents"][0])
-# =============================
-# Gradio App
-# =============================
 demo = gr.Interface(
     fn=semantic_search,
     inputs=gr.Textbox(label="Enter your search query"),
     outputs=gr.Textbox(label="Top Matches"),
     title="Semantic Search Engine",
-    description="Search over your dataset using semantic similarity."
 )
 if __name__ == "__main__":

 import gradio as gr
 import os
+# ==========================
+# Step 1 — Download ChromaDB
+# ==========================
 persist_dir = "chromadb"
 os.makedirs(persist_dir, exist_ok=True)
 db_path = os.path.join(persist_dir, "chroma.sqlite3")
 if not os.path.exists(db_path):
     print("Downloading ChromaDB from Hugging Face Dataset...")
     db_path = hf_hub_download(
+        repo_id="tiffany101/my-chromadb",   # Your dataset repo
         filename="chroma.sqlite3",
+        repo_type="dataset"
     )
+    os.replace(db_path, os.path.join(persist_dir, "chroma.sqlite3"))
+    print("Download complete!")
+# ==========================
+# Step 2 — Load Chroma client
+# ==========================
 client = PersistentClient(path=persist_dir)
+model = SentenceTransformer("all-MiniLM-L6-v2")
+# Try to load the existing collection; otherwise create a fallback collection seeded with sample data
 try:
     collection = client.get_collection("my_collection")
+    print("Loaded existing ChromaDB collection: my_collection")
 except Exception as e:
+    print("Collection not found, creating fallback collection...")
     collection = client.create_collection("my_collection")
+    # Add minimal fallback data so the app still works
+    sample_texts = [
+        "The Eiffel Tower is a famous landmark in Paris.",
+        "Machine learning helps computers learn from data.",
+        "The stock market rose today amid strong earnings reports.",
+        "The football team won the championship game.",
+        "Scientists discovered a new planet outside our solar system."
+    ]
+    sample_embeddings = model.encode(sample_texts)
+    collection.add(
+        documents=sample_texts,
+        embeddings=sample_embeddings.tolist(),
+        ids=[str(i) for i in range(len(sample_texts))]
+    )
+    print("Added fallback data.")
+# ==========================
+# Step 3 — Define search
+# ==========================
 def semantic_search(query):
     query_embedding = model.encode([query])
     results = collection.query(query_embeddings=query_embedding.tolist(), n_results=3)
+    if not results["documents"] or len(results["documents"][0]) == 0:
         return "No matching documents found in the ChromaDB."
     return "\n\n".join(results["documents"][0])
+# ==========================
+# Step 4 — Launch Gradio app
+# ==========================
 demo = gr.Interface(
     fn=semantic_search,
     inputs=gr.Textbox(label="Enter your search query"),
     outputs=gr.Textbox(label="Top Matches"),
     title="Semantic Search Engine",
+    description="Search across your Chroma database using semantic similarity."
 )
 if __name__ == "__main__":