Spaces:

omm7
/

test_law

Sleeping

omm7 committed on Jul 17, 2025

Commit

2d14e21

verified ·

1 Parent(s): 9b76816

Upload app.py with huggingface_hub

Files changed (1) hide show

app.py CHANGED Viewed

@@ -12,9 +12,20 @@ from openai import OpenAI
 # Load embedding model (same as used during vector creation)
 embed_model = SentenceTransformer("BAAI/bge-small-en-v1.5")
-# Load ChromaDB client and existing collection
 chroma_client = chromadb.PersistentClient(path="./clause_index")
-collection = chroma_client.get_collection("legal_clauses")
 # Setup OpenAI client (Hugging Face endpoint)
 client = OpenAI(
@@ -61,7 +72,7 @@ def predict(question):
         retrieved_docs = results["documents"][0]
         metadatas = results["metadatas"][0]
         context_parts = [
-            f"[Clause Type: {m['clause_type']}] {doc}"
             for doc, m in zip(retrieved_docs, metadatas)
         ]
         context = "\n\n".join(context_parts)
@@ -87,6 +98,7 @@ def predict(question):
     except Exception as e:
         output = f"Error: {str(e)}"
     # Log query and response
     with scheduler.lock:
@@ -110,4 +122,3 @@ demo = gr.Interface(
 demo.queue()
 demo.launch()

 # Load embedding model (same as used during vector creation)
 embed_model = SentenceTransformer("BAAI/bge-small-en-v1.5")
+# Load ChromaDB client and collection
 chroma_client = chromadb.PersistentClient(path="./clause_index")
+# Try to attach to existing collection
+collection_name = "legal_clauses"
+existing_collections = chroma_client.list_collections()
+collection = None
+for col in existing_collections:
+    if col.name == collection_name:
+        collection = chroma_client.get_collection(name=collection_name)
+        break
+if collection is None:
+    raise RuntimeError(f"Collection '{collection_name}' not found in persisted ChromaDB at ./clause_index")
 # Setup OpenAI client (Hugging Face endpoint)
 client = OpenAI(
         retrieved_docs = results["documents"][0]
         metadatas = results["metadatas"][0]
         context_parts = [
+            f"[Clause Type: {m.get('clause_type', 'Unknown')}] {doc}"
             for doc, m in zip(retrieved_docs, metadatas)
         ]
         context = "\n\n".join(context_parts)
     except Exception as e:
         output = f"Error: {str(e)}"
+        context = ""
     # Log query and response
     with scheduler.lock:
 demo.queue()
 demo.launch()