Upload app.py with huggingface_hub
Browse files
app.py
CHANGED
|
@@ -12,9 +12,20 @@ from openai import OpenAI
|
|
| 12 |
# Load embedding model (same as used during vector creation)
|
| 13 |
embed_model = SentenceTransformer("BAAI/bge-small-en-v1.5")
|
| 14 |
|
| 15 |
-
# Load ChromaDB client and
|
| 16 |
chroma_client = chromadb.PersistentClient(path="./clause_index")
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
# Setup OpenAI client (Hugging Face endpoint)
|
| 20 |
client = OpenAI(
|
|
@@ -61,7 +72,7 @@ def predict(question):
|
|
| 61 |
retrieved_docs = results["documents"][0]
|
| 62 |
metadatas = results["metadatas"][0]
|
| 63 |
context_parts = [
|
| 64 |
-
f"[Clause Type: {m
|
| 65 |
for doc, m in zip(retrieved_docs, metadatas)
|
| 66 |
]
|
| 67 |
context = "\n\n".join(context_parts)
|
|
@@ -87,6 +98,7 @@ def predict(question):
|
|
| 87 |
|
| 88 |
except Exception as e:
|
| 89 |
output = f"Error: {str(e)}"
|
|
|
|
| 90 |
|
| 91 |
# Log query and response
|
| 92 |
with scheduler.lock:
|
|
@@ -110,4 +122,3 @@ demo = gr.Interface(
|
|
| 110 |
|
| 111 |
demo.queue()
|
| 112 |
demo.launch()
|
| 113 |
-
|
|
|
|
| 12 |
# Load embedding model (same as used during vector creation)
|
| 13 |
embed_model = SentenceTransformer("BAAI/bge-small-en-v1.5")
|
| 14 |
|
| 15 |
+
# Load ChromaDB client and collection
|
| 16 |
chroma_client = chromadb.PersistentClient(path="./clause_index")
|
| 17 |
+
|
| 18 |
+
# Try to attach to existing collection
|
| 19 |
+
collection_name = "legal_clauses"
|
| 20 |
+
existing_collections = chroma_client.list_collections()
|
| 21 |
+
collection = None
|
| 22 |
+
for col in existing_collections:
|
| 23 |
+
if col.name == collection_name:
|
| 24 |
+
collection = chroma_client.get_collection(name=collection_name)
|
| 25 |
+
break
|
| 26 |
+
|
| 27 |
+
if collection is None:
|
| 28 |
+
raise RuntimeError(f"Collection '{collection_name}' not found in persisted ChromaDB at ./clause_index")
|
| 29 |
|
| 30 |
# Setup OpenAI client (Hugging Face endpoint)
|
| 31 |
client = OpenAI(
|
|
|
|
| 72 |
retrieved_docs = results["documents"][0]
|
| 73 |
metadatas = results["metadatas"][0]
|
| 74 |
context_parts = [
|
| 75 |
+
f"[Clause Type: {m.get('clause_type', 'Unknown')}] {doc}"
|
| 76 |
for doc, m in zip(retrieved_docs, metadatas)
|
| 77 |
]
|
| 78 |
context = "\n\n".join(context_parts)
|
|
|
|
| 98 |
|
| 99 |
except Exception as e:
|
| 100 |
output = f"Error: {str(e)}"
|
| 101 |
+
context = ""
|
| 102 |
|
| 103 |
# Log query and response
|
| 104 |
with scheduler.lock:
|
|
|
|
| 122 |
|
| 123 |
demo.queue()
|
| 124 |
demo.launch()
|
|
|