omm7 committed on
Commit
2d14e21
·
verified ·
1 Parent(s): 9b76816

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +15 -4
app.py CHANGED
@@ -12,9 +12,20 @@ from openai import OpenAI
12
  # Load embedding model (same as used during vector creation)
13
  embed_model = SentenceTransformer("BAAI/bge-small-en-v1.5")
14
 
15
- # Load ChromaDB client and existing collection
16
  chroma_client = chromadb.PersistentClient(path="./clause_index")
17
- collection = chroma_client.get_collection("legal_clauses")
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  # Setup OpenAI client (Hugging Face endpoint)
20
  client = OpenAI(
@@ -61,7 +72,7 @@ def predict(question):
61
  retrieved_docs = results["documents"][0]
62
  metadatas = results["metadatas"][0]
63
  context_parts = [
64
- f"[Clause Type: {m['clause_type']}] {doc}"
65
  for doc, m in zip(retrieved_docs, metadatas)
66
  ]
67
  context = "\n\n".join(context_parts)
@@ -87,6 +98,7 @@ def predict(question):
87
 
88
  except Exception as e:
89
  output = f"Error: {str(e)}"
 
90
 
91
  # Log query and response
92
  with scheduler.lock:
@@ -110,4 +122,3 @@ demo = gr.Interface(
110
 
111
  demo.queue()
112
  demo.launch()
113
-
 
12
  # Load embedding model (same as used during vector creation)
13
  embed_model = SentenceTransformer("BAAI/bge-small-en-v1.5")
14
 
15
+ # Load ChromaDB client and collection
16
  chroma_client = chromadb.PersistentClient(path="./clause_index")
17
+
18
+ # Try to attach to existing collection
19
+ collection_name = "legal_clauses"
20
+ existing_collections = chroma_client.list_collections()
21
+ collection = None
22
+ for col in existing_collections:
23
+ if col.name == collection_name:
24
+ collection = chroma_client.get_collection(name=collection_name)
25
+ break
26
+
27
+ if collection is None:
28
+ raise RuntimeError(f"Collection '{collection_name}' not found in persisted ChromaDB at ./clause_index")
29
 
30
  # Setup OpenAI client (Hugging Face endpoint)
31
  client = OpenAI(
 
72
  retrieved_docs = results["documents"][0]
73
  metadatas = results["metadatas"][0]
74
  context_parts = [
75
+ f"[Clause Type: {m.get('clause_type', 'Unknown')}] {doc}"
76
  for doc, m in zip(retrieved_docs, metadatas)
77
  ]
78
  context = "\n\n".join(context_parts)
 
98
 
99
  except Exception as e:
100
  output = f"Error: {str(e)}"
101
+ context = ""
102
 
103
  # Log query and response
104
  with scheduler.lock:
 
122
 
123
  demo.queue()
124
  demo.launch()