Spaces:
Sleeping
Sleeping
Commit ·
031582f
1
Parent(s): 95674da
sorted
Browse files
app.py
CHANGED
|
@@ -50,11 +50,46 @@ def load_resources():
|
|
| 50 |
return industry_index, industry_chunks, circular_index, circular_chunks
|
| 51 |
industry_index, industry_chunks, circular_index, circular_chunks = load_resources()
|
| 52 |
|
| 53 |
-
def retrieve_relevant_chunks(query, index, chunks, top_k=
|
| 54 |
model = load_sentence_transformer()
|
| 55 |
query_embedding = model.encode([query], convert_to_numpy=True)
|
| 56 |
distances, indices = index.search(query_embedding, top_k)
|
| 57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
return retrieved_chunks
|
| 59 |
|
| 60 |
def circular_compliance():
|
|
@@ -68,29 +103,28 @@ def circular_compliance():
|
|
| 68 |
if user_query:
|
| 69 |
relevant_chunks = retrieve_relevant_chunks(user_query, circular_index, circular_chunks)
|
| 70 |
context = "\n".join(relevant_chunks)
|
| 71 |
-
prompt =
|
| 72 |
You are an expert RBI compliance analyst. Based on the provided RBI Master Circular on Management of Advances:
|
|
|
|
| 73 |
{context}
|
|
|
|
| 74 |
Please analyze the following scenario for compliance:
|
| 75 |
{user_query}
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
-
|
| 79 |
-
-
|
| 80 |
-
|
| 81 |
-
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
4.
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
- Clear guidance on what needs to be done for compliance
|
| 92 |
-
- Specific steps to address any non-compliance
|
| 93 |
-
Please provide definitive guidance based solely on the circular content, avoiding ambiguity or speculation.
|
| 94 |
Response:
|
| 95 |
"""
|
| 96 |
chat_completion = client.chat.completions.create(
|
|
|
|
| 50 |
return industry_index, industry_chunks, circular_index, circular_chunks
|
| 51 |
industry_index, industry_chunks, circular_index, circular_chunks = load_resources()
|
| 52 |
|
| 53 |
+
def retrieve_relevant_chunks(query, index, chunks, top_k=10):
    """Return the chunks most relevant to *query*, at most five of them.

    The query is embedded with the shared sentence-transformer model and the
    ``top_k`` nearest entries are fetched from ``index``. When the query looks
    like a general term-loan question (and mentions no share / capital-market
    vocabulary), chunks that talk about share financing are skipped. If that
    filtering leaves fewer than three chunks, the skipped ones are appended
    back, in search order, until five chunks are collected.

    Args:
        query: The user's question as plain text.
        index: Vector index exposing ``search(embeddings, k)`` and returning
            ``(distances, indices)``.
        chunks: Sequence of text chunks, positionally aligned with ``index``.
        top_k: Number of nearest neighbours requested from the index.

    Returns:
        A list of up to five items taken from ``chunks``.
    """
    max_chunks = 5  # upper bound on the number of chunks handed to the prompt
    min_chunks = 3  # below this, filtered-out chunks are added back
    general_loan_terms = (
        'term loan', 'manufacturing', 'documentation requirement',
        'credit sanction', 'loan sanction', 'general lending',
    )
    share_query_terms = (
        'share', 'debenture', 'bond', 'equity', 'capital market',
    )
    share_chunk_terms = (
        'advances against shares', 'debentures', 'bonds', 'capital market',
        'shareholding', 'equity acquisition',
    )

    model = load_sentence_transformer()
    query_embedding = model.encode([query], convert_to_numpy=True)
    _distances, indices = index.search(query_embedding, top_k)

    # NOTE(review): FAISS-style indexes pad the result with -1 when they hold
    # fewer than top_k vectors (presumably what `index` is -- confirm). A
    # negative position would silently select chunks[-1], so keep only
    # positions that really exist in `chunks`.
    candidates = [int(idx) for idx in indices[0] if 0 <= idx < len(chunks)]

    query_lower = query.lower()
    # General term-loan query: mentions a lending term and no share vocabulary.
    is_general_loan_query = (
        any(term in query_lower for term in general_loan_terms)
        and not any(term in query_lower for term in share_query_terms)
    )

    retrieved_chunks = []
    for idx in candidates:
        if is_general_loan_query:
            chunk_text = str(chunks[idx]).lower()
            if any(term in chunk_text for term in share_chunk_terms):
                # Share-financing material is off-topic for a general loan query.
                continue

        retrieved_chunks.append(chunks[idx])
        if len(retrieved_chunks) >= max_chunks:
            break

    # Too little context survived the filter: top up with the skipped chunks.
    if len(retrieved_chunks) < min_chunks:
        for idx in candidates:
            if len(retrieved_chunks) >= max_chunks:
                break
            if chunks[idx] not in retrieved_chunks:
                retrieved_chunks.append(chunks[idx])

    return retrieved_chunks
|
| 94 |
|
| 95 |
def circular_compliance():
|
|
|
|
| 103 |
if user_query:
|
| 104 |
relevant_chunks = retrieve_relevant_chunks(user_query, circular_index, circular_chunks)
|
| 105 |
context = "\n".join(relevant_chunks)
|
| 106 |
+
prompt = f"""
|
| 107 |
You are an expert RBI compliance analyst. Based on the provided RBI Master Circular on Management of Advances:
|
| 108 |
+
|
| 109 |
{context}
|
| 110 |
+
|
| 111 |
Please analyze the following scenario for compliance:
|
| 112 |
{user_query}
|
| 113 |
+
|
| 114 |
+
CRITICAL INSTRUCTIONS:
|
| 115 |
+
- If the provided context is about share financing, debentures, bonds, or capital market exposures, and the query is about GENERAL TERM LOANS, clearly state that the retrieved information is not relevant to the query
|
| 116 |
+
- Focus ONLY on requirements that apply to standard term loans to manufacturing/business entities
|
| 117 |
+
- Do NOT conflate share financing requirements with general term loan requirements
|
| 118 |
+
- If the context doesn't contain information relevant to the specific query, state this clearly and indicate what type of information would be needed
|
| 119 |
+
|
| 120 |
+
Provide analysis with this structure:
|
| 121 |
+
1. Relevance Assessment: Is the provided context relevant to the query?
|
| 122 |
+
2. Actual Requirements: What are the real requirements for this scenario based on relevant sections?
|
| 123 |
+
3. Documentation: Specific documents actually required
|
| 124 |
+
4. Approval Process: Required approvals and delegation levels
|
| 125 |
+
5. Compliance Steps: Practical steps for compliance
|
| 126 |
+
|
| 127 |
+
Base your response ONLY on information directly relevant to the query type.
|
|
|
|
|
|
|
|
|
|
| 128 |
Response:
|
| 129 |
"""
|
| 130 |
chat_completion = client.chat.completions.create(
|