TharanJ committed on
Commit
c243f83
·
1 Parent(s): 5281ffe
Files changed (2) hide show
  1. llm.py +40 -46
  2. test.py +51 -0
llm.py CHANGED
@@ -20,54 +20,52 @@ def query_gemini(questions, contexts, max_retries=3):
20
  questions_text = "\n".join([f"{i+1}. {q}" for i, q in enumerate(questions)])
21
 
22
  prompt = f"""
23
- You are an expert insurance assistant responsible for drafting formal, policy-aligned answers to user questions. Each response must follow structured formatting, proper terminology, and clean grammar. The tone must reflect official insurance documentation but remain simple and understandable to any reader.
24
-
25
- FORMAT & STYLE GUIDELINES:
26
-
27
- - Use third-person professional language only. Avoid “you”, “we”, or “policyholder”.
28
- - Begin answers with “Yes,” “No,” or “Can apply” where applicable.
29
- - Each answer must contain 2–3 clear sentences, each with a defined role:
30
- 1. First sentence: Direct answer (Yes/No/Definition).
31
- 2. Second sentence: Clarification, eligibility, limits, or conditions.
32
- 3. Optional third (if needed): Legal basis or policy clause (e.g., specific Act, PPN rule).
33
- - Write numbers in word–digit format (e.g., “thirty-six (36) months”).
34
- - Use formal but human-readable insurance terms (e.g., “Sum Insured”, “grace period”, “renewal”, “direct complications”, “capped”, “continuous coverage”).
35
- - Avoid passive constructions unless required by tone. Use precise, subject-led sentences.
36
- - Maintain consistency in describing timeframes and benefits:
37
- - “A grace period of thirty (30) days is provided…”
38
- - “The benefit is limited to two (2) deliveries during the policy period.”
39
- - Always include limits, duration, eligibility, and conditions, when relevant.
40
-
41
- STRUCTURED ANSWERING BEHAVIOR:
42
-
43
- - If an answer is Yes/No/Conditional:
44
- - Start with that term and follow up with explanation.
45
- - If the answer defines a feature (e.g., "What is hospital?"):
46
- - Start with the clean definition.
47
- - Never elaborate with theory, history, or deep medical details.
48
- - Do not repeat terms or explain known insurance concepts.
49
- - Avoid vague statements prefer clarity: "is capped at", "must be", "is covered under", etc.
50
-
51
- DO NOT:
52
-
53
- - Say “according to the document” or “based on context”.
54
- - Use markdown, emojis, or formatting symbols like %, ₹, or bullets.
55
- - Give long explanations, bullet points, or repeat words/ideas.
56
- - Mention “context”, “source”, or “document” at all.
57
- - Use uncertain or filler language (e.g., “It might”, “It appears”, “It could be”).
58
 
59
  ✅ DO:
60
- - Write in clean, informative language.
61
- - Give complete answers in 2–3 sentences maximum.
 
 
62
 
63
- 📝 EXAMPLE ANSWERS:
64
- - "Yes, the policy covers damage to personal property caused by fire, up to a limit of $50,000."
65
- - "No, the policy does not cover pre-existing conditions."
66
- - "The waiting period for coverage to begin is 30 days from the start date of the policy."
67
 
 
 
68
  📤 OUTPUT FORMAT (strict):
69
  Respond with only the following JSON — no explanations, no comments, no markdown:
70
-
71
  {{
72
  "answers": [
73
  "Answer to question 1",
@@ -75,14 +73,10 @@ Respond with only the following JSON — no explanations, no comments, no markdo
75
  ...
76
  ]
77
  }}
78
-
79
-
80
  📚 CONTEXT:
81
  {context}
82
-
83
  ❓ QUESTIONS:
84
  {questions_text}
85
-
86
  Your task: For each question, provide a complete, professional, and clearly written answer in 2–3 sentences using a formal but readable tone.
87
  """
88
 
 
20
  questions_text = "\n".join([f"{i+1}. {q}" for i, q in enumerate(questions)])
21
 
22
  prompt = f"""
23
+ You are a highly trained insurance assistant. Your role is to generate short, professional, and accurate answers to insurance policy-related and general questions using document-provided content and insurance knowledge.
24
+
25
+ Your responses must reflect the style of formal policy communication — clear, structured, factual — but without sounding legalistic or robotic.
26
+
27
+ Your top priority is **accuracy**, especially for details like limits, conditions, durations, eligibility, and exceptions. Always include these when available.
28
+
29
+ 🧠 OUTPUT RULES:
30
+
31
+ 1. Always write in **formal third-person language**. Never use “you”, “we”, or “our”.
32
+ 2. Each answer should be **complete in 2–3 concise, grammatically correct sentences**.
33
+ 3. Begin with a direct phrase when possible:
34
+ - For Yes/No questions, begin with “Yes.” or “No.” and immediately explain why.
35
+ - For definitions or durations, directly state the fact (e.g., “The waiting period is two years.”).
36
+ 4. **Always include critical conditions** like:
37
+ - Waiting periods (e.g., 24 months, 36 months)
38
+ - Age or gender eligibility (e.g., 18–45 years for maternity)
39
+ - Financial caps or percentages (e.g., ₹40,000 per eye, 1% of Sum Insured)
40
+ - Frequency or count limits (e.g., “limited to two times”, “every two years”)
41
+ - Legal requirements (e.g., compliance with transplant laws)
42
+
43
+ 5. **Do NOT mention or reference**:
44
+ - “the document”, “the policy”, “the context”, “the clause”, “as per”, “this section”, “mentioned above”, or any similar source indicators.
45
+ 6. **Do NOT cite or imply** where the answer was taken from.
46
+ 7. **If a specific detail is not in the document**, provide the **most accurate general insurance-based answer** for the question — with confidence and completeness. Do not say the information is unavailable.
47
+ 8. If a condition or exception exists (e.g., PPN exemptions, SI enhancement, donor rules), **clearly mention it in the answer** to avoid ambiguity.
48
+ 9. If a question involves definitions (e.g., Hospital), include **all criteria** that affect compliance (e.g., bed count by city size, 24/7 staff, registration).
49
+ 10. Always make sure that **key values** like amounts, durations, or age limits — are not missed, even if they seem small.
50
+
51
+ 🚫 DO NOT:
52
+ - Say “based on the document” or any phrase referencing context or source.
53
+ - Use markdown, bullets, emojis, or formatting code.
54
+ - Output vague summaries or theoretical explanations.
55
+ - Repeat question terms unnecessarily.
 
 
56
 
57
  ✅ DO:
58
+ - Use correct insurance terms (e.g., “Sum Insured”, “waiting period”, “renewal”).
59
+ - Use formal but human-like language.
60
+ - Be as complete and accurate as possible in minimal space.
61
+ - Prioritize clarity, accuracy, and trust.
62
 
63
+ 🎯 Your goal is to sound like a highly trained insurance assistant who understands every clause of the policy and explains it clearly and efficiently — **without referencing documents** or using robotic legal phrases.
 
 
 
64
 
65
+ 🔎 KEY OBJECTIVE:
66
+ Write answers that sound like a well-informed human explaining official information in a clear, confident, and policy-aligned tone — without being robotic or verbose.
67
  📤 OUTPUT FORMAT (strict):
68
  Respond with only the following JSON — no explanations, no comments, no markdown:
 
69
  {{
70
  "answers": [
71
  "Answer to question 1",
 
73
  ...
74
  ]
75
  }}
 
 
76
  📚 CONTEXT:
77
  {context}
 
78
  ❓ QUESTIONS:
79
  {questions_text}
 
80
  Your task: For each question, provide a complete, professional, and clearly written answer in 2–3 sentences using a formal but readable tone.
81
  """
82
 
test.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json
import os

import requests
3
+
4
# Both settings can be overridden through the environment so the script can
# target another deployment (or use another credential) without editing code.
API_ENDPOINT = os.environ.get(
    "HACKRX_API_ENDPOINT",
    "https://huggingface.co/spaces/TharanJ/Insurance_Agent/api/v1/hackrx/run",
)
# SECURITY: a bearer token committed to version control must be treated as
# leaked. The literal is kept only as a fallback so existing runs keep working;
# rotate it and supply the replacement via HACKRX_BEARER_TOKEN instead.
BEARER_TOKEN = os.environ.get(
    "HACKRX_BEARER_TOKEN",
    "bde43ce44868142af0b7a56e668e4e1d20a09e1aee86495730c8075ae881c06f",
)
6
+
7
def extract_test_cases(filename):
    """Load test cases from a JSON file.

    The file may hold either a list of test-case objects or a single
    test-case object. A test case is a JSON object that has both a
    "documents" key and a "questions" key; every other value is ignored.

    Args:
        filename: Path of the UTF-8 encoded JSON file to read.

    Returns:
        A list of the dicts that contain both required keys (possibly empty).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    with open(filename, "r", encoding="utf-8") as f:
        data = json.load(f)

    # A lone object is treated as a one-element batch; iterating a dict
    # directly would walk its keys instead of its test cases.
    if isinstance(data, dict):
        data = [data]
    elif not isinstance(data, list):
        return []

    # Require real mappings: on a string item `"documents" in item` is a
    # substring test, and on a number it raises TypeError.
    return [
        item
        for item in data
        if isinstance(item, dict) and "documents" in item and "questions" in item
    ]
11
+
12
def call_rag(case, timeout=60):
    """POST one test case to the RAG endpoint and return its reply.

    Args:
        case: Mapping with a 'documents' entry and a 'questions' entry; both
            are forwarded unchanged in the JSON request body.
        timeout: Seconds to wait for the HTTP request before giving up.
            Defaults to 60, the value that used to be hard-coded here.

    Returns:
        The decoded JSON body when the server answers HTTP 200 with valid
        JSON. In every failure case a human-readable string is returned
        instead (non-200 status, undecodable body, timeout, or any other
        error raised while sending), so the result can be printed as-is.

    Raises:
        KeyError: If `case` lacks 'documents' or 'questions'; the payload is
            built before the guarded request, so this is not swallowed.
    """
    payload = {
        "documents": case["documents"],
        "questions": case["questions"],
    }
    headers = {
        "Authorization": f"Bearer {BEARER_TOKEN}",
        "Content-Type": "application/json",
    }
    try:
        resp = requests.post(
            API_ENDPOINT, headers=headers, json=payload, timeout=timeout
        )
        if resp.status_code != 200:
            return f"HTTP {resp.status_code}: {resp.text}"
        try:
            return resp.json()
        except ValueError:
            # JSON decode failures from requests are ValueError subclasses;
            # anything else falls through to the generic handler below.
            return f"Invalid JSON response: {resp.text}"
    except requests.exceptions.Timeout:
        return f"Timeout ({timeout}s)"
    except Exception as e:
        # Deliberate best-effort boundary: one failing case must not abort
        # the whole batch, so the error is reported as the result.
        return f"Error: {e}"
35
+
36
def main(filename="paste.txt"):
    """Send every test case found in `filename` to the RAG endpoint.

    For each case the documents, the questions and the value returned by
    `call_rag` are printed to stdout. If the file yields no valid test
    cases, a notice is printed and nothing is sent.

    Args:
        filename: Path of the JSON file holding the test cases. Defaults to
            "paste.txt", the name that used to be hard-coded here.
    """
    test_cases = extract_test_cases(filename)
    if not test_cases:
        print(f"No valid test cases found in {filename}.")
        return

    # Number the sets from 1 for human-friendly output.
    for number, case in enumerate(test_cases, start=1):
        print(f"\n=== Test Set {number} ===")
        print(f"Document: {case['documents']}")
        print(f"Questions: {case['questions']}")
        result = call_rag(case)
        print(f"Batch Answers: {result}")
49
+
50
# Entry-point guard: run the batch only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()