decodingdatascience committed on
Commit
c4efa05
·
verified ·
1 Parent(s): d8177b2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +125 -125
app.py CHANGED
@@ -1,125 +1,125 @@
1
- # app.py — Insurance Q&A (RAG) with system prompt + simple config
2
- import os
3
- import gradio as gr
4
- from pinecone import Pinecone, ServerlessSpec
5
- from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext, Settings
6
- from llama_index.vector_stores.pinecone import PineconeVectorStore
7
- from llama_index.embeddings.openai import OpenAIEmbedding
8
- from llama_index.llms.openai import OpenAI
9
-
10
- # --- System Prompt (polite + answer-from-document constraint) ---
11
- SYSTEM_PROMPT = """You are Aisha, a polite and professional Insurance assistant.
12
- Answer ONLY using the information found in the indexed insurance document(s).
13
- If the answer is not in the document(s), say: "I couldn’t find that in the document."
14
- Keep responses concise, helpful, and courteous.
15
- """
16
-
17
- # ===== Minimal CONFIG (only necessary keys) =====
18
- PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
19
- OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
20
- if not PINECONE_API_KEY or not OPENAI_API_KEY:
21
- raise RuntimeError("Missing PINECONE_API_KEY or OPENAI_API_KEY (set them in Space → Settings → Variables).")
22
-
23
- DATA_DIR = "data" # Put insurance docs here (e.g., data/insurance.pdf)
24
- LOGO_PATH = os.path.join(DATA_DIR, "dds_logo.png") # Mandatory logo
25
- if not os.path.exists(LOGO_PATH):
26
- raise RuntimeError("Logo not found: data/dds_logo.png.png (commit it to your Space repo).")
27
-
28
- EMBED_MODEL = "text-embedding-3-small" # 1536-dim
29
- LLM_MODEL = "gpt-4o-mini"
30
- TOP_K = 4 # internal similarity_top_k
31
-
32
- # ===== LlamaIndex / Pinecone (simple, fixed serverless: aws/us-east-1) =====
33
- Settings.embed_model = OpenAIEmbedding(model=EMBED_MODEL, api_key=OPENAI_API_KEY)
34
- Settings.llm = OpenAI(model=LLM_MODEL, api_key=OPENAI_API_KEY, system_prompt=SYSTEM_PROMPT)
35
-
36
- pc = Pinecone(api_key=PINECONE_API_KEY)
37
- def ensure_index(name: str, dim: int = 1536):
38
- names = [i["name"] for i in pc.list_indexes()]
39
- if name not in names:
40
- pc.create_index(
41
- name=name, dimension=dim, metric="cosine",
42
- spec=ServerlessSpec(cloud="aws", region="us-east-1"),
43
- )
44
- return pc.Index(name)
45
-
46
- # Fixed index name for simplicity
47
- pinecone_index = ensure_index("dds-insurance-index", dim=1536)
48
- vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
49
-
50
- def bootstrap_index():
51
- if not os.path.isdir(DATA_DIR):
52
- raise RuntimeError("No 'data/' directory found. Commit your documents to data/ in the Space repo.")
53
- docs = SimpleDirectoryReader(DATA_DIR).load_data()
54
- if not docs:
55
- raise RuntimeError("No documents found in data/. Add e.g., data/insurance.pdf")
56
- storage_ctx = StorageContext.from_defaults(vector_store=vector_store)
57
- VectorStoreIndex.from_documents(docs, storage_context=storage_ctx, show_progress=True)
58
-
59
- bootstrap_index()
60
-
61
- def answer(query: str) -> str:
62
- if not query.strip():
63
- return "Please enter a question (or select one from the FAQ list)."
64
- index = VectorStoreIndex.from_vector_store(vector_store)
65
- resp = index.as_query_engine(similarity_top_k=TOP_K).query(query)
66
- return str(resp)
67
-
68
- FAQS = [
69
- "",
70
- "What benefits are covered under the policy?",
71
- "How do I file a claim and what documents are required?",
72
- "What are the exclusions and limitations?",
73
- "Is pre-authorization needed for hospitalization?",
74
- "What is the reimbursement timeline?",
75
- "How are outpatient vs inpatient services handled?",
76
- "How can I check my network hospitals/clinics?",
77
- "What is the co-pay or deductible policy?",
78
- ]
79
-
80
- def use_faq(selected_faq: str, free_text: str):
81
- prompt = (selected_faq or "").strip() or (free_text or "").strip()
82
- if not prompt:
83
- return "", "Please select a FAQ or type your question."
84
- return prompt, answer(prompt)
85
-
86
- # ===== UI =====
87
- CSS = """
88
- .header { display:flex; flex-direction:column; align-items:center; gap:6px; }
89
- .logo img { width:300px; height:300px; object-fit:contain; } /* fixed 300x300 */
90
- .title { text-align:center; font-weight:700; font-size:1.4rem; margin:6px 0 0 0; }
91
- .subnote { text-align:center; margin-top:-2px; opacity:0.8; }
92
- """
93
-
94
- with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
95
- with gr.Row():
96
- with gr.Column():
97
- gr.Markdown("<div class='header'>")
98
- gr.Image(value=LOGO_PATH, show_label=False, elem_classes=["logo"])
99
- gr.Markdown(
100
- "<h1 class='title'>DDS Insurance Q&A — RAG Assistant</h1>"
101
- "<p class='subnote'>Answers strictly from your insurance document(s)</p>"
102
- )
103
- gr.Markdown("</div>")
104
-
105
- with gr.Row():
106
- with gr.Column(scale=1):
107
- gr.Markdown("### Ask from Frequently Asked Questions")
108
- faq = gr.Dropdown(choices=FAQS, value=FAQS[0], label="Select a common question")
109
-
110
- gr.Markdown("### Or type your question")
111
- user_q = gr.Textbox(
112
- label="Your question",
113
- placeholder="e.g., What is covered under outpatient benefits?",
114
- lines=2
115
- )
116
- ask_btn = gr.Button("Ask", variant="primary")
117
-
118
- with gr.Column(scale=1):
119
- chosen_prompt = gr.Textbox(label="Query sent", interactive=False)
120
- answer_box = gr.Markdown()
121
-
122
- ask_btn.click(use_faq, inputs=[faq, user_q], outputs=[chosen_prompt, answer_box])
123
-
124
- if __name__ == "__main__":
125
- demo.launch()
 
1
+ # app.py — Insurance Q&A (RAG) with system prompt + simple config
2
+ import os
3
+ import gradio as gr
4
+ from pinecone import Pinecone, ServerlessSpec
5
+ from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext, Settings
6
+ from llama_index.vector_stores.pinecone import PineconeVectorStore
7
+ from llama_index.embeddings.openai import OpenAIEmbedding
8
+ from llama_index.llms.openai import OpenAI
9
+
10
# --- System prompt (polite tone + answer-only-from-document constraint) ---
SYSTEM_PROMPT = """You are Aisha, a polite and professional Insurance assistant.
Answer ONLY using the information found in the indexed insurance document(s).
If the answer is not in the document(s), say: "I couldn’t find that in the document."
Keep responses concise, helpful, and courteous.
"""

# ===== Minimal CONFIG (only necessary keys) =====
# Both keys come from the environment; startup aborts when either is missing.
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not PINECONE_API_KEY or not OPENAI_API_KEY:
    raise RuntimeError("Missing PINECONE_API_KEY or OPENAI_API_KEY (set them in Space → Settings → Variables).")

DATA_DIR = "data"  # Put insurance docs here (e.g., data/insurance.pdf)
LOGO_PATH = os.path.join(DATA_DIR, "dds_logo.png")  # Mandatory logo
if not os.path.exists(LOGO_PATH):
    # Report the path that was actually checked so the message cannot drift
    # away from LOGO_PATH.
    raise RuntimeError(f"Logo not found: {LOGO_PATH} (commit it to your Space repo).")

EMBED_MODEL = "text-embedding-3-small"  # 1536-dim
LLM_MODEL = "gpt-4o-mini"
TOP_K = 4  # internal similarity_top_k
31
+
32
# ===== LlamaIndex / Pinecone (simple, fixed serverless: aws/us-east-1) =====
# Assign the embedding model and the LLM on LlamaIndex's shared Settings object.
Settings.embed_model = OpenAIEmbedding(model=EMBED_MODEL, api_key=OPENAI_API_KEY)
Settings.llm = OpenAI(model=LLM_MODEL, api_key=OPENAI_API_KEY, system_prompt=SYSTEM_PROMPT)

pc = Pinecone(api_key=PINECONE_API_KEY)


def ensure_index(name: str, dim: int = 1536):
    """Return a handle to the Pinecone index ``name``, creating it when absent.

    Args:
        name: Name of the Pinecone index to look up or create.
        dim: Vector dimension used only when the index has to be created.

    Returns:
        The object returned by ``pc.Index(name)``.
    """
    already_there = any(entry["name"] == name for entry in pc.list_indexes())
    if not already_there:
        # New indexes are always serverless on aws/us-east-1 with cosine metric.
        serverless = ServerlessSpec(cloud="aws", region="us-east-1")
        pc.create_index(name=name, dimension=dim, metric="cosine", spec=serverless)
    return pc.Index(name)


# Fixed index name for simplicity
pinecone_index = ensure_index("dds-insurance-index", dim=1536)
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
49
+
50
def bootstrap_index(force: bool = False):
    """Load the documents under ``DATA_DIR`` and embed them into the vector store.

    The Pinecone index persists across app restarts, so ingesting on every
    start would upsert the same content again and fill the index with
    duplicate chunks. Ingestion is therefore skipped when the index already
    reports stored vectors, unless ``force`` is set.

    Args:
        force: Re-ingest even when the index already contains vectors
            (e.g. after the documents in ``DATA_DIR`` have changed).

    Raises:
        RuntimeError: If ``DATA_DIR`` is missing, or ingestion runs and the
            reader finds no documents in it.
    """
    if not os.path.isdir(DATA_DIR):
        raise RuntimeError("No 'data/' directory found. Commit your documents to data/ in the Space repo.")

    if not force:
        stats = pinecone_index.describe_index_stats()
        if stats.total_vector_count > 0:
            # Already populated by an earlier start; nothing to do.
            return

    docs = SimpleDirectoryReader(DATA_DIR).load_data()
    if not docs:
        raise RuntimeError("No documents found in data/. Add e.g., data/insurance.pdf")
    storage_ctx = StorageContext.from_defaults(vector_store=vector_store)
    # Building the index is what embeds the documents and writes them to Pinecone;
    # the returned index object is not needed here.
    VectorStoreIndex.from_documents(docs, storage_context=storage_ctx, show_progress=True)


bootstrap_index()
60
+
61
def answer(query: str) -> str:
    """Answer ``query`` from the indexed documents.

    Args:
        query: The user's question. ``None`` and whitespace-only input are
            treated as "no question".

    Returns:
        The query engine's response as text, or a prompt asking the user to
        enter a question when ``query`` is blank.
    """
    # Guard against None as well as blank text, and query with the cleaned
    # string so stray whitespace is not sent to the engine.
    cleaned = (query or "").strip()
    if not cleaned:
        return "Please enter a question (or select one from the FAQ list)."
    index = VectorStoreIndex.from_vector_store(vector_store)
    engine = index.as_query_engine(similarity_top_k=TOP_K)
    return str(engine.query(cleaned))
67
# Preset questions offered in the FAQ dropdown. The leading empty string is
# the blank choice, so a typed question can be used instead.
FAQS = [""] + [
    "What benefits are covered under the policy?",
    "How do I file a claim and what documents are required?",
    "What are the exclusions and limitations?",
    "Is pre-authorization needed for hospitalization?",
    "What is the reimbursement timeline?",
    "How are outpatient vs inpatient services handled?",
    "How can I check my network hospitals/clinics?",
    "What is the co-pay or deductible policy?",
]
79
+
80
def use_faq(selected_faq: str, free_text: str):
    """Choose the question to ask and return ``(query_sent, answer_text)``.

    A non-blank FAQ selection takes priority; otherwise the typed question is
    used. When both are blank (or ``None``), the first element is an empty
    string and the second is a hint asking the user for input.
    """
    for candidate in (selected_faq, free_text):
        prompt = (candidate or "").strip()
        if prompt:
            return prompt, answer(prompt)
    return "", "Please select a FAQ or type your question."
85
+
86
# ===== UI =====
CSS = """
.header { display:flex; flex-direction:column; align-items:center; gap:6px; }
.logo img { width:300px; height:300px; object-fit:contain; } /* fixed 300x300 */
.title { text-align:center; font-weight:700; font-size:1.4rem; margin:6px 0 0 0; }
.subnote { text-align:center; margin-top:-2px; opacity:0.8; }
"""

with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
    with gr.Row():
        # The header class is attached to the column itself. Separate
        # gr.Markdown("<div ...>") / gr.Markdown("</div>") components each
        # render their own HTML fragment, so they cannot wrap the components
        # placed between them and the .header rule would never apply.
        with gr.Column(elem_classes=["header"]):
            gr.Image(value=LOGO_PATH, show_label=False, elem_classes=["logo"])
            gr.Markdown(
                "<h1 class='title'>DDS Insurance Q&A — Gitex Challenge</h1>"
                "<p class='subnote'>Answers strictly from your insurance document(s)</p>"
            )

    with gr.Row():
        # Left column: question input (FAQ dropdown or free text).
        with gr.Column(scale=1):
            gr.Markdown("### Ask from Frequently Asked Questions")
            faq = gr.Dropdown(choices=FAQS, value=FAQS[0], label="Select a common question")

            gr.Markdown("### Or type your question")
            user_q = gr.Textbox(
                label="Your question",
                placeholder="e.g., What is covered under outpatient benefits?",
                lines=2,
            )
            ask_btn = gr.Button("Ask", variant="primary")

        # Right column: the query that was sent and the generated answer.
        with gr.Column(scale=1):
            chosen_prompt = gr.Textbox(label="Query sent", interactive=False)
            answer_box = gr.Markdown()

    ask_btn.click(use_faq, inputs=[faq, user_q], outputs=[chosen_prompt, answer_box])

if __name__ == "__main__":
    demo.launch()