decodingdatascience committed on
Commit
5a3dd7f
·
verified ·
1 Parent(s): 577c788

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -51
app.py CHANGED
@@ -1,32 +1,30 @@
1
- # app.py — Insurance Q&A (RAG) with Omantel branding, FAQ dropdown, no Top-K control
2
- # Hugging Face Spaces (Gradio) uses Pinecone + LlamaIndex + OpenAI
3
 
4
  import os
5
  import logging
6
  import gradio as gr
7
 
8
- # ---- Vector + LLM stack ----
9
  from pinecone import Pinecone, ServerlessSpec
10
  from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext, Settings
11
  from llama_index.vector_stores.pinecone import PineconeVectorStore
12
  from llama_index.embeddings.openai import OpenAIEmbedding
13
  from llama_index.llms.openai import OpenAI
14
 
15
- # ========== CONFIG ==========
16
  PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
17
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
18
 
19
- # Optional overrides via Space Variables
20
  PINECONE_INDEX_NAME = os.getenv("PINECONE_INDEX_NAME", "dds-insurance-index")
21
  PINECONE_REGION = os.getenv("PINECONE_REGION", "us-east-1")
22
  PINECONE_CLOUD = os.getenv("PINECONE_CLOUD", "aws")
23
  EMBED_MODEL = os.getenv("EMBED_MODEL", "text-embedding-3-small") # 1536-dim
24
  LLM_MODEL = os.getenv("LLM_MODEL", "gpt-4o-mini")
25
 
26
- DATA_DIR = "data" # Place documents (e.g., insurance.pdf) here
27
- DEFAULT_TOP_K = 4 # Internal similarity_top_k (no UI slider)
28
 
29
- # Omantel branding — using the exact logo you provided (raw URL to ensure it displays)
30
  LOGO_URL = "https://raw.githubusercontent.com/Decoding-Data-Science/Omantel/main/Omantel_Logo%20(1).png"
31
 
32
  if not PINECONE_API_KEY:
@@ -37,12 +35,10 @@ if not OPENAI_API_KEY:
37
  logging.basicConfig(level=logging.INFO)
38
  log = logging.getLogger("dds-space")
39
 
40
- # ========== CLIENTS / GLOBALS ==========
41
- # LlamaIndex global settings
42
  Settings.embed_model = OpenAIEmbedding(model=EMBED_MODEL, api_key=OPENAI_API_KEY)
43
  Settings.llm = OpenAI(model=LLM_MODEL, api_key=OPENAI_API_KEY)
44
 
45
- # Pinecone
46
  pc = Pinecone(api_key=PINECONE_API_KEY)
47
 
48
  def ensure_index(name: str, dim: int = 1536):
@@ -61,25 +57,20 @@ pinecone_index = ensure_index(PINECONE_INDEX_NAME, dim=1536)
61
  vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
62
 
63
  def bootstrap_index():
64
- """Index all files in ./data into Pinecone (idempotent safe)."""
65
  if not os.path.isdir(DATA_DIR):
66
  raise RuntimeError("No 'data/' directory found. Commit your documents to data/ in the Space repo.")
67
-
68
  log.info("Loading documents from ./data ...")
69
  docs = SimpleDirectoryReader(DATA_DIR).load_data()
70
  if not docs:
71
  raise RuntimeError("No documents found in data/. Add e.g., data/insurance.pdf")
72
-
73
  log.info(f"Docs loaded: {len(docs)}. Upserting into Pinecone…")
74
  storage_ctx = StorageContext.from_defaults(vector_store=vector_store)
75
  VectorStoreIndex.from_documents(docs, storage_context=storage_ctx, show_progress=True)
76
  log.info("Index upsert complete.")
77
 
78
- # Build once at startup
79
  bootstrap_index()
80
 
81
  def answer(query: str) -> str:
82
- """Query the existing vector store and return an answer string."""
83
  if not query or not query.strip():
84
  return "Please enter a question (or select one from the FAQ list)."
85
  index = VectorStoreIndex.from_vector_store(vector_store)
@@ -87,7 +78,6 @@ def answer(query: str) -> str:
87
  resp = engine.query(query)
88
  return str(resp)
89
 
90
- # ---- Frequently Asked Questions (edit to your document) ----
91
  FAQS = [
92
  "",
93
  "What benefits are covered under the policy?",
@@ -101,52 +91,30 @@ FAQS = [
101
  ]
102
 
103
  def use_faq(selected_faq: str, free_text: str):
104
- """
105
- If a FAQ is selected, prefer it; otherwise use free_text.
106
- Returns the chosen prompt (echo in UI) and the model answer.
107
- """
108
  prompt = (selected_faq or "").strip() or (free_text or "").strip()
109
  if not prompt:
110
  return "", "Please select a FAQ or type your question."
111
  return prompt, answer(prompt)
112
 
113
- # ========== UI ==========
114
  CSS = """
115
- .header {
116
- display: flex;
117
- align-items: center;
118
- gap: 12px;
119
- justify-content: center;
120
- margin-top: 8px;
121
- }
122
- .header img {
123
- height: 42px;
124
- }
125
- .header h1 {
126
- margin: 0;
127
- font-weight: 700;
128
- font-size: 1.4rem;
129
- }
130
- .subnote {
131
- text-align: center;
132
- margin-top: -6px;
133
- opacity: 0.8;
134
- }
135
  """
136
 
137
  with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
138
- # Header with logo + centered title
139
- with gr.Row():
140
- gr.Markdown(
141
- f"""
142
  <div class="header">
143
  <img src="{LOGO_URL}" alt="Omantel logo" />
144
- <h1>Omantel Insurance Q&A — RAG Assistant</h1>
145
  </div>
 
146
  <p class="subnote">Ask about coverage, claims, exclusions, and more — powered by LlamaIndex + Pinecone</p>
147
- """,
148
- elem_id="header_md"
149
- )
150
 
151
  with gr.Row():
152
  with gr.Column(scale=1):
@@ -159,7 +127,6 @@ with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
159
  placeholder="e.g., What is covered under outpatient benefits?",
160
  lines=2
161
  )
162
-
163
  ask_btn = gr.Button("Ask", variant="primary")
164
 
165
  with gr.Column(scale=1):
 
1
+ # app.py — Insurance Q&A (RAG) with Omantel logo from GitHub URL (centered top)
2
+ # Minimal changes; logic preserved. Uses Pinecone + LlamaIndex + OpenAI.
3
 
4
  import os
5
  import logging
6
  import gradio as gr
7
 
 
8
  from pinecone import Pinecone, ServerlessSpec
9
  from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext, Settings
10
  from llama_index.vector_stores.pinecone import PineconeVectorStore
11
  from llama_index.embeddings.openai import OpenAIEmbedding
12
  from llama_index.llms.openai import OpenAI
13
 
14
+ # ===== CONFIG =====
15
  PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
16
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
17
 
 
18
  PINECONE_INDEX_NAME = os.getenv("PINECONE_INDEX_NAME", "dds-insurance-index")
19
  PINECONE_REGION = os.getenv("PINECONE_REGION", "us-east-1")
20
  PINECONE_CLOUD = os.getenv("PINECONE_CLOUD", "aws")
21
  EMBED_MODEL = os.getenv("EMBED_MODEL", "text-embedding-3-small") # 1536-dim
22
  LLM_MODEL = os.getenv("LLM_MODEL", "gpt-4o-mini")
23
 
24
+ DATA_DIR = "data"
25
+ DEFAULT_TOP_K = 4 # internal similarity_top_k (no UI control)
26
 
27
+ # Omantel logo (raw GitHub URL so it renders directly)
28
  LOGO_URL = "https://raw.githubusercontent.com/Decoding-Data-Science/Omantel/main/Omantel_Logo%20(1).png"
29
 
30
  if not PINECONE_API_KEY:
 
35
  logging.basicConfig(level=logging.INFO)
36
  log = logging.getLogger("dds-space")
37
 
38
+ # ===== LlamaIndex / Pinecone =====
 
39
  Settings.embed_model = OpenAIEmbedding(model=EMBED_MODEL, api_key=OPENAI_API_KEY)
40
  Settings.llm = OpenAI(model=LLM_MODEL, api_key=OPENAI_API_KEY)
41
 
 
42
  pc = Pinecone(api_key=PINECONE_API_KEY)
43
 
44
  def ensure_index(name: str, dim: int = 1536):
 
57
  vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
58
 
59
  def bootstrap_index():
 
60
  if not os.path.isdir(DATA_DIR):
61
  raise RuntimeError("No 'data/' directory found. Commit your documents to data/ in the Space repo.")
 
62
  log.info("Loading documents from ./data ...")
63
  docs = SimpleDirectoryReader(DATA_DIR).load_data()
64
  if not docs:
65
  raise RuntimeError("No documents found in data/. Add e.g., data/insurance.pdf")
 
66
  log.info(f"Docs loaded: {len(docs)}. Upserting into Pinecone…")
67
  storage_ctx = StorageContext.from_defaults(vector_store=vector_store)
68
  VectorStoreIndex.from_documents(docs, storage_context=storage_ctx, show_progress=True)
69
  log.info("Index upsert complete.")
70
 
 
71
  bootstrap_index()
72
 
73
  def answer(query: str) -> str:
 
74
  if not query or not query.strip():
75
  return "Please enter a question (or select one from the FAQ list)."
76
  index = VectorStoreIndex.from_vector_store(vector_store)
 
78
  resp = engine.query(query)
79
  return str(resp)
80
 
 
81
  FAQS = [
82
  "",
83
  "What benefits are covered under the policy?",
 
91
  ]
92
 
93
  def use_faq(selected_faq: str, free_text: str):
 
 
 
 
94
  prompt = (selected_faq or "").strip() or (free_text or "").strip()
95
  if not prompt:
96
  return "", "Please select a FAQ or type your question."
97
  return prompt, answer(prompt)
98
 
99
+ # ===== UI =====
100
  CSS = """
101
+ .header { text-align:center; }
102
+ .header img { max-height:80px; height:auto; }
103
+ .title { text-align:center; font-weight:700; font-size:1.4rem; margin:6px 0 0 0; }
104
+ .subnote { text-align:center; margin-top:-2px; opacity:0.8; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  """
106
 
107
  with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
108
+ # Centered logo + title
109
+ gr.Markdown(
110
+ f"""
 
111
  <div class="header">
112
  <img src="{LOGO_URL}" alt="Omantel logo" />
 
113
  </div>
114
+ <h1 class="title">Omantel Insurance Q&A — RAG Assistant</h1>
115
  <p class="subnote">Ask about coverage, claims, exclusions, and more — powered by LlamaIndex + Pinecone</p>
116
+ """
117
+ )
 
118
 
119
  with gr.Row():
120
  with gr.Column(scale=1):
 
127
  placeholder="e.g., What is covered under outpatient benefits?",
128
  lines=2
129
  )
 
130
  ask_btn = gr.Button("Ask", variant="primary")
131
 
132
  with gr.Column(scale=1):