File size: 3,575 Bytes
c2b0067
 
 
 
a836a0b
 
 
c2b0067
a836a0b
 
cdaf551
a836a0b
c2b0067
 
a836a0b
cdaf551
c2b0067
a836a0b
 
c2b0067
cdaf551
a836a0b
 
cdaf551
a836a0b
cdaf551
a836a0b
 
 
 
 
 
 
cdaf551
a836a0b
 
 
cdaf551
 
 
a836a0b
cdaf551
 
 
93c691a
51dc133
cdaf551
51dc133
cdaf551
 
 
 
51dc133
cdaf551
 
51dc133
cdaf551
93c691a
cdaf551
 
51dc133
cdaf551
 
51dc133
cdaf551
 
 
 
 
93c691a
51dc133
cdaf551
 
51dc133
cdaf551
 
 
a836a0b
cdaf551
 
a836a0b
c2b0067
a836a0b
 
cdaf551
a836a0b
 
 
 
 
 
 
 
cdaf551
a836a0b
cdaf551
 
a836a0b
 
cdaf551
c2b0067
 
a836a0b
cdaf551
 
c2b0067
 
cdaf551
c2b0067
 
a836a0b
93c691a
 
 
f6fc25b
 
93c691a
 
 
 
 
 
271eedc
 
27498a1
c2b0067
a836a0b
58af7c6
a836a0b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import os
import logging
import sys
import gradio as gr

from pinecone import Pinecone
from llama_index.core import VectorStoreIndex, Settings, StorageContext
from llama_index.vector_stores.pinecone import PineconeVectorStore
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core.prompts import PromptTemplate

# --- Logging ---
# Emit INFO-and-above log records on stdout.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)


# --- Secrets ---
def _require_env(name):
    """Return the value of environment variable *name*.

    Raises:
        ValueError: if the variable is unset or empty.
    """
    value = os.environ.get(name)
    if not value:
        raise ValueError(f"Missing {name}")
    return value


# Both keys are mandatory; the module refuses to load without them.
PINECONE_API_KEY = _require_env("PINECONE_API_KEY")
OPENAI_API_KEY = _require_env("OPENAI_API_KEY")

# --- LLM Settings ---
# Register the OpenAI chat model on the shared `Settings` object imported
# from llama_index.core. A low temperature (0.2) is requested.
Settings.llm = OpenAI(
    model="gpt-4.1-mini",
    temperature=0.2,
    api_key=OPENAI_API_KEY
)

# Register the OpenAI embedding model on the same `Settings` object.
# NOTE(review): presumably this must be the same embedding model that was
# used when the Pinecone index was populated — confirm before changing it.
Settings.embed_model = OpenAIEmbedding(
    model="text-embedding-ada-002",
    api_key=OPENAI_API_KEY
)

# Chunking parameters stored on `Settings`.
# NOTE(review): this file only attaches to an existing vector store and
# ingests no documents, so these values presumably only matter if documents
# are parsed elsewhere with the same Settings — confirm.
Settings.chunk_size = 500
Settings.chunk_overlap = 100

# --- Custom QA prompt ---
# Raw template text. `{context_str}` and `{query_str}` are the two
# placeholders left for substitution; everything else is fixed instruction
# text for the "Helen" HR assistant persona.
_QA_TEMPLATE_TEXT = """
You are Helen, the Decoding Data Science (DDS) HR Enterprise chatbot.

You must ONLY answer HR-related questions using the provided context.

---------------------
Context:
{context_str}
---------------------

Question:
{query_str}

Instructions:
- First determine if the question is DDS HR-related
- If NOT HR-related, respond:
  "I’m here to answer only HR-related questions. Please ask a question related to HR policies or employee matters."

- If HR-related but answer is NOT in the context, respond:
  "I’m sorry, I don’t have enough information to answer that. Please contact connect@decodingdatascience.com for further assistance."

- If answer EXISTS in the context:
  - Answer strictly using the context
  - Do NOT add external knowledge
  - Keep answer between 2–4 sentences
  - Maintain a professional and polite tone
  - ALWAYS end with a citation from the context that links to the answer

- Do NOT include reasoning steps
- Do NOT hallucinate

Answer:
"""

# Prompt object handed to the query engine as its text QA template.
qa_prompt = PromptTemplate(_QA_TEMPLATE_TEXT)

# --- Pinecone connection ---
# Name of the Pinecone index this app queries.
index_name = "quickstart"

# Client created with the API key read from the environment, and a handle
# to the index named above.
pc = Pinecone(api_key=PINECONE_API_KEY)
pinecone_index = pc.Index(index_name)

# --- Vector Store ---
# Wrap the Pinecone index handle for LlamaIndex and build a storage context
# around that wrapper.
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Build the index object from the existing vector store; no documents are
# passed in here.
# NOTE(review): `vector_store` is supplied directly and again through
# `storage_context` — the second may be redundant; confirm against the
# LlamaIndex API before removing it.
index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store,
    storage_context=storage_context
)

# --- Query engine ---
# Uses the custom `qa_prompt` as the text QA template.
# similarity_top_k=3: presumably the number of most-similar chunks retrieved
# per query — confirm.
query_engine = index.as_query_engine(
    text_qa_template=qa_prompt,
    similarity_top_k=3
)

# --- Gradio ---
def query_doc(prompt):
    """Answer one question from the UI using the module-level query engine.

    Args:
        prompt: Question text entered by the user.

    Returns:
        The query engine's response converted to ``str``; a reminder
        message when *prompt* is empty or whitespace-only; or
        ``"Error: <message>"`` when anything raises while handling the
        request.
    """
    try:
        if not prompt or not prompt.strip():
            return "Please enter your HR-related question."

        return str(query_engine.query(prompt))

    except Exception as exc:
        # UI boundary: the failure is still returned as text so the app
        # stays responsive, but the traceback is logged instead of being
        # silently discarded.
        logging.getLogger(__name__).exception("query_doc failed")
        return f"Error: {exc}"

# Input and output widgets, created in the same order as they are wired
# into the interface below.
question_box = gr.Textbox(label="Ask Helen your HR-related question: ", lines=5)
answer_box = gr.Textbox(label="Answer", lines=5)

# Sample questions offered in the UI.
example_questions = [
    "Can I take leave next month?",
    "How many annual leave days do I get?",
    "Can I travel and work at the same time?",
    "What happens if I miss too many meetings?",
]

# Single-function web UI: the question text goes to `query_doc`, and its
# return value is shown in the answer box.
demo = gr.Interface(
    fn=query_doc,
    inputs=question_box,
    outputs=answer_box,
    title="Helen the DDS Enterprise Professional Chatbot",
    description="Ask HR questions based on the indexed HR documents. Powered by OpenAI, LlamaIndex & Pinecone.",
    examples=example_questions,
    theme="ocean",
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()