Spaces:
Sleeping
Sleeping
| import os | |
| import logging | |
| import sys | |
| import gradio as gr | |
| from pinecone import Pinecone | |
| from llama_index.core import VectorStoreIndex, Settings, StorageContext | |
| from llama_index.vector_stores.pinecone import PineconeVectorStore | |
| from llama_index.llms.openai import OpenAI | |
| from llama_index.embeddings.openai import OpenAIEmbedding | |
| from llama_index.core.prompts import PromptTemplate | |
# --- Logging ---
# Route log records at INFO level and above to stdout.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)


# --- Secrets ---
def _require_env(name):
    """Return the value of environment variable *name*.

    Raises:
        ValueError: if the variable is unset or empty.
    """
    value = os.environ.get(name)
    if not value:
        raise ValueError(f"Missing {name}")
    return value


# Fail fast at import time when either API key is absent.
PINECONE_API_KEY = _require_env("PINECONE_API_KEY")
OPENAI_API_KEY = _require_env("OPENAI_API_KEY")

# --- LLM Settings ---
# Configure the chat model and the embedding model on llama_index's Settings.
Settings.llm = OpenAI(model="gpt-4.1-mini", temperature=0.2, api_key=OPENAI_API_KEY)
Settings.embed_model = OpenAIEmbedding(model="text-embedding-ada-002", api_key=OPENAI_API_KEY)

# Better chunking (IMPORTANT)
Settings.chunk_size = 500
Settings.chunk_overlap = 100
# --- Custom QA prompt ---
# Raw template text. {context_str} and {query_str} are the two placeholders
# the template expects to have substituted.
_QA_TEMPLATE_TEXT = """
You are Helen, the Decoding Data Science (DDS) HR Enterprise chatbot.
You must ONLY answer HR-related questions using the provided context.
---------------------
Context:
{context_str}
---------------------
Question:
{query_str}
Instructions:
- First determine if the question is DDS HR-related
- If NOT HR-related, respond:
"I’m here to answer only HR-related questions. Please ask a question related to HR policies or employee matters."
- If HR-related but answer is NOT in the context, respond:
"I’m sorry, I don’t have enough information to answer that. Please contact connect@decodingdatascience.com for further assistance."
- If answer EXISTS in the context:
- Answer strictly using the context
- Do NOT add external knowledge
- Keep answer between 2–4 sentences
- Maintain a professional and polite tone
- ALWAYS end with a citation from the context that links to the answer
- Do NOT include reasoning steps
- Do NOT hallucinate
Answer:
"""

# Prompt object handed to the query engine as its text-QA template.
qa_prompt = PromptTemplate(_QA_TEMPLATE_TEXT)
# --- Pinecone connection ---
# Open a handle to the existing Pinecone index named by index_name.
index_name = "quickstart"
pc = Pinecone(api_key=PINECONE_API_KEY)
pinecone_index = pc.Index(index_name)

# --- Vector store ---
# Wrap the Pinecone index as a llama_index vector store and build an index
# object on top of it.
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_vector_store(vector_store=vector_store, storage_context=storage_context)

# --- Query engine ---
# Use the custom QA prompt and request the 3 most similar chunks per query.
query_engine = index.as_query_engine(text_qa_template=qa_prompt, similarity_top_k=3)
# --- Gradio ---
def query_doc(prompt):
    """Answer one user question through the module-level ``query_engine``.

    Args:
        prompt: Text entered by the user.

    Returns:
        ``"Please enter your HR-related question."`` when *prompt* is empty
        or whitespace-only; the stringified query-engine response on
        success; or ``"Error: <message>"`` if anything raises.
    """
    try:
        if not prompt or not prompt.strip():
            return "Please enter your HR-related question."
        return str(query_engine.query(prompt))
    except Exception as e:
        # UI boundary: the error is still returned as text so the interface
        # keeps working, but the traceback is now logged instead of being
        # discarded, so failures can be diagnosed afterwards.
        logging.getLogger(__name__).exception("query_doc failed")
        return f"Error: {e}"
# Input and output widgets for the single-question form.
_question_box = gr.Textbox(label="Ask Helen your HR-related question: ", lines=5)
_answer_box = gr.Textbox(label="Answer", lines=5)

# Sample questions passed to the interface as examples.
_example_questions = [
    "Can I take leave next month?",
    "How many annual leave days do I get?",
    "Can I travel and work at the same time?",
    "What happens if I miss too many meetings?",
]

# Gradio interface that routes the question box through query_doc.
demo = gr.Interface(
    fn=query_doc,
    inputs=_question_box,
    outputs=_answer_box,
    title="Helen the DDS Enterprise Professional Chatbot",
    description="Ask HR questions based on the indexed HR documents. Powered by OpenAI, LlamaIndex & Pinecone.",
    examples=_example_questions,
    theme="ocean",
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()