ssahal committed on
Commit
a701a47
·
verified ·
1 Parent(s): 534bbe1

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +23 -0
  2. app.py +139 -0
  3. requirements.txt +11 -0
Dockerfile ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
# Container image for the Streamlit RAG support-bot app.
FROM python:3.9-slim

WORKDIR /app

# Build tools + git for pip packages that compile native extensions;
# clear the apt cache in the same layer to keep the image small.
RUN apt-get update && apt-get install -y \
    build-essential \
    curl \
    software-properties-common \
    git \
    && rm -rf /var/lib/apt/lists/*

# Copy the dependency manifest first and install before copying the app
# source, so Docker layer-caches the (slow) pip install across code-only
# rebuilds. --no-cache-dir avoids shipping pip's download cache in the image.
COPY requirements.txt ./
RUN pip3 install --no-cache-dir -r requirements.txt

COPY app.py ./
COPY src/ ./src/

EXPOSE 8501

# Streamlit exposes a health endpoint; fail the container when it stops responding.
HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health

ENTRYPOINT ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0","--server.enableXsrfProtection=false"]
app.py ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import streamlit as st
3
+ import os
4
+ import json
5
+ import requests
6
+ from langchain_community.document_loaders import PyMuPDFLoader
7
+ from openai import OpenAI
8
+ import tiktoken
9
+ import pandas as pd
10
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
11
+ from langchain_community.embeddings.openai import OpenAIEmbeddings
12
+ from langchain_community.vectorstores import Chroma
13
+ import tempfile
14
+
15
+
16
# Credentials and endpoint for the OpenAI-compatible API, injected via
# environment variables.
# NOTE(review): os.environ.get returns None when a variable is unset, so a
# missing API_KEY/API_BASE fails at request time, not at startup — confirm
# that's acceptable for this deployment.
OPENAI_API_KEY = os.environ.get("API_KEY")
OPENAI_API_BASE = os.environ.get("API_BASE")

# Initialize OpenAI client
client = OpenAI(
    api_key=OPENAI_API_KEY,
    base_url=OPENAI_API_BASE
)
24
+
25
# Define the system prompt for the model.
# Constrains the assistant to answer strictly from the retrieved context,
# to cite sources, and to emit a fixed fallback sentence when the context
# does not contain the answer.
qna_system_message = """
# You are an AI assistant designed to support the HR team at Flykite Airlines. Your task is to provide evidence-based, concise, and relevant answers to employee queries based on the context provided.

User input will include the necessary context for you to answer their questions. This context will begin with the token: ###Context. The context contains references to specific portions of the official Flykite Airlines HR Policy Handbook and related documentation.

When crafting your response:
1. Use only the provided context to answer the question.
2. If the answer is found in the context, respond with concise and actionable HR policy information.
3. Include the source reference with the section name, heading, or clause number, as provided in the context.
4. If the question is unrelated to the context or the context is empty, clearly respond with: "Sorry, this is out of my knowledge base."

Please adhere to the following response guidelines:
- Provide clear, direct answers using only the given context.
- Do not include any additional information outside of the context.
- Avoid rephrasing or summarizing the context unless explicitly relevant to the question.
- If no relevant answer exists in the context, respond with: "Sorry, this is out of my knowledge base."
- If the context is not provided, your response should also be: "Sorry, this is out of my knowledge base."

Here is an example of how to structure your response:

Answer:
[Answer based on context]

Source:
[Source details with section, clause, or heading]
"""

# Define the user message template.
# {context} and {question} are filled in by generate_rag_response via
# str.replace (not str.format), so literal braces in retrieved text are safe.
qna_user_message_template = """
# ###Context
Here are some excerpts from the Flykite Airlines HR Policy Handbook and their sources that are relevant to the employee's question mentioned below:
{context}

###Question
{question}
"""
62
+
63
# Processing PDF files
@st.cache_resource
def load_and_process_pdfs(uploaded_files):
    """Load uploaded PDFs, chunk them, embed the chunks, and return a retriever.

    Each Streamlit UploadedFile is written to a temporary file because
    PyMuPDFLoader needs a filesystem path. The loaded documents are split into
    ~1000-token chunks, embedded with OpenAI embeddings into an in-memory
    Chroma store, and a top-3 similarity retriever over that store is
    returned. Cached by Streamlit so identical uploads skip reprocessing.
    """
    all_documents = []
    for uploaded_file in uploaded_files:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
            tmp_file.write(uploaded_file.getvalue())
            tmp_file_path = tmp_file.name
        # Fix: the temporary file previously leaked if loader.load() raised;
        # try/finally guarantees cleanup on both success and failure.
        try:
            loader = PyMuPDFLoader(tmp_file_path)
            documents = loader.load()
            all_documents.extend(documents)
        finally:
            os.remove(tmp_file_path)  # Clean up the temporary file

    # Token-based splitting keeps each chunk within the embedding model's limits.
    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        encoding_name='cl100k_base',
        chunk_size=1000,
    )
    document_chunks = text_splitter.split_documents(all_documents)

    embedding_model = OpenAIEmbeddings(
        openai_api_key=OPENAI_API_KEY,
        openai_api_base=OPENAI_API_BASE
    )

    # Create an in-memory vector store (or use a persistent one if needed)
    vectorstore = Chroma.from_documents(
        document_chunks,
        embedding_model
    )
    return vectorstore.as_retriever(search_type='similarity', search_kwargs={'k': 3})
92
+
93
def generate_rag_response(user_input, retriever, max_tokens=500, temperature=0, top_p=0.95):
    """Answer a user question with retrieval-augmented generation.

    Pulls the most relevant document chunks from the retriever, splices them
    into the prompt template along with the question, and asks the chat model.
    On any API failure the error text is returned as the response string
    instead of raising, so the UI always has something to display.
    """
    # Fetch the top matching chunks and merge them into one context string.
    retrieved_docs = retriever.get_relevant_documents(query=user_input)
    merged_context = ". ".join(doc.page_content for doc in retrieved_docs)

    # str.replace (not str.format) so braces inside retrieved text are harmless.
    prompt = (
        qna_user_message_template
        .replace('{context}', merged_context)
        .replace('{question}', user_input)
    )

    try:
        completion = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": qna_system_message},
                {"role": "user", "content": prompt}
            ],
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p
        )
        answer = completion.choices[0].message.content.strip()
    except Exception as e:
        answer = f'Sorry, I encountered the following error: \n {e}'

    return answer
121
+
122
# Streamlit App
# Page flow: upload PDFs -> build cached retriever -> free-text Q&A.
st.title("LLM-Powered Support bot")

uploaded_files = st.file_uploader("Upload PDF files", type=["pdf"], accept_multiple_files=True)

retriever = None
if uploaded_files:
    st.info("Processing uploaded PDFs...")
    # Cached by @st.cache_resource, so re-runs with the same uploads are fast.
    retriever = load_and_process_pdfs(uploaded_files)
    st.success("PDFs processed and ready for questioning!")


# Only show the question box once documents have been indexed.
if retriever:
    user_question = st.text_input("Ask a question about the uploaded documents:")
    if user_question:
        with st.spinner("Generating response..."):
            rag_response = generate_rag_response(user_question, retriever)
            st.write(rag_response)
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ langchain_community==0.3.27
3
+ langchain==0.3.27
4
+ chromadb==1.0.15
5
+ pymupdf==1.26.3
6
+ tiktoken==0.9.0
7
+ datasets==4.0.0
8
+ evaluate==0.4.5
9
+ streamlit==1.35.0
10
+ openai==1.99.1
11
+ langchain_openai==0.3.28