ssahal committed on
Commit
534bbe1
·
verified ·
1 Parent(s): b8395f7

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -139
app.py DELETED
@@ -1,139 +0,0 @@
1
-
2
- import streamlit as st
3
- import os
4
- import json
5
- import requests
6
- from langchain_community.document_loaders import PyMuPDFLoader
7
- from openai import OpenAI
8
- import tiktoken
9
- import pandas as pd
10
- from langchain.text_splitter import RecursiveCharacterTextSplitter
11
- from langchain_community.embeddings.openai import OpenAIEmbeddings
12
- from langchain_community.vectorstores import Chroma
13
- import tempfile
14
-
15
-
16
# Read OpenAI-compatible credentials/endpoint from the environment.
# NOTE(review): no validation here — if API_KEY/API_BASE are unset these are
# None and the first API call will fail; confirm they are set in deployment.
OPENAI_API_KEY = os.environ.get("API_KEY")
OPENAI_API_BASE = os.environ.get("API_BASE")

# Initialize OpenAI client
# Module-level client, shared by every request in this Streamlit app run.
client = OpenAI(
    api_key=OPENAI_API_KEY,
    base_url=OPENAI_API_BASE
)
24
-
25
# Define the system prompt for the model
# Grounds the assistant strictly in the retrieved HR-handbook context and
# pins the refusal phrase. NOTE: the leading "#" on the first prompt line is
# part of the prompt text itself, not a Python comment.
qna_system_message = """
# You are an AI assistant designed to support the HR team at Flykite Airlines. Your task is to provide evidence-based, concise, and relevant answers to employee queries based on the context provided.

User input will include the necessary context for you to answer their questions. This context will begin with the token: ###Context. The context contains references to specific portions of the official Flykite Airlines HR Policy Handbook and related documentation.

When crafting your response:
1. Use only the provided context to answer the question.
2. If the answer is found in the context, respond with concise and actionable HR policy information.
3. Include the source reference with the section name, heading, or clause number, as provided in the context.
4. If the question is unrelated to the context or the context is empty, clearly respond with: "Sorry, this is out of my knowledge base."

Please adhere to the following response guidelines:
- Provide clear, direct answers using only the given context.
- Do not include any additional information outside of the context.
- Avoid rephrasing or summarizing the context unless explicitly relevant to the question.
- If no relevant answer exists in the context, respond with: "Sorry, this is out of my knowledge base."
- If the context is not provided, your response should also be: "Sorry, this is out of my knowledge base."

Here is an example of how to structure your response:

Answer:
[Answer based on context]

Source:
[Source details with section, clause, or heading]
"""
52
-
53
# Define the user message template
# Placeholders {context} and {question} are filled via str.replace in
# generate_rag_response (not str.format, so literal braces in documents are
# safe). The leading "#" on the ###Context line is part of the prompt text.
qna_user_message_template = """
# ###Context
Here are some excerpts from the Flykite Airlines HR Policy Handbook and their sources that are relevant to the employee's question mentioned below:
{context}

###Question
{question}
"""
62
-
63
# Processing PDF files
@st.cache_resource
def load_and_process_pdfs(uploaded_files):
    """Load uploaded PDFs, chunk them, embed them, and return a retriever.

    Parameters
    ----------
    uploaded_files : list
        Streamlit ``UploadedFile`` objects (PDFs) from ``st.file_uploader``.

    Returns
    -------
    A Chroma similarity retriever over the token-chunked documents (k=3).
    """
    all_documents = []
    for uploaded_file in uploaded_files:
        # PyMuPDFLoader needs a filesystem path, so spill the in-memory
        # upload to a temporary file first.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
            tmp_file.write(uploaded_file.getvalue())
            tmp_file_path = tmp_file.name
        try:
            loader = PyMuPDFLoader(tmp_file_path)
            documents = loader.load()
            all_documents.extend(documents)
        finally:
            # Clean up the temporary file even if loading raises
            # (previously the file leaked on a load error).
            os.remove(tmp_file_path)

    # Token-aware chunking keeps each chunk within the embedder's budget.
    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        encoding_name='cl100k_base',
        chunk_size=1000,
    )
    document_chunks = text_splitter.split_documents(all_documents)

    embedding_model = OpenAIEmbeddings(
        openai_api_key=OPENAI_API_KEY,
        openai_api_base=OPENAI_API_BASE
    )

    # Create an in-memory vector store (or use a persistent one if needed)
    vectorstore = Chroma.from_documents(
        document_chunks,
        embedding_model
    )
    return vectorstore.as_retriever(search_type='similarity', search_kwargs={'k': 3})
92
-
93
def generate_rag_response(user_input, retriever, max_tokens=500, temperature=0, top_p=0.95):
    """Answer an employee question using context fetched from *retriever*.

    Retrieves the top document chunks for *user_input*, splices them into the
    QnA prompt templates, and asks the chat model. On any API failure the
    error is returned to the caller as a message string rather than raised.
    """
    # Retrieve relevant document chunks and flatten them into one context
    # string, chunks separated by ". ".
    relevant_chunks = retriever.get_relevant_documents(query=user_input)
    context_for_query = ". ".join(chunk.page_content for chunk in relevant_chunks)

    # str.replace (not str.format) so stray braces in documents or the
    # question cannot break formatting; context is substituted first.
    prompt = qna_user_message_template.replace('{context}', context_for_query)
    prompt = prompt.replace('{question}', user_input)

    # Generate the response; surface any API error as the returned text.
    try:
        completion = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": qna_system_message},
                {"role": "user", "content": prompt}
            ],
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p
        )
        return completion.choices[0].message.content.strip()
    except Exception as e:
        return f'Sorry, I encountered the following error: \n {e}'
121
-
122
# Streamlit App — page layout and interaction flow.
st.title("LLM-Powered Support bot")

# Accept one or more PDFs; the retriever is only built once files arrive.
pdf_uploads = st.file_uploader("Upload PDF files", type=["pdf"], accept_multiple_files=True)

retriever = None
if pdf_uploads:
    st.info("Processing uploaded PDFs...")
    retriever = load_and_process_pdfs(pdf_uploads)
    st.success("PDFs processed and ready for questioning!")

# Question box appears only after the documents are indexed.
if retriever:
    user_question = st.text_input("Ask a question about the uploaded documents:")
    if user_question:
        with st.spinner("Generating response..."):
            st.write(generate_rag_response(user_question, retriever))