felipelemes commited on
Commit
9d7f1da
·
verified ·
1 Parent(s): 48fadb4

"Donate" button added

Browse files
Files changed (2) hide show
  1. app.py +157 -147
  2. donate.png +0 -0
app.py CHANGED
@@ -1,148 +1,158 @@
1
- import streamlit as st
2
- import os
3
- from langchain.embeddings import SentenceTransformerEmbeddings
4
- from langchain.vectorstores import FAISS
5
- from langchain_openai import ChatOpenAI
6
- from langchain.chains import RetrievalQA
7
- from langchain.prompts import ChatPromptTemplate
8
-
9
- # --- Path Configurations ---
10
- VECTOR_DB_PATH = "vector_db"
11
- EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
12
-
13
- # --- 1. Load Resources (Vector Database and Embedding Model) ---
14
- # @st.cache_resource loads these components only once when the Streamlit app starts
15
- @st.cache_resource
16
- def load_resources():
17
- st.spinner("Loading embedding model...")
18
- print(f"Loading embedding model: {EMBEDDING_MODEL_NAME}...")
19
- embeddings = SentenceTransformerEmbeddings(model_name=EMBEDDING_MODEL_NAME)
20
- print("Embedding model loaded.")
21
-
22
- st.spinner("Loading vector database...")
23
- print(f"Loading FAISS vector database from: {VECTOR_DB_PATH}...")
24
- # allow_dangerous_deserialization=True is needed for FAISS.load_local
25
- # It's safe to use if you generated the database yourself.
26
- vector_db = FAISS.load_local(VECTOR_DB_PATH, embeddings, allow_dangerous_deserialization=True)
27
- print("Vector database loaded.")
28
-
29
- return embeddings, vector_db
30
-
31
- embeddings, vector_db = load_resources()
32
-
33
- # --- 2. Load and Configure the OpenAI LLM (GPT-4o) ---
34
- openai_api_key = os.getenv("OPENAI_API_KEY")
35
-
36
- if openai_api_key:
37
- try:
38
- llm = ChatOpenAI(
39
- temperature=0.85, # Controls creativity/randomness (0.0 to 1.0)
40
- api_key=openai_api_key,
41
- model_name="gpt-4o",
42
- model_kwargs={"top_p": 0.9} # Controls diversity of output
43
- )
44
- st.success("OpenAI model (gpt-4o) loaded successfully!")
45
- except Exception as e:
46
- st.error(f"Error initializing OpenAI model. Check your API key, "
47
- f"model name, and plan/quotas: {e}")
48
- st.stop() # Stop the app if LLM cannot be initialized
49
- else:
50
- st.error("OpenAI API Key (OPENAI_API_KEY) not found in environment variables.")
51
- st.stop() # Stop the app if API key is not found
52
-
53
- # --- 3. Define the System Prompt for Assistant Behavior ---
54
- SYSTEM_PROMPT_TEMPLATE = """
55
- You are a friendly, experienced, and patient study tutor specializing in Databricks.
56
- Your goal is to help the user deeply understand topics from Databricks documentation to prepare for Databricks certifications.
57
-
58
- Follow these guidelines:
59
- 1. **Always respond in the same language as the user's question.** If the question is in Portuguese, reply in Portuguese. If it's in English, reply in English.
60
- 2. **Explain clearly and concisely:** Use accessible language and avoid unnecessary jargon where possible.
61
- 3. **Go beyond simple retrieval:** Do not just reproduce information. Interpret it, reorganize it, and present it in a didactic way.
62
- 4. **Provide practical examples:** If appropriate, create small examples or analogies to illustrate the concept within the context of Databricks or data engineering scenarios.
63
- 5. **Maintain an encouraging and motivating tone:** Encourage the user in their learning.
64
- 6. **Use the provided "Context Documents" to answer the question.** Prioritize information from these documents.
65
- 7. **If the answer is not in the context documents, be honest:** State that you could not find the information and suggest the user search other sources or rephrase the question. Do not invent information.
66
- 8. Format your responses legibly, using lists, bold text, or code blocks when appropriate.
67
-
68
- Context Documents:
69
- {context}
70
-
71
- User Question:
72
- {question}
73
- """
74
-
75
- # Create a ChatPromptTemplate from the System Prompt
76
- qa_prompt = ChatPromptTemplate.from_messages(
77
- [
78
- ("system", SYSTEM_PROMPT_TEMPLATE),
79
- ("human", "{question}") # Where the user's question will be inserted
80
- ]
81
- )
82
-
83
- # --- 4. Configure the RAG Chain (RetrievalQA) ---
84
- print("Configuring the RAG chain...")
85
- qa_chain = RetrievalQA.from_chain_type(
86
- llm=llm, # <-- THIS IS THE CORRECTED LINE!
87
- chain_type="stuff", # 'stuff' strategy puts all retrieved documents directly into the LLM's prompt
88
- retriever=vector_db.as_retriever(search_kwargs={"k": 4}), # Configure FAISS as the retriever
89
- # k=4 means it retrieves the 4 most relevant chunks
90
- return_source_documents=True, # Optional: returns the documents that were used for the answer
91
- chain_type_kwargs={"prompt": qa_prompt} # Pass the custom prompt to the chain
92
- )
93
- print("RAG chain configured.")
94
-
95
- # --- 5. Streamlit Interface ---
96
- st.set_page_config(
97
- page_title="📚 Databricks Study Assistant with RAG",
98
- layout="wide",
99
- initial_sidebar_state="collapsed"
100
- )
101
-
102
- st.title("📚 Databricks Study Assistant with RAG") # Updated title text
103
-
104
- # Updated objective description
105
- st.markdown("""
106
- This assistant is designed to provide you with precise, context-aware answers directly sourced from the official Azure Databricks documentation.
107
- It aims to significantly aid your studies for Databricks certifications and streamline the process of resolving technical challenges by offering a more fluid and natural consultation experience.
108
- """)
109
-
110
- # Updated context description
111
- st.markdown("""
112
- This assistant's knowledge base is built upon the official Azure Databricks documentation
113
- ([https://learn.microsoft.com/en-us/azure/databricks/](https://learn.microsoft.com/en-us/azure/databricks/))
114
- and the official Databricks Azure Knowledge Base
115
- ([https://kb.databricks.com/](https://kb.databricks.com/)).
116
- """)
117
-
118
- user_query = st.text_input(
119
- "Your question about Databricks documentation:",
120
- placeholder="Ex: How to configure Auto Loader in Databricks?"
121
- )
122
-
123
- if st.button("Get Answer", type="primary"):
124
- if user_query:
125
- with st.spinner("Searching and generating response..."):
126
- try:
127
- response = qa_chain({"query": user_query})
128
- st.subheader("Answer:")
129
- st.markdown(response["result"]) # Use markdown for formatting the response
130
-
131
- st.subheader("Source Documents:")
132
- if response["source_documents"]:
133
- for i, doc in enumerate(response["source_documents"]):
134
- st.write(f"**Page/Source {i+1}:**")
135
- st.info(doc.page_content) # Content of the chunk
136
- if 'page' in doc.metadata: # If the PDF loader added the page number
137
- st.write(f"*(Page: {doc.metadata['page'] + 1})*") # +1 because it's 0-based
138
- st.markdown("---")
139
- else:
140
- st.info("No relevant source documents found for this question.")
141
- except Exception as e:
142
- st.error(f"An error occurred while processing your question: {e}")
143
- st.info("Please check your OpenAI API key, model name, and plan/quotas.")
144
- else:
145
- st.warning("Please type your question before submitting.")
146
-
147
- st.markdown("---")
 
 
 
 
 
 
 
 
 
 
148
  st.caption("Developed by you, with LangChain, Streamlit, and LLMs.")
 
1
+ import streamlit as st
2
+ import os
3
+ from langchain.embeddings import SentenceTransformerEmbeddings
4
+ from langchain.vectorstores import FAISS
5
+ from langchain_openai import ChatOpenAI
6
+ from langchain.chains import RetrievalQA
7
+ from langchain.prompts import ChatPromptTemplate
8
+
9
# --- Path Configurations ---
VECTOR_DB_PATH = "vector_db"  # directory the FAISS index is loaded from (see FAISS.load_local below)
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"  # sentence-transformers model used to embed queries against the index
12
+
13
# --- 1. Load Resources (Vector Database and Embedding Model) ---
# @st.cache_resource loads these components only once when the Streamlit app starts
@st.cache_resource
def load_resources():
    """Load the sentence-transformer embedding model and the FAISS vector store.

    Returns:
        tuple: (embeddings, vector_db) — the SentenceTransformerEmbeddings
        wrapper for EMBEDDING_MODEL_NAME and the FAISS index loaded from
        VECTOR_DB_PATH.
    """
    # FIX: st.spinner is a context manager; the original called it as a bare
    # expression, which never shows a spinner. Wrap the slow work in `with`
    # so the user actually sees the progress indicator.
    with st.spinner("Loading embedding model..."):
        print(f"Loading embedding model: {EMBEDDING_MODEL_NAME}...")
        embeddings = SentenceTransformerEmbeddings(model_name=EMBEDDING_MODEL_NAME)
        print("Embedding model loaded.")

    with st.spinner("Loading vector database..."):
        print(f"Loading FAISS vector database from: {VECTOR_DB_PATH}...")
        # allow_dangerous_deserialization=True is needed for FAISS.load_local
        # It's safe to use if you generated the database yourself.
        vector_db = FAISS.load_local(VECTOR_DB_PATH, embeddings, allow_dangerous_deserialization=True)
        print("Vector database loaded.")

    return embeddings, vector_db
30
+
31
embeddings, vector_db = load_resources()

# --- 2. Load and Configure the OpenAI LLM (GPT-4o) ---
openai_api_key = os.getenv("OPENAI_API_KEY")

# Guard clause: without an API key there is nothing to run.
if not openai_api_key:
    st.error("OpenAI API Key (OPENAI_API_KEY) not found in environment variables.")
    st.stop()  # Stop the app if API key is not found

# NOTE(review): these st.* calls run before st.set_page_config further down;
# Streamlit expects set_page_config to be the first command — confirm this
# does not raise at runtime.
try:
    llm = ChatOpenAI(
        temperature=0.85,  # Controls creativity/randomness (0.0 to 1.0)
        api_key=openai_api_key,
        model_name="gpt-4o",
        model_kwargs={"top_p": 0.9},  # Controls diversity of output
    )
    st.success("OpenAI model (gpt-4o) loaded successfully!")
except Exception as e:
    st.error(f"Error initializing OpenAI model. Check your API key, "
             f"model name, and plan/quotas: {e}")
    st.stop()  # Stop the app if LLM cannot be initialized
52
+
53
# --- 3. Define the System Prompt for Assistant Behavior ---
# Template consumed by the RetrievalQA chain below: the chain substitutes
# {context} with the retrieved document chunks and {question} with the
# user's query (string content is runtime behavior — do not edit casually).
SYSTEM_PROMPT_TEMPLATE = """
You are a friendly, experienced, and patient study tutor specializing in Databricks.
Your goal is to help the user deeply understand topics from Databricks documentation to prepare for Databricks certifications.

Follow these guidelines:
1. **Always respond in the same language as the user's question.** If the question is in Portuguese, reply in Portuguese. If it's in English, reply in English.
2. **Explain clearly and concisely:** Use accessible language and avoid unnecessary jargon where possible.
3. **Go beyond simple retrieval:** Do not just reproduce information. Interpret it, reorganize it, and present it in a didactic way.
4. **Provide practical examples:** If appropriate, create small examples or analogies to illustrate the concept within the context of Databricks or data engineering scenarios.
5. **Maintain an encouraging and motivating tone:** Encourage the user in their learning.
6. **Use the provided "Context Documents" to answer the question.** Prioritize information from these documents.
7. **If the answer is not in the context documents, be honest:** State that you could not find the information and suggest the user search other sources or rephrase the question. Do not invent information.
8. Format your responses legibly, using lists, bold text, or code blocks when appropriate.

Context Documents:
{context}

User Question:
{question}
"""
74
+
75
# Build the chat prompt: the system message carries the tutor instructions
# (with the {context}/{question} slots), the human message carries the query.
# NOTE(review): {question} appears both inside SYSTEM_PROMPT_TEMPLATE and as
# the human message, so the user's question is sent twice — confirm intended.
_prompt_messages = [
    ("system", SYSTEM_PROMPT_TEMPLATE),
    ("human", "{question}"),  # Where the user's question will be inserted
]
qa_prompt = ChatPromptTemplate.from_messages(_prompt_messages)
82
+
83
# --- 4. Configure the RAG Chain (RetrievalQA) ---
print("Configuring the RAG chain...")

# Expose the FAISS store as a retriever; k=4 means the 4 most relevant
# chunks are fetched and stuffed directly into the LLM prompt.
_retriever = vector_db.as_retriever(search_kwargs={"k": 4})

qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",  # 'stuff' strategy puts all retrieved documents directly into the LLM's prompt
    retriever=_retriever,
    return_source_documents=True,  # also surface the chunks used to build the answer
    chain_type_kwargs={"prompt": qa_prompt},  # inject the custom tutor prompt
)
print("RAG chain configured.")
94
+
95
# --- 5. Streamlit Interface ---
# NOTE(review): Streamlit expects st.set_page_config to be the very first
# st.* call in the script, but load_resources()/st.success above already
# ran — confirm this does not raise StreamlitAPIException at runtime.
st.set_page_config(
    page_title="📚 Databricks Study Assistant with RAG",
    layout="wide",
    initial_sidebar_state="collapsed"
)

# Create columns layout: wide main column for the Q&A flow, narrow right
# column for the donate image added in this commit.
col1, col2 = st.columns([3, 1])

with col1:
    st.title("📚 Databricks Study Assistant with RAG")  # Updated title text

    # Updated objective description
    st.markdown("""
This assistant is designed to provide you with precise, context-aware answers directly sourced from the official Azure Databricks documentation.
It aims to significantly aid your studies for Databricks certifications and streamline the process of resolving technical challenges by offering a more fluid and natural consultation experience.
""")

    # Updated context description
    st.markdown("""
This assistant's knowledge base is built upon the official Azure Databricks documentation
([https://learn.microsoft.com/en-us/azure/databricks/](https://learn.microsoft.com/en-us/azure/databricks/))
and the official Databricks Azure Knowledge Base
([https://kb.databricks.com/](https://kb.databricks.com/)).
""")

    user_query = st.text_input(
        "Your question about Databricks documentation:",
        placeholder="Ex: How to configure Auto Loader in Databricks?"
    )

    if st.button("Get Answer", type="primary"):
        if user_query:
            with st.spinner("Searching and generating response..."):
                try:
                    # NOTE(review): calling the chain directly like this is the
                    # legacy __call__ style — confirm it still works with the
                    # installed LangChain version (newer APIs use .invoke()).
                    response = qa_chain({"query": user_query})
                    st.subheader("Answer:")
                    st.markdown(response["result"])  # Use markdown for formatting the response

                    st.subheader("Source Documents:")
                    if response["source_documents"]:
                        for i, doc in enumerate(response["source_documents"]):
                            st.write(f"**Page/Source {i+1}:**")
                            st.info(doc.page_content)  # Content of the chunk
                            if 'page' in doc.metadata:  # If the PDF loader added the page number
                                st.write(f"*(Page: {doc.metadata['page'] + 1})*")  # +1 because it's 0-based
                            st.markdown("---")
                    else:
                        st.info("No relevant source documents found for this question.")
                except Exception as e:
                    st.error(f"An error occurred while processing your question: {e}")
                    st.info("Please check your OpenAI API key, model name, and plan/quotas.")
        else:
            st.warning("Please type your question before submitting.")

with col2:
    st.markdown("<br><br>", unsafe_allow_html=True)  # vertical spacing to push the image down
    # NOTE(review): st.markdown and st.image emit separate page elements, so
    # this <div> does not actually wrap the image — confirm the right-align
    # styling has any effect.
    st.markdown('<div style="text-align: right;">', unsafe_allow_html=True)
    st.image("donate.png", width=180)
    st.markdown('</div>', unsafe_allow_html=True)

st.markdown("---")
st.caption("Developed by you, with LangChain, Streamlit, and LLMs.")
donate.png ADDED