Samizie committed on
Commit
29e2c36
·
verified ·
1 Parent(s): 109d23a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +180 -180
app.py CHANGED
@@ -1,180 +1,180 @@
1
- import streamlit as st
2
- from decouple import config
3
- import asyncio
4
- from langchain.chains import create_retrieval_chain
5
- from langchain.chains.combine_documents import create_stuff_documents_chain
6
- from langchain_groq import ChatGroq
7
- from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
8
- from langchain_core.messages import SystemMessage
9
- from scraper.scraper import process_urls
10
- from embedding.vector_store import initialize_vector_store, clear_chroma_db
11
- from conversation.talks import clean_input, small_talks
12
- import nest_asyncio
13
-
14
- nest_asyncio.apply()
15
- #Clearing ChromaDB at startup to clean up any previous data
16
- clear_chroma_db()
17
-
18
-
19
-
20
-
21
- #Groq API Key
22
- groq_api = config("GROQ_API_KEY")
23
-
24
- #Initializing LLM with memory
25
- llm = ChatGroq(model="llama-3.2-1b-preview", groq_api_key=groq_api, temperature=0)
26
-
27
-
28
-
29
- #Ensure proper asyncio handling for Windows
30
- import sys
31
- if sys.platform.startswith("win"):
32
- asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
33
-
34
- #Async helper function
35
- def run_asyncio_coroutine(coro):
36
- try:
37
- return asyncio.run(coro)
38
- except RuntimeError:
39
- loop = asyncio.new_event_loop()
40
- asyncio.set_event_loop(loop)
41
- return loop.run_until_complete(coro)
42
-
43
-
44
-
45
- import streamlit as st
46
-
47
- st.title("WebGPT 1.0 🤖")
48
-
49
- # URL inputs
50
- urls = st.text_area("Enter URLs (one per line)")
51
- run_scraper = st.button("Run Scraper", disabled=not urls.strip())
52
-
53
- # Sessions & states
54
- if "messages" not in st.session_state:
55
- st.session_state.messages = [] # Chat history
56
- if "history" not in st.session_state:
57
- st.session_state.history = "" # Stores past Q&A for memory
58
- if "scraping_done" not in st.session_state:
59
- st.session_state.scraping_done = False
60
- if "vector_store" not in st.session_state:
61
- st.session_state.vector_store = None
62
-
63
- # Run scraper
64
- if run_scraper:
65
- st.write("Fetching and processing URLs... This may take a while.")
66
- split_docs = run_asyncio_coroutine(process_urls(urls.split("\n")))
67
- st.session_state.vector_store = initialize_vector_store(split_docs)
68
- st.session_state.scraping_done = True
69
- st.success("Scraping and processing completed!")
70
-
71
- # ✅ Clear chat button
72
- if st.button("Clear Chat"):
73
- st.session_state.messages = [] # Reset message history
74
- st.session_state.history = "" # Reset history tracking
75
- st.success("Chat cleared!")
76
-
77
- # Ensuring chat only enables after scraping
78
- if not st.session_state.scraping_done:
79
- st.warning("Scrape some data first to enable chat!")
80
- else:
81
- st.write("### Chat With WebGPT 💬")
82
-
83
- # Display chat history
84
- for message in st.session_state.messages:
85
- role, text = message["role"], message["text"]
86
- with st.chat_message(role):
87
- st.write(text)
88
-
89
- # Takes in user input
90
- user_query = st.chat_input("Ask a question...")
91
-
92
- if user_query:
93
- st.session_state.messages.append({"role": "user", "text": user_query})
94
- with st.chat_message("user"):
95
- st.write(user_query)
96
-
97
- user_query_cleaned = clean_input(user_query)
98
- response = "" # Default value for response
99
- source_url = "" # Default value for source url
100
-
101
- # Check for small talk responses
102
- if user_query_cleaned in small_talks:
103
- response = small_talks[user_query_cleaned]
104
- source_url = "Knowledge base" # Small talk comes from the knowledge base
105
-
106
- else:
107
- # ✅ Setup retriever (with a similarity threshold or top-k retrieval)
108
- retriever = st.session_state.vector_store.as_retriever(
109
- search_kwargs={'k': 5}
110
- )
111
-
112
- # ✅ Retrieve context
113
- retrieved_docs = retriever.invoke(user_query_cleaned)
114
- retrieved_text = " ".join([doc.page_content for doc in retrieved_docs])
115
-
116
- # ✅ Define Langchain PromptTemplate properly
117
- system_prompt_template = PromptTemplate(
118
- input_variables=["context", "query"],
119
- template="""
120
- You are WebGPT, an AI assistant for question-answering tasks that **only answers questions based on the provided context**.
121
-
122
- - Understand the context first and provide a relevant answer.
123
- - If the answer is **not** found in the Context, reply with: "I can't find your request in the provided context."
124
- - If the question is **unrelated** to the Context, reply with: "I can't answer that. do not generate responses."
125
- - **Do not** use external knowledge, assumptions, or filler responses. Stick to the context provided.
126
- - Keep responses clear, concise, and relevant to the user’s query.
127
-
128
- Context:
129
- {context}
130
-
131
- Now, answer the user's question:
132
- {input}
133
- """
134
- )
135
-
136
- # ✅ Generate prompt with retrieved context & user query
137
- final_prompt = system_prompt_template.format(
138
- context=retrieved_text,
139
- input=user_query_cleaned
140
- )
141
-
142
- # ✅ Create chains (ensure the prompt is correct)
143
- scraper_chain = create_stuff_documents_chain(llm=llm, prompt=system_prompt_template)
144
- llm_chain = create_retrieval_chain(retriever, scraper_chain)
145
-
146
- # ✅ Process response and source
147
- if retrieved_docs:
148
- try:
149
- response_data = llm_chain.invoke({"context": retrieved_text, "input": user_query_cleaned})
150
- response = response_data.get("answer", "").strip()
151
- source_url = retrieved_docs[0].metadata.get("source", "Unknown")
152
-
153
- # Fallback if response is still empty
154
- if not response:
155
- response = "I can't find your request in the provided context."
156
- source_url = "No source found"
157
-
158
- except Exception as e:
159
- response = f"Error generating response: {str(e)}"
160
- source_url = "Error"
161
-
162
- else:
163
- response = "I can't find your request in the provided context."
164
- source_url = "No source found"
165
-
166
- # ✅ Track history & update session state
167
- history_text = "\n".join(
168
- [f"User: {msg['text']}" if msg["role"] == "user" else f"AI: {msg['text']}" for msg in st.session_state.messages]
169
- )
170
- st.session_state.history = history_text
171
-
172
- # ✅ Format and display response
173
- formatted_response = f"**Answer:** {response}"
174
- if response != "I can't find your request in the provided context." and source_url:
175
- formatted_response += f"\n\n**Source:** {source_url}"
176
-
177
- st.session_state.messages.append({"role": "assistant", "text": formatted_response})
178
- with st.chat_message("assistant"):
179
- st.write(formatted_response)
180
-
 
1
+ import streamlit as st
2
+ from decouple import config
3
+ import asyncio
4
+ from langchain.chains import create_retrieval_chain
5
+ from langchain.chains.combine_documents import create_stuff_documents_chain
6
+ from langchain_groq import ChatGroq
7
+ from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
8
+ from langchain_core.messages import SystemMessage
9
+ from scraper.scraper import process_urls
10
+ from embedding.vector_store import initialize_vector_store, clear_chroma_db
11
+ from conversation.talks import clean_input, small_talks
12
+ import nest_asyncio
13
+
14
+ nest_asyncio.apply()
15
+ #Clearing ChromaDB at startup to clean up any previous data
16
+ #clear_chroma_db()
17
+
18
+
19
+
20
+
21
+ #Groq API Key
22
+ groq_api = config("GROQ_API_KEY")
23
+
24
+ #Initializing LLM with memory
25
+ llm = ChatGroq(model="llama-3.2-1b-preview", groq_api_key=groq_api, temperature=0)
26
+
27
+
28
+
29
+ #Ensure proper asyncio handling for Windows
30
+ import sys
31
+ if sys.platform.startswith("win"):
32
+ asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
33
+
34
+ #Async helper function
35
+ def run_asyncio_coroutine(coro):
36
+ try:
37
+ return asyncio.run(coro)
38
+ except RuntimeError:
39
+ loop = asyncio.new_event_loop()
40
+ asyncio.set_event_loop(loop)
41
+ return loop.run_until_complete(coro)
42
+
43
+
44
+
45
+ import streamlit as st
46
+
47
+ st.title("WebGPT 1.0 🤖")
48
+
49
+ # URL inputs
50
+ urls = st.text_area("Enter URLs (one per line)")
51
+ run_scraper = st.button("Run Scraper", disabled=not urls.strip())
52
+
53
+ # Sessions & states
54
+ if "messages" not in st.session_state:
55
+ st.session_state.messages = [] # Chat history
56
+ if "history" not in st.session_state:
57
+ st.session_state.history = "" # Stores past Q&A for memory
58
+ if "scraping_done" not in st.session_state:
59
+ st.session_state.scraping_done = False
60
+ if "vector_store" not in st.session_state:
61
+ st.session_state.vector_store = None
62
+
63
+ # Run scraper
64
+ if run_scraper:
65
+ st.write("Fetching and processing URLs... This may take a while.")
66
+ split_docs = run_asyncio_coroutine(process_urls(urls.split("\n")))
67
+ st.session_state.vector_store = initialize_vector_store(split_docs)
68
+ st.session_state.scraping_done = True
69
+ st.success("Scraping and processing completed!")
70
+
71
+ # ✅ Clear chat button
72
+ if st.button("Clear Chat"):
73
+ st.session_state.messages = [] # Reset message history
74
+ st.session_state.history = "" # Reset history tracking
75
+ st.success("Chat cleared!")
76
+
77
+ # Ensuring chat only enables after scraping
78
+ if not st.session_state.scraping_done:
79
+ st.warning("Scrape some data first to enable chat!")
80
+ else:
81
+ st.write("### Chat With WebGPT 💬")
82
+
83
+ # Display chat history
84
+ for message in st.session_state.messages:
85
+ role, text = message["role"], message["text"]
86
+ with st.chat_message(role):
87
+ st.write(text)
88
+
89
+ # Takes in user input
90
+ user_query = st.chat_input("Ask a question...")
91
+
92
+ if user_query:
93
+ st.session_state.messages.append({"role": "user", "text": user_query})
94
+ with st.chat_message("user"):
95
+ st.write(user_query)
96
+
97
+ user_query_cleaned = clean_input(user_query)
98
+ response = "" # Default value for response
99
+ source_url = "" # Default value for source url
100
+
101
+ # Check for small talk responses
102
+ if user_query_cleaned in small_talks:
103
+ response = small_talks[user_query_cleaned]
104
+ source_url = "Knowledge base" # Small talk comes from the knowledge base
105
+
106
+ else:
107
+ # ✅ Setup retriever (with a similarity threshold or top-k retrieval)
108
+ retriever = st.session_state.vector_store.as_retriever(
109
+ search_kwargs={'k': 5}
110
+ )
111
+
112
+ # ✅ Retrieve context
113
+ retrieved_docs = retriever.invoke(user_query_cleaned)
114
+ retrieved_text = " ".join([doc.page_content for doc in retrieved_docs])
115
+
116
+ # ✅ Define Langchain PromptTemplate properly
117
+ system_prompt_template = PromptTemplate(
118
+ input_variables=["context", "query"],
119
+ template="""
120
+ You are WebGPT, an AI assistant for question-answering tasks that **only answers questions based on the provided context**.
121
+
122
+ - Understand the context first and provide a relevant answer.
123
+ - If the answer is **not** found in the Context, reply with: "I can't find your request in the provided context."
124
+ - If the question is **unrelated** to the Context, reply with: "I can't answer that. do not generate responses."
125
+ - **Do not** use external knowledge, assumptions, or filler responses. Stick to the context provided.
126
+ - Keep responses clear, concise, and relevant to the user’s query.
127
+
128
+ Context:
129
+ {context}
130
+
131
+ Now, answer the user's question:
132
+ {input}
133
+ """
134
+ )
135
+
136
+ # ✅ Generate prompt with retrieved context & user query
137
+ final_prompt = system_prompt_template.format(
138
+ context=retrieved_text,
139
+ input=user_query_cleaned
140
+ )
141
+
142
+ # ✅ Create chains (ensure the prompt is correct)
143
+ scraper_chain = create_stuff_documents_chain(llm=llm, prompt=system_prompt_template)
144
+ llm_chain = create_retrieval_chain(retriever, scraper_chain)
145
+
146
+ # ✅ Process response and source
147
+ if retrieved_docs:
148
+ try:
149
+ response_data = llm_chain.invoke({"context": retrieved_text, "input": user_query_cleaned})
150
+ response = response_data.get("answer", "").strip()
151
+ source_url = retrieved_docs[0].metadata.get("source", "Unknown")
152
+
153
+ # Fallback if response is still empty
154
+ if not response:
155
+ response = "I can't find your request in the provided context."
156
+ source_url = "No source found"
157
+
158
+ except Exception as e:
159
+ response = f"Error generating response: {str(e)}"
160
+ source_url = "Error"
161
+
162
+ else:
163
+ response = "I can't find your request in the provided context."
164
+ source_url = "No source found"
165
+
166
+ # ✅ Track history & update session state
167
+ history_text = "\n".join(
168
+ [f"User: {msg['text']}" if msg["role"] == "user" else f"AI: {msg['text']}" for msg in st.session_state.messages]
169
+ )
170
+ st.session_state.history = history_text
171
+
172
+ # ✅ Format and display response
173
+ formatted_response = f"**Answer:** {response}"
174
+ if response != "I can't find your request in the provided context." and source_url:
175
+ formatted_response += f"\n\n**Source:** {source_url}"
176
+
177
+ st.session_state.messages.append({"role": "assistant", "text": formatted_response})
178
+ with st.chat_message("assistant"):
179
+ st.write(formatted_response)
180
+