smitathkr1 committed on
Commit
949291e
·
verified ·
1 Parent(s): a2baf60

Upload 2 files

Browse files
Files changed (3) hide show
  1. pages +0 -0
  2. pages/charts.py +33 -0
  3. pages/search.py +177 -0
pages DELETED
File without changes
pages/charts.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import altair as alt
3
+ from vega_datasets import data
4
+
5
# Demo page: renders one Altair scatter chart of the vega_datasets "cars" data
# in two themed tabs and inside chat messages.
source = data.cars()

# Named `user_input` (not `input`) so the builtin `input` is not shadowed.
# st.chat_input returns None until the user submits a message.
user_input = st.chat_input(
    placeholder="Type a message",
    key="input",
)

# Horsepower vs. miles-per-gallon, coloured by origin; .interactive() enables
# pan/zoom on the chart.
chart = alt.Chart(source).mark_circle().encode(
    x='Horsepower',
    y='Miles_per_Gallon',
    color='Origin',
).interactive()

tab1, tab2 = st.tabs(["Streamlit theme (default)", "Altair native theme"])

with tab1:
    # Use the Streamlit theme.
    # This is the default. So you can also omit the theme argument.
    st.altair_chart(chart, theme="streamlit", use_container_width=True)
with tab2:
    # Use the native Altair theme.
    st.altair_chart(chart, theme=None, use_container_width=True)

# Always show a greeting bubble containing the chart.
message = st.chat_message("assistant")
message.write("Hello human")
message.altair_chart(chart, theme=None, use_container_width=True)

# Only the exact phrase "show me sales" triggers the second assistant bubble.
if user_input == "show me sales":
    message = st.chat_message("assistant")
    message.write("Here are the sales")
    message.altair_chart(chart, theme="streamlit", use_container_width=True)
pages/search.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import queue
3
+ import re
4
+ import tempfile
5
+ import threading
6
+ from groq import Groq
7
+ import streamlit as st
8
+
9
+ from embedchain import App
10
+ from embedchain.config import BaseLlmConfig
11
+ from embedchain.helpers.callbacks import (StreamingStdOutCallbackHandlerYield,
12
+ generate)
13
+
14
# Credentials must not live in source control: the Groq API key is taken from
# the GROQ_API_KEY environment variable (e.g. a deployment secret).
client_groq = Groq(api_key=os.environ.get("GROQ_API_KEY"))
15
def embedchain_bot(db_path, api_key):
    """Create an Embedchain ``App`` from an inline configuration.

    Args:
        db_path: Directory passed to the Chroma vector DB as its ``dir``.
        api_key: API key passed to both the OpenAI LLM and the OpenAI embedder.

    Returns:
        The object returned by ``App.from_config``.
    """
    llm_settings = {
        "provider": "openai",
        "config": {
            "model": "gpt-3.5-turbo-1106",
            "temperature": 0.5,
            "max_tokens": 4096,
            "top_p": 1,
            "stream": True,
            "api_key": api_key,
        },
    }
    vectordb_settings = {
        "provider": "chroma",
        "config": {"collection_name": "chat-pdf", "dir": db_path, "allow_reset": True},
    }
    embedder_settings = {"provider": "openai", "config": {"api_key": api_key}}
    chunker_settings = {"chunk_size": 20000, "chunk_overlap": 0, "length_function": "len"}

    return App.from_config(
        config={
            "llm": llm_settings,
            "vectordb": vectordb_settings,
            "embedder": embedder_settings,
            "chunker": chunker_settings,
        }
    )
37
+
38
+
39
def get_db_path():
    """Create a fresh temporary directory and return its path."""
    return tempfile.mkdtemp()
42
+
43
+
44
def get_ec_app(api_key):
    """Return the Embedchain app stored in the Streamlit session, creating it once.

    Args:
        api_key: Key handed to ``embedchain_bot`` when a new app has to be built.
            It is not used when an app is already stored in the session.

    Returns:
        The app stored under ``st.session_state.app``.
    """
    if "app" not in st.session_state:
        print("Creating app")
        created = embedchain_bot(get_db_path(), api_key)
        st.session_state.app = created
        return created

    print("Found app in session state")
    return st.session_state.app
54
+
55
+
56
with st.sidebar:
    # Credentials must not live in source control. Prefer the OPENAI_API_KEY
    # environment variable (e.g. a deployment secret); otherwise ask the user.
    env_api_key = os.environ.get("OPENAI_API_KEY", "")
    if env_api_key:
        st.session_state.api_key = env_api_key
    else:
        # The widget's key makes the typed value available as
        # st.session_state.api_key.
        st.text_input("OpenAI API Key", key="api_key", type="password")

    if st.session_state.api_key:
        app = get_ec_app(st.session_state.api_key)

    pdf_files = st.file_uploader("Upload your PDF files", accept_multiple_files=True, type="pdf")
    # Names of the files already ingested during this session.
    add_pdf_files = st.session_state.get("add_pdf_files", [])
    for pdf_file in pdf_files:
        file_name = pdf_file.name
        if file_name in add_pdf_files:
            continue

        # `app` only exists when a key is available, so check before using it.
        if not st.session_state.api_key:
            st.error("Please enter your OpenAI API Key")
            st.stop()

        temp_file_name = None
        try:
            # The upload is written to disk because the app is given a file path.
            # The original file name is kept as the prefix of the temp file name.
            with tempfile.NamedTemporaryFile(mode="wb", delete=False, prefix=file_name, suffix=".pdf") as f:
                f.write(pdf_file.getvalue())
                temp_file_name = f.name
            st.markdown(f"Adding {file_name} to knowledge base...")
            app.add(temp_file_name, data_type="pdf_file")
            st.markdown("")
            add_pdf_files.append(file_name)
            st.session_state.messages.append({"role": "assistant", "content": f"Added {file_name} to knowledge base!"})
        except Exception as e:
            st.error(f"Error adding {file_name} to knowledge base: {e}")
            st.stop()
        finally:
            # Remove the temp file on failure as well as on success.
            if temp_file_name and os.path.exists(temp_file_name):
                os.remove(temp_file_name)
    st.session_state["add_pdf_files"] = add_pdf_files
87
+
88
# Page header.
st.title("📄 Embedchain - Chat with PDF")
styled_caption = '<p style="font-size: 17px; color: #aaa;">🚀 An <a href="https://github.com/embedchain/embedchain">Embedchain</a> app powered by OpenAI!</p>'  # noqa: E501
st.markdown(styled_caption, unsafe_allow_html=True)

# Seed the conversation with a greeting the first time the session runs.
if "messages" not in st.session_state:
    greeting = {
        "role": "assistant",
        "content": """
            Hi! I'm chatbot powered by Embedchain, which can answer questions about your pdf documents.\n
            Upload your pdf documents here and I'll answer your questions about them!
        """,
    }
    st.session_state.messages = [greeting]

# Replay the stored conversation, one chat bubble per message.
for message in st.session_state.messages:
    bubble = st.chat_message(message["role"])
    bubble.markdown(message["content"])
106
+
107
# Handle one user question: stream the Embedchain answer, append the cited
# sources, then have the Groq model rewrite the result before storing it.
if prompt := st.chat_input("Ask me anything!"):
    if not st.session_state.api_key:
        st.error("Please enter your OpenAI API Key", icon="🤖")
        st.stop()

    app = get_ec_app(st.session_state.api_key)

    with st.chat_message("user"):
        st.session_state.messages.append({"role": "user", "content": prompt})
        st.markdown(prompt)

    with st.chat_message("assistant"):
        msg_placeholder = st.empty()
        msg_placeholder.markdown("Thinking...")
        full_response = ""

        # Queue handed to the streaming callback and drained by generate() below.
        q = queue.Queue()

        def app_response(result):
            """Run the chat call and store the answer and citations in ``result``."""
            llm_config = app.llm.config.as_dict()
            llm_config["callbacks"] = [StreamingStdOutCallbackHandlerYield(q=q)]
            config = BaseLlmConfig(**llm_config)
            answer, citations = app.chat(prompt, config=config, citations=True)
            result["answer"] = answer
            result["citations"] = citations

        # The chat call runs in a worker thread so this thread can render
        # chunks as they arrive.
        results = {}
        thread = threading.Thread(target=app_response, args=(results,))
        thread.start()

        for answer_chunk in generate(q):
            full_response += answer_chunk
            msg_placeholder.markdown(full_response)

        thread.join()
        citations = results["citations"]
        if citations:
            full_response += "\n\n**Sources**:\n"
            # Strips the segment between the last two dots of a "*.pdf" name.
            # Compiled once, outside the loop.
            pattern = re.compile(r"([^/]+)\.[^\.]+\.pdf$")
            sources = []
            for citation in citations:
                source = citation[1]["url"]
                match = pattern.search(source)
                if match:
                    source = match.group(1) + ".pdf"
                sources.append(source)
            # De-duplicate while keeping first-seen order so the list is stable.
            for source in dict.fromkeys(sources):
                full_response += f"- {source}\n"

        completion = client_groq.chat.completions.create(
            model="mixtral-8x7b-32768",
            messages=[
                # System and user turns must be separate dicts: in a single dict
                # the repeated "role"/"content" keys overwrite each other and
                # the system prompt is lost.
                {
                    "role": "system",
                    "content": "You are a helpful assistant helping elaborate on teh given topics and also remove any negative words or phrases taht you receive as your prompt in teh input text",
                },
                {
                    "role": "user",
                    "content": "Please expand on teh following " " Text: " + full_response + " " "Topic: " + prompt,
                },
            ],
            temperature=0.5,
            max_tokens=1324,
            top_p=1,
            stream=False,
            stop=None,
        )
        # The rewritten text replaces the streamed answer in the bubble.
        full_response = completion.choices[0].message.content
        msg_placeholder.markdown(full_response)

        print("Answer: ", full_response)
        st.session_state.messages.append({"role": "assistant", "content": full_response})