EmbeddedAndrew committed on
Commit
607a79c
·
1 Parent(s): 8e7e766
Files changed (5) hide show
  1. .DS_Store +0 -0
  2. README.md +4 -4
  3. app.py +99 -0
  4. chain.py +127 -0
  5. requirements.txt +15 -0
.DS_Store ADDED
Binary file (6.15 kB). View file
 
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
- title: Examin8
3
- emoji: 💻
4
- colorFrom: indigo
5
- colorTo: pink
6
  sdk: gradio
7
  sdk_version: 3.16.2
8
  app_file: app.py
 
1
  ---
2
+ title: Examinate
3
+ emoji: 🌖
4
+ colorFrom: blue
5
+ colorTo: purple
6
  sdk: gradio
7
  sdk_version: 3.16.2
8
  app_file: app.py
app.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import datetime
2
+ import os
3
+
4
+ import gradio as gr
5
+ import langchain
6
+ import weaviate
7
+ from langchain.vectorstores import Weaviate
8
+
9
+ from chain import get_new_chain1
10
+
11
# Address of the Weaviate instance. Read once at import time, so a missing
# variable surfaces immediately as a KeyError.
WEAVIATE_URL = os.environ["WEAVIATE_URL"]


def get_weaviate_store():
    """Create the LangChain ``Weaviate`` store used for document retrieval.

    The OpenAI key is looked up in the ``OPENAI_API_KEY`` environment variable
    at call time and forwarded to Weaviate in the ``X-OpenAI-Api-Key`` header.

    Returns:
        A ``Weaviate`` wrapper built with "Paragraph" and "content" as its
        positional arguments and ``attributes=["source"]``.

    Raises:
        KeyError: If ``OPENAI_API_KEY`` is not set in the environment.
    """
    openai_headers = {"X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"]}
    weaviate_client = weaviate.Client(
        url=WEAVIATE_URL,
        additional_headers=openai_headers,
    )
    return Weaviate(weaviate_client, "Paragraph", "content", attributes=["source"])
20
+
21
+
22
def set_openai_api_key(api_key, agent):
    """Build a question-answering chain for the OpenAI key the user supplied.

    Args:
        api_key: Key pasted into the UI. A falsy value (empty string or
            ``None``) produces no chain.
        agent: Current agent state passed in by the Gradio event wiring.
            It is not used here.

    Returns:
        The chain returned by ``get_new_chain1``, or ``None`` when no key was
        given.

    Raises:
        Any exception raised while building the store or the chain is
        propagated unchanged.
    """
    if not api_key:
        return None

    # The key is placed in the process environment only while the store and
    # chain are being constructed. The ``finally`` clause blanks it even when
    # construction fails, so one user's key is never left behind in the
    # environment of the shared server process.
    os.environ["OPENAI_API_KEY"] = api_key
    try:
        vectorstore = get_weaviate_store()
        return get_new_chain1(vectorstore)
    finally:
        os.environ["OPENAI_API_KEY"] = ""
29
+
30
+
31
def chat(inp, history, agent):
    """Process one chat turn and return the updated conversation.

    Args:
        inp: The user's message.
        history: List of ``(user_message, answer)`` tuples from earlier turns,
            or a falsy value when the conversation is new.
        agent: Callable that takes ``{"question", "chat_history"}`` and returns
            a mapping with an ``"answer"`` entry, or ``None`` when no OpenAI
            key has been set yet.

    Returns:
        The conversation list twice, once for each Gradio output the caller
        binds this function to.
    """
    turns = history or []

    if agent is None:
        # No chain yet: reply with a prompt for the key instead of an answer.
        turns.append((inp, "Please paste your OpenAI key to use"))
    else:
        print("\n==== date/time: " + str(datetime.datetime.now()) + " ====")
        print("inp: " + inp)
        result = agent({"question": inp, "chat_history": turns})
        turns.append((inp, result["answer"]))
        print(turns)

    return turns, turns
44
+
45
+
46
# Gradio UI: an API-key box, the chat window, a message box with a Send
# button, example questions and two footer notes. Event wiring comes last.
block = gr.Blocks(css=".gradio-container {background-color: lightblue}")

with block:
    # Title banner.
    with gr.Row():
        gr.Markdown("<h3><center>Elenchos AI</center></h3>")

    # Password-style field where the user pastes an OpenAI key.
    openai_api_key_textbox = gr.Textbox(
        type="password",
        lines=1,
        show_label=False,
        placeholder="Paste your OpenAI API key (sk-...)",
    )

    chatbot = gr.Chatbot()

    # Message entry and its Send button share one row.
    with gr.Row():
        message = gr.Textbox(
            lines=1,
            placeholder="What is an estuary?",
            label="What would you like to learn about marine biology?",
        )
        send_button = gr.Button(value="Send", variant="secondary")
        submit = send_button.style(full_width=False)

    example_questions = [
        "What are phytoplankton?",
        "How does microplastic pollution affect the oceans?",
        "What are artificial reefs?",
    ]
    gr.Examples(examples=example_questions, inputs=message)

    gr.HTML(
        "\n"
        "This simple application is an implementation of ChatGPT but over an "
        "external dataset (in this case, Wikipedia pages on Marine biology)."
    )

    gr.HTML(
        "<center>Powered by <a href='https://github.com/hwchase17/langchain'>LangChain 🦜️🔗</a></center>"
    )

    # ``state`` carries the conversation history; ``agent_state`` carries the
    # chain produced by ``set_openai_api_key``.
    state = gr.State()
    agent_state = gr.State()

    # Clicking Send and pressing Enter in the message box both run ``chat``.
    for register in (submit.click, message.submit):
        register(
            chat,
            inputs=[message, state, agent_state],
            outputs=[chatbot, state],
        )

    # Rebuild the chain whenever the key field changes.
    openai_api_key_textbox.change(
        set_openai_api_key,
        inputs=[openai_api_key_textbox, agent_state],
        outputs=[agent_state],
    )

block.launch(debug=True)
chain.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import pathlib
4
+ from typing import Dict, List, Tuple
5
+
6
+ import weaviate
7
+ from langchain import OpenAI, PromptTemplate
8
+ from langchain.chains import LLMChain
9
+ from langchain.chains.base import Chain
10
+ from langchain.chains.combine_documents.base import BaseCombineDocumentsChain
11
+ from langchain.chains.conversation.memory import ConversationBufferMemory
12
+ from langchain.chains.question_answering import load_qa_chain
13
+ from langchain.embeddings import OpenAIEmbeddings
14
+ from langchain.prompts import FewShotPromptTemplate, PromptTemplate
15
+ from langchain.prompts.example_selector import \
16
+ SemanticSimilarityExampleSelector
17
+ from langchain.vectorstores import FAISS, Weaviate
18
+ from pydantic import BaseModel
19
+
20
+
21
class CustomChain(Chain, BaseModel):
    """Chain that rephrases a follow-up question, retrieves documents and answers.

    Although ``input_keys`` lists only "question", ``_call`` also reads
    ``inputs["chat_history"]``, so callers must supply both.
    """

    # Vector store queried with ``similarity_search``.
    vstore: Weaviate
    # Chain whose ``combine_docs`` turns the retrieved documents into an answer.
    chain: BaseCombineDocumentsChain
    # Chain run to rewrite a follow-up question when there is prior history.
    key_word_extractor: Chain

    @property
    def input_keys(self) -> List[str]:
        """Names of the declared inputs."""
        return ["question"]

    @property
    def output_keys(self) -> List[str]:
        """Names of the produced outputs."""
        return ["answer"]

    def _call(self, inputs: Dict[str, str]) -> Dict[str, str]:
        """Answer ``inputs["question"]`` given ``inputs["chat_history"]``.

        With a non-empty history the question is first passed through
        ``key_word_extractor``. Otherwise it is used unchanged. The resulting
        query retrieves four documents, which are handed to ``chain``.
        """
        search_query = inputs["question"]
        history_text = _get_chat_history(inputs["chat_history"])

        if history_text:
            search_query = self.key_word_extractor.run(
                question=search_query,
                chat_history=history_text,
            )
        print(search_query)

        retrieved = self.vstore.similarity_search(search_query, k=4)

        # Same keys as the caller's inputs, with the rewritten question and the
        # flattened history substituted in.
        combine_kwargs = {
            **inputs,
            "question": search_query,
            "chat_history": history_text,
        }
        answer, _ = self.chain.combine_docs(retrieved, **combine_kwargs)
        return {"answer": answer}
51
+
52
+
53
def get_new_chain1(vectorstore) -> Chain:
    """Assemble the ``CustomChain`` used to answer marine-biology questions.

    Two pieces are built: a few-shot chain that rewrites a follow-up question
    as a standalone one, with examples selected from the "Rephrase" Weaviate
    class, and a "stuff" question-answering chain over retrieved documents.

    Args:
        vectorstore: Store the resulting chain searches for documents.

    Returns:
        A ``CustomChain`` wired with the QA chain, ``vectorstore`` and the
        question-rewriting chain.

    Raises:
        KeyError: If ``WEAVIATE_URL`` or ``OPENAI_API_KEY`` is missing from the
            environment.
    """
    weaviate_url = os.environ["WEAVIATE_URL"]
    rephrase_client = weaviate.Client(
        url=weaviate_url,
        additional_headers={"X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"]},
    )

    # --- Question rewriting -------------------------------------------------
    # The suffix has the same layout as each few-shot example, minus the
    # answer, so the model completes the final "Standalone question:" line.
    rephrase_suffix = (
        "## Example:\n"
        "\n"
        "Chat History:\n"
        "{chat_history}\n"
        "Follow Up Input: {question}\n"
        "Standalone question:"
    )
    rephrase_example_template = rephrase_suffix + " {answer}"
    rephrase_example_prompt = PromptTemplate(
        template=rephrase_example_template,
        input_variables=["chat_history", "question", "answer"],
    )
    rephrase_prefix = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question. You should assume that the question is related to marine biology."""

    rephrase_examples = Weaviate(
        rephrase_client,
        "Rephrase",
        "content",
        attributes=["question", "answer", "chat_history"],
    )
    rephrase_selector = SemanticSimilarityExampleSelector(
        vectorstore=rephrase_examples, k=4
    )
    rephrase_prompt = FewShotPromptTemplate(
        prefix=rephrase_prefix,
        suffix=rephrase_suffix,
        example_selector=rephrase_selector,
        example_prompt=rephrase_example_prompt,
        input_variables=["question", "chat_history"],
    )
    rephrase_llm = OpenAI(temperature=0, model_name="text-davinci-003")
    key_word_extractor = LLMChain(llm=rephrase_llm, prompt=rephrase_prompt)

    # --- Answering over retrieved documents ---------------------------------
    document_prompt = PromptTemplate(
        template=">Example:\nContent:\n---------\n{page_content}\n----------\nSource: {source}",
        input_variables=["page_content", "source"],
    )
    answer_template = """You are an AI assistant for Wikipedia information about marine biology.
You are given the following extracted parts of a long document and a question. Provide a conversational answer with a hyperlink to the wikipedia page.
You should only use hyperlinks that are explicitly listed as a source in the context. Do NOT make up a hyperlink that is not listed.
If you don't know the answer, just say "Hmm, I'm not sure." Don't try to make up an answer.
If the question is not about marine biology, the oceans, or biology, politely inform them that you are tuned to only answer questions about marine biology.
Question: {question}
=========
{context}
=========
Answer in Markdown:"""
    answer_prompt = PromptTemplate(
        template=answer_template,
        input_variables=["question", "context"],
    )
    answer_llm = OpenAI(temperature=0, model_name="text-davinci-003", max_tokens=-1)
    doc_chain = load_qa_chain(
        answer_llm,
        chain_type="stuff",
        prompt=answer_prompt,
        document_prompt=document_prompt,
    )

    return CustomChain(
        chain=doc_chain,
        vstore=vectorstore,
        key_word_extractor=key_word_extractor,
    )
119
+
120
+
121
+ def _get_chat_history(chat_history: List[Tuple[str, str]]):
122
+ buffer = ""
123
+ for human_s, ai_s in chat_history:
124
+ human = f"Human: " + human_s
125
+ ai = f"Assistant: " + ai_s
126
+ buffer += "\n" + "\n".join([human, ai])
127
+ return buffer
requirements.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ langchain==0.0.64
2
+ beautifulsoup4
3
+ weaviate-client
4
+ openai
5
+ black
6
+ isort
7
+ Flask
8
+ transformers
9
+ gradio
10
+ wikipedia
11
+ gpt-index
12
+ requests==2.28.2
13
+ boto3
14
+ pygit2
15
+ better_profanity