RyanDA commited on
Commit
94bedd1
·
0 Parent(s):

Duplicate from RyanDA/US_History_QA

Browse files
.gitattributes ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ db/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: US History QA
3
+ emoji: 📖
4
+ colorFrom: blue
5
+ colorTo: gray
6
+ sdk: gradio
7
+ sdk_version: 3.39.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: bigscience-openrail-m
11
+ duplicated_from: RyanDA/US_History_QA
12
+ ---
13
+ # Overview
14
+ This is a basic question-answering chain built on a database created from OpenStax's free US History textbook.
15
+ The question is embedded as a vector, the nearest document vectors are retrieved from the database, and those are then input into GPT-3.5-turbo to generate a cohesive answer.
16
+ This entire workflow is visible via the app.py file.
17
+
18
+ # Citation Information
19
+ Access the US History textbook (used in the database) for free at https://openstax.org/books/us-history/pages/1-introduction
20
+ I do not claim any of their content as my own.
app.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Gradio app: retrieval-augmented question answering over a US History
# textbook. The question is embedded, the nearest chunks are fetched from a
# persisted Chroma store, and GPT-3.5-turbo composes the final answer.
import gradio as gr
from langchain.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain.vectorstores import Chroma
import openai
import os

openai.api_key = os.environ["OPENAI_API_KEY"]

# One-time startup: sentence-transformer embeddings over the pre-built
# Chroma database in ./db; the retriever returns the 4 nearest chunks.
embedding = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
vectordb = Chroma(persist_directory='db', embedding_function=embedding)
retriever = vectordb.as_retriever(search_kwargs={"k": 4})

def answer(message):
    """Answer a user question using retrieved textbook context.

    Retrieves the most relevant document chunks, stuffs them into a
    question-answering prompt for GPT-3.5-turbo, and returns the model's
    reply followed by a newline-separated list of the source documents.
    """
    ctx = retriever.get_relevant_documents(message)

    prompt = """Use the following pieces of context to answer the question at the end.
    If you don't know the answer, just say that you don't know, don't try to
    make up an answer.

    {context}

    Question: {question}
    Helpful Answer: """.format(context=" ".join(doc.page_content for doc in ctx),
                               question=message)

    # temperature=0 keeps the answer deterministic for a given prompt.
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{'role': 'user', 'content': prompt}],
        temperature=0,
    )

    # NOTE(review): assumes every retrieved document carries a 'source'
    # metadata key — confirm against how the db was built.
    sources = "\n\nSources:\n" + "\n".join(doc.metadata['source'] for doc in ctx)
    return response['choices'][0]['message']['content'] + sources

iface = gr.Interface(fn=answer, inputs="text", outputs="text")
iface.launch()
db/305cbbe9-9334-4981-9f05-c90242c205d5/data_level0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c177925075bdef4e93a52e9c88ebf3984cd22ad2e99c68395b6a40d3a5559141
3
+ size 3352000
db/305cbbe9-9334-4981-9f05-c90242c205d5/header.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa74df29146ea4379b9d410d6f80c1e7a588b871a2adba5ba7981b85b3c975ed
3
+ size 100
db/305cbbe9-9334-4981-9f05-c90242c205d5/index_metadata.pickle ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c93375249ccb1889e8c85a557e79a4d042f168fdd755dc4f4f8a943c1c25536
3
+ size 113989
db/305cbbe9-9334-4981-9f05-c90242c205d5/length.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0599094165cf85af6d6020b52957285948d1e15063d3e0f7f2428b1a54e122b8
3
+ size 8000
db/305cbbe9-9334-4981-9f05-c90242c205d5/link_lists.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:137d697b2215372fe3747170478353e7965cd2692b58bac2eaa13c5baabbe511
3
+ size 17316
db/chroma.sqlite3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3e3d06dec869903f3c65a48d685ce2cbf72fec5f358bb1c198fbe9a3ef59b93
3
+ size 20537344
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ unstructured
2
+ langchain
3
+ sentence_transformers
4
+ chromadb
5
+ gradio
6
+ openai