Spaces:
Paused
Paused
updates
Browse files- .gitignore +1 -0
- app.py +5 -3
.gitignore
CHANGED
|
@@ -1,2 +1,3 @@
|
|
|
|
|
| 1 |
.DS_Store
|
| 2 |
.streamlit/
|
|
|
|
| 1 |
+
.idea/
|
| 2 |
.DS_Store
|
| 3 |
.streamlit/
|
app.py
CHANGED
|
@@ -9,7 +9,7 @@ import openai
|
|
| 9 |
|
| 10 |
all_documents = {}
|
| 11 |
|
| 12 |
-
def qa_gpt3(
|
| 13 |
openai.api_key = st.secrets["openai_key"]
|
| 14 |
|
| 15 |
response = openai.Completion.create(
|
|
@@ -37,7 +37,7 @@ value='https://www.databricks.com/blog/2022/11/15/values-define-databricks-cultu
|
|
| 37 |
query = st.text_input("Query")
|
| 38 |
|
| 39 |
qa_option = st.selectbox('Q/A Answerer', ('gpt3', 'a-ware/bart-squadv2'))
|
| 40 |
-
tokenizing = st.selectbox('How to Tokenize', ("Don't (use entire body as document)", 'Newline (split by newline character)'))
|
| 41 |
|
| 42 |
if qa_option == 'gpt3':
|
| 43 |
qa_model = qa_gpt3
|
|
@@ -85,7 +85,9 @@ def get_documents(document_text, crawl=crawl_urls):
|
|
| 85 |
if tokenizing == "Don't (use entire body as document)":
|
| 86 |
document_paragraphs = [body]
|
| 87 |
elif tokenizing == 'Newline (split by newline character)':
|
| 88 |
-
document_paragraphs = [n for n in body.split('\n') if len(n) >
|
|
|
|
|
|
|
| 89 |
|
| 90 |
for document_paragraph in document_paragraphs:
|
| 91 |
all_documents[document_paragraph] = url
|
|
|
|
| 9 |
|
| 10 |
all_documents = {}
|
| 11 |
|
| 12 |
+
def qa_gpt3(query, context):
|
| 13 |
openai.api_key = st.secrets["openai_key"]
|
| 14 |
|
| 15 |
response = openai.Completion.create(
|
|
|
|
| 37 |
query = st.text_input("Query")
|
| 38 |
|
| 39 |
qa_option = st.selectbox('Q/A Answerer', ('gpt3', 'a-ware/bart-squadv2'))
|
| 40 |
+
tokenizing = st.selectbox('How to Tokenize', ("Don't (use entire body as document)", 'Newline (split by newline character)', 'Combo'))
|
| 41 |
|
| 42 |
if qa_option == 'gpt3':
|
| 43 |
qa_model = qa_gpt3
|
|
|
|
| 85 |
if tokenizing == "Don't (use entire body as document)":
|
| 86 |
document_paragraphs = [body]
|
| 87 |
elif tokenizing == 'Newline (split by newline character)':
|
| 88 |
+
document_paragraphs = [n for n in body.split('\n') if len(n) > 250]
|
| 89 |
+
elif tokenizing == 'Combo':
|
| 90 |
+
document_paragraphs = [body] + [n for n in body.split('\n') if len(n) > 250]
|
| 91 |
|
| 92 |
for document_paragraph in document_paragraphs:
|
| 93 |
all_documents[document_paragraph] = url
|