Spaces:
Paused
Paused
updates
Browse files- .gitignore +1 -0
- app.py +5 -3
.gitignore
CHANGED
|
@@ -1,2 +1,3 @@
|
|
|
|
|
| 1 |
.DS_Store
|
| 2 |
.streamlit/
|
|
|
|
| 1 |
+
.idea/
|
| 2 |
.DS_Store
|
| 3 |
.streamlit/
|
app.py
CHANGED
|
@@ -9,7 +9,7 @@ import openai
|
|
| 9 |
|
| 10 |
all_documents = {}
|
| 11 |
|
| 12 |
-
def qa_gpt3(
|
| 13 |
openai.api_key = st.secrets["openai_key"]
|
| 14 |
|
| 15 |
response = openai.Completion.create(
|
|
@@ -37,7 +37,7 @@ value='https://www.databricks.com/blog/2022/11/15/values-define-databricks-cultu
|
|
| 37 |
query = st.text_input("Query")
|
| 38 |
|
| 39 |
qa_option = st.selectbox('Q/A Answerer', ('gpt3', 'a-ware/bart-squadv2'))
|
| 40 |
-
tokenizing = st.selectbox('How to Tokenize', ("Don't (use entire body as document)", 'Newline (split by newline character)'))
|
| 41 |
|
| 42 |
if qa_option == 'gpt3':
|
| 43 |
qa_model = qa_gpt3
|
|
@@ -85,7 +85,9 @@ def get_documents(document_text, crawl=crawl_urls):
|
|
| 85 |
if tokenizing == "Don't (use entire body as document)":
|
| 86 |
document_paragraphs = [body]
|
| 87 |
elif tokenizing == 'Newline (split by newline character)':
|
| 88 |
-
document_paragraphs = [n for n in body.split('\n') if len(n) >
|
|
|
|
|
|
|
| 89 |
|
| 90 |
for document_paragraph in document_paragraphs:
|
| 91 |
all_documents[document_paragraph] = url
|
|
|
|
| 9 |
|
| 10 |
all_documents = {}
|
| 11 |
|
| 12 |
+
def qa_gpt3(query, context):
|
| 13 |
openai.api_key = st.secrets["openai_key"]
|
| 14 |
|
| 15 |
response = openai.Completion.create(
|
|
|
|
| 37 |
query = st.text_input("Query")
|
| 38 |
|
| 39 |
qa_option = st.selectbox('Q/A Answerer', ('gpt3', 'a-ware/bart-squadv2'))
|
| 40 |
+
tokenizing = st.selectbox('How to Tokenize', ("Don't (use entire body as document)", 'Newline (split by newline character)', 'Combo'))
|
| 41 |
|
| 42 |
if qa_option == 'gpt3':
|
| 43 |
qa_model = qa_gpt3
|
|
|
|
| 85 |
if tokenizing == "Don't (use entire body as document)":
|
| 86 |
document_paragraphs = [body]
|
| 87 |
elif tokenizing == 'Newline (split by newline character)':
|
| 88 |
+
document_paragraphs = [n for n in body.split('\n') if len(n) > 250]
|
| 89 |
+
elif tokenizing == 'Combo':
|
| 90 |
+
document_paragraphs = [body] + [n for n in body.split('\n') if len(n) > 250]
|
| 91 |
|
| 92 |
for document_paragraph in document_paragraphs:
|
| 93 |
all_documents[document_paragraph] = url
|