robjm16 committed on
Commit
275329e
·
1 Parent(s): d456449

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -7
app.py CHANGED
@@ -13,12 +13,12 @@ Far more robust domain-specific responses are possible by further customizing/re
13
 
14
  ################################# LOAD LIBRARIES/IMPORTS #########################################
15
 
16
- ! pip install openai --quiet
17
- ! pip install transformers --quiet
18
- ! pip install gradio --quiet
19
- ! pip install PyPDF2 --quiet
20
- ! pip install python-docx --quiet
21
- ! pip install pandas --quiet
22
 
23
 
24
  import docx
@@ -45,7 +45,7 @@ openai.api_key = os.environ["API_KEY"]
45
  MODEL_NAME = "curie"
46
  DOC_EMBEDDINGS_MODEL = f"text-search-{MODEL_NAME}-doc-001"
47
  QUERY_EMBEDDINGS_MODEL = f"text-search-{MODEL_NAME}-query-001"
48
- MAX_SECTION_LEN =1100 # The API limits total tokens -- for the prompt containing the wuestion and domain-specific content and the answer -- to 2048 tokens, or about 1500 words.
49
  SEPARATOR = "\n* " # A string called SEPARATOR is defined as the newline character followed by an asterisk and a space. This string will be used as a separator between different pieces of text.
50
  tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
51
  separator_len = len(tokenizer.tokenize(SEPARATOR))
 
13
 
14
  ################################# LOAD LIBRARIES/IMPORTS #########################################
15
 
16
+ pip install openai --quiet
17
+ pip install transformers --quiet
18
+ pip install gradio --quiet
19
+ pip install PyPDF2 --quiet
20
+ pip install python-docx --quiet
21
+ pip install pandas --quiet
22
 
23
 
24
  import docx
 
45
  MODEL_NAME = "curie"
46
  DOC_EMBEDDINGS_MODEL = f"text-search-{MODEL_NAME}-doc-001"
47
  QUERY_EMBEDDINGS_MODEL = f"text-search-{MODEL_NAME}-query-001"
48
+ MAX_SECTION_LEN =1100 # The API limits total tokens -- for the prompt containing the question and domain-specific content and the answer -- to 2048 tokens, or about 1500 words.
49
  SEPARATOR = "\n* " # A string called SEPARATOR is defined as the newline character followed by an asterisk and a space. This string will be used as a separator between different pieces of text.
50
  tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
51
  separator_len = len(tokenizer.tokenize(SEPARATOR))