mahynski commited on
Commit
a94912c
·
1 Parent(s): c1ff6b3

debug embeddings

Browse files
Files changed (1) hide show
  1. app.py +7 -9
app.py CHANGED
@@ -76,19 +76,19 @@ def main():
76
  )
77
 
78
  # Create LLM
 
 
79
  if llm_token is not None:
80
  if provider == 'openai':
81
  os.environ["OPENAI_API_KEY"] = str(llm_token)
82
  Settings.llm = OpenAI(
83
  model=llm_name,
84
  temperature=temperature,
85
- # max_tokens=max_output_tokens
86
  )
87
- # Global tokenization needs to be consistent with LLM
88
- # https://docs.llamaindex.ai/en/stable/module_guides/models/llms/
89
  Settings.tokenizer = tiktoken.encoding_for_model(llm_name).encode
90
  Settings.num_output = max_output_tokens
91
- # Settings.context_window = 4096 # max possible
92
  Settings.embed_model = OpenAIEmbedding()
93
  elif provider == 'huggingface':
94
  os.environ['HFTOKEN'] = str(llm_token)
@@ -101,7 +101,7 @@ def main():
101
 
102
  uploaded_file = st.file_uploader(
103
  "Choose a PDF file to upload",
104
- # type=['pdf'],
105
  accept_multiple_files=False
106
  )
107
 
@@ -109,7 +109,7 @@ def main():
109
  if uploaded_file is not None:
110
  # Parse the file
111
  parser = LlamaParse(
112
- api_key=parse_token, # can also be set in your env as LLAMA_CLOUD_API_KEY
113
  result_type="text" # "markdown" and "text" are available
114
  )
115
 
@@ -139,7 +139,7 @@ def main():
139
 
140
  prompt_txt = 'Summarize this document in a 3-5 sentences.'
141
  prompt = st.text_area(
142
- label="Enter you query.",
143
  key="prompt_widget",
144
  value=prompt_txt
145
  )
@@ -154,13 +154,11 @@ def main():
154
  tab1, tab2 = st.tabs(["Uploaded File", "Parsed File",])
155
 
156
  with tab1:
157
- # st.header('This is the raw file you uploaded.')
158
  if uploaded_file is not None: # Display the pdf
159
  bytes_data = uploaded_file.getvalue()
160
  pdf_viewer(input=bytes_data, width=700)
161
 
162
  with tab2:
163
- # st.header('This is the parsed version of the file.')
164
  if parsed_document is not None: # Showed the raw parsing result
165
  st.write(parsed_document)
166
 
 
76
  )
77
 
78
  # Create LLM
79
+ # Global tokenization needs to be consistent with LLM for token counting
80
+ # https://docs.llamaindex.ai/en/stable/module_guides/models/llms/
81
  if llm_token is not None:
82
  if provider == 'openai':
83
  os.environ["OPENAI_API_KEY"] = str(llm_token)
84
  Settings.llm = OpenAI(
85
  model=llm_name,
86
  temperature=temperature,
87
+ max_tokens=max_output_tokens
88
  )
 
 
89
  Settings.tokenizer = tiktoken.encoding_for_model(llm_name).encode
90
  Settings.num_output = max_output_tokens
91
+ Settings.context_window = 4096 # max possible
92
  Settings.embed_model = OpenAIEmbedding()
93
  elif provider == 'huggingface':
94
  os.environ['HFTOKEN'] = str(llm_token)
 
101
 
102
  uploaded_file = st.file_uploader(
103
  "Choose a PDF file to upload",
104
+ type=['pdf'],
105
  accept_multiple_files=False
106
  )
107
 
 
109
  if uploaded_file is not None:
110
  # Parse the file
111
  parser = LlamaParse(
112
+ api_key=parse_token, # Can also be set in your env as LLAMA_CLOUD_API_KEY
113
  result_type="text" # "markdown" and "text" are available
114
  )
115
 
 
139
 
140
  prompt_txt = 'Summarize this document in a 3-5 sentences.'
141
  prompt = st.text_area(
142
+ label="Enter your query.",
143
  key="prompt_widget",
144
  value=prompt_txt
145
  )
 
154
  tab1, tab2 = st.tabs(["Uploaded File", "Parsed File",])
155
 
156
  with tab1:
 
157
  if uploaded_file is not None: # Display the pdf
158
  bytes_data = uploaded_file.getvalue()
159
  pdf_viewer(input=bytes_data, width=700)
160
 
161
  with tab2:
 
162
  if parsed_document is not None: # Showed the raw parsing result
163
  st.write(parsed_document)
164