debug embeddings
Browse files
app.py
CHANGED
|
@@ -76,19 +76,19 @@ def main():
|
|
| 76 |
)
|
| 77 |
|
| 78 |
# Create LLM
|
|
|
|
|
|
|
| 79 |
if llm_token is not None:
|
| 80 |
if provider == 'openai':
|
| 81 |
os.environ["OPENAI_API_KEY"] = str(llm_token)
|
| 82 |
Settings.llm = OpenAI(
|
| 83 |
model=llm_name,
|
| 84 |
temperature=temperature,
|
| 85 |
-
|
| 86 |
)
|
| 87 |
-
# Global tokenization needs to be consistent with LLM
|
| 88 |
-
# https://docs.llamaindex.ai/en/stable/module_guides/models/llms/
|
| 89 |
Settings.tokenizer = tiktoken.encoding_for_model(llm_name).encode
|
| 90 |
Settings.num_output = max_output_tokens
|
| 91 |
-
|
| 92 |
Settings.embed_model = OpenAIEmbedding()
|
| 93 |
elif provider == 'huggingface':
|
| 94 |
os.environ['HFTOKEN'] = str(llm_token)
|
|
@@ -101,7 +101,7 @@ def main():
|
|
| 101 |
|
| 102 |
uploaded_file = st.file_uploader(
|
| 103 |
"Choose a PDF file to upload",
|
| 104 |
-
|
| 105 |
accept_multiple_files=False
|
| 106 |
)
|
| 107 |
|
|
@@ -109,7 +109,7 @@ def main():
|
|
| 109 |
if uploaded_file is not None:
|
| 110 |
# Parse the file
|
| 111 |
parser = LlamaParse(
|
| 112 |
-
api_key=parse_token, #
|
| 113 |
result_type="text" # "markdown" and "text" are available
|
| 114 |
)
|
| 115 |
|
|
@@ -139,7 +139,7 @@ def main():
|
|
| 139 |
|
| 140 |
prompt_txt = 'Summarize this document in a 3-5 sentences.'
|
| 141 |
prompt = st.text_area(
|
| 142 |
-
label="Enter
|
| 143 |
key="prompt_widget",
|
| 144 |
value=prompt_txt
|
| 145 |
)
|
|
@@ -154,13 +154,11 @@ def main():
|
|
| 154 |
tab1, tab2 = st.tabs(["Uploaded File", "Parsed File",])
|
| 155 |
|
| 156 |
with tab1:
|
| 157 |
-
# st.header('This is the raw file you uploaded.')
|
| 158 |
if uploaded_file is not None: # Display the pdf
|
| 159 |
bytes_data = uploaded_file.getvalue()
|
| 160 |
pdf_viewer(input=bytes_data, width=700)
|
| 161 |
|
| 162 |
with tab2:
|
| 163 |
-
# st.header('This is the parsed version of the file.')
|
| 164 |
if parsed_document is not None: # Showed the raw parsing result
|
| 165 |
st.write(parsed_document)
|
| 166 |
|
|
|
|
| 76 |
)
|
| 77 |
|
| 78 |
# Create LLM
|
| 79 |
+
# Global tokenization needs to be consistent with LLM for token counting
|
| 80 |
+
# https://docs.llamaindex.ai/en/stable/module_guides/models/llms/
|
| 81 |
if llm_token is not None:
|
| 82 |
if provider == 'openai':
|
| 83 |
os.environ["OPENAI_API_KEY"] = str(llm_token)
|
| 84 |
Settings.llm = OpenAI(
|
| 85 |
model=llm_name,
|
| 86 |
temperature=temperature,
|
| 87 |
+
max_tokens=max_output_tokens
|
| 88 |
)
|
|
|
|
|
|
|
| 89 |
Settings.tokenizer = tiktoken.encoding_for_model(llm_name).encode
|
| 90 |
Settings.num_output = max_output_tokens
|
| 91 |
+
Settings.context_window = 4096 # max possible
|
| 92 |
Settings.embed_model = OpenAIEmbedding()
|
| 93 |
elif provider == 'huggingface':
|
| 94 |
os.environ['HFTOKEN'] = str(llm_token)
|
|
|
|
| 101 |
|
| 102 |
uploaded_file = st.file_uploader(
|
| 103 |
"Choose a PDF file to upload",
|
| 104 |
+
type=['pdf'],
|
| 105 |
accept_multiple_files=False
|
| 106 |
)
|
| 107 |
|
|
|
|
| 109 |
if uploaded_file is not None:
|
| 110 |
# Parse the file
|
| 111 |
parser = LlamaParse(
|
| 112 |
+
api_key=parse_token, # Can also be set in your env as LLAMA_CLOUD_API_KEY
|
| 113 |
result_type="text" # "markdown" and "text" are available
|
| 114 |
)
|
| 115 |
|
|
|
|
| 139 |
|
| 140 |
prompt_txt = 'Summarize this document in a 3-5 sentences.'
|
| 141 |
prompt = st.text_area(
|
| 142 |
+
label="Enter your query.",
|
| 143 |
key="prompt_widget",
|
| 144 |
value=prompt_txt
|
| 145 |
)
|
|
|
|
| 154 |
tab1, tab2 = st.tabs(["Uploaded File", "Parsed File",])
|
| 155 |
|
| 156 |
with tab1:
|
|
|
|
| 157 |
if uploaded_file is not None: # Display the pdf
|
| 158 |
bytes_data = uploaded_file.getvalue()
|
| 159 |
pdf_viewer(input=bytes_data, width=700)
|
| 160 |
|
| 161 |
with tab2:
|
|
|
|
| 162 |
if parsed_document is not None: # Showed the raw parsing result
|
| 163 |
st.write(parsed_document)
|
| 164 |
|