debug embeddings
Browse files
app.py
CHANGED
|
@@ -76,19 +76,19 @@ def main():
|
|
| 76 |
)
|
| 77 |
|
| 78 |
# Create LLM
|
|
|
|
|
|
|
| 79 |
if llm_token is not None:
|
| 80 |
if provider == 'openai':
|
| 81 |
os.environ["OPENAI_API_KEY"] = str(llm_token)
|
| 82 |
Settings.llm = OpenAI(
|
| 83 |
model=llm_name,
|
| 84 |
temperature=temperature,
|
| 85 |
-
|
| 86 |
)
|
| 87 |
-
# Global tokenization needs to be consistent with LLM
|
| 88 |
-
# https://docs.llamaindex.ai/en/stable/module_guides/models/llms/
|
| 89 |
Settings.tokenizer = tiktoken.encoding_for_model(llm_name).encode
|
| 90 |
Settings.num_output = max_output_tokens
|
| 91 |
-
|
| 92 |
Settings.embed_model = OpenAIEmbedding()
|
| 93 |
elif provider == 'huggingface':
|
| 94 |
os.environ['HFTOKEN'] = str(llm_token)
|
|
@@ -101,7 +101,7 @@ def main():
|
|
| 101 |
|
| 102 |
uploaded_file = st.file_uploader(
|
| 103 |
"Choose a PDF file to upload",
|
| 104 |
-
|
| 105 |
accept_multiple_files=False
|
| 106 |
)
|
| 107 |
|
|
@@ -109,7 +109,7 @@ def main():
|
|
| 109 |
if uploaded_file is not None:
|
| 110 |
# Parse the file
|
| 111 |
parser = LlamaParse(
|
| 112 |
-
api_key=parse_token, #
|
| 113 |
result_type="text" # "markdown" and "text" are available
|
| 114 |
)
|
| 115 |
|
|
@@ -139,7 +139,7 @@ def main():
|
|
| 139 |
|
| 140 |
prompt_txt = 'Summarize this document in a 3-5 sentences.'
|
| 141 |
prompt = st.text_area(
|
| 142 |
-
label="Enter
|
| 143 |
key="prompt_widget",
|
| 144 |
value=prompt_txt
|
| 145 |
)
|
|
@@ -154,13 +154,11 @@ def main():
|
|
| 154 |
tab1, tab2 = st.tabs(["Uploaded File", "Parsed File",])
|
| 155 |
|
| 156 |
with tab1:
|
| 157 |
-
# st.header('This is the raw file you uploaded.')
|
| 158 |
if uploaded_file is not None: # Display the pdf
|
| 159 |
bytes_data = uploaded_file.getvalue()
|
| 160 |
pdf_viewer(input=bytes_data, width=700)
|
| 161 |
|
| 162 |
with tab2:
|
| 163 |
-
# st.header('This is the parsed version of the file.')
|
| 164 |
if parsed_document is not None: # Showed the raw parsing result
|
| 165 |
st.write(parsed_document)
|
| 166 |
|
|
|
|
| 76 |
)
|
| 77 |
|
| 78 |
# Create LLM
|
| 79 |
+
# Global tokenization needs to be consistent with LLM for token counting
|
| 80 |
+
# https://docs.llamaindex.ai/en/stable/module_guides/models/llms/
|
| 81 |
if llm_token is not None:
|
| 82 |
if provider == 'openai':
|
| 83 |
os.environ["OPENAI_API_KEY"] = str(llm_token)
|
| 84 |
Settings.llm = OpenAI(
|
| 85 |
model=llm_name,
|
| 86 |
temperature=temperature,
|
| 87 |
+
max_tokens=max_output_tokens
|
| 88 |
)
|
|
|
|
|
|
|
| 89 |
Settings.tokenizer = tiktoken.encoding_for_model(llm_name).encode
|
| 90 |
Settings.num_output = max_output_tokens
|
| 91 |
+
Settings.context_window = 4096 # max possible
|
| 92 |
Settings.embed_model = OpenAIEmbedding()
|
| 93 |
elif provider == 'huggingface':
|
| 94 |
os.environ['HFTOKEN'] = str(llm_token)
|
|
|
|
| 101 |
|
| 102 |
uploaded_file = st.file_uploader(
|
| 103 |
"Choose a PDF file to upload",
|
| 104 |
+
type=['pdf'],
|
| 105 |
accept_multiple_files=False
|
| 106 |
)
|
| 107 |
|
|
|
|
| 109 |
if uploaded_file is not None:
|
| 110 |
# Parse the file
|
| 111 |
parser = LlamaParse(
|
| 112 |
+
api_key=parse_token, # Can also be set in your env as LLAMA_CLOUD_API_KEY
|
| 113 |
result_type="text" # "markdown" and "text" are available
|
| 114 |
)
|
| 115 |
|
|
|
|
| 139 |
|
| 140 |
prompt_txt = 'Summarize this document in a 3-5 sentences.'
|
| 141 |
prompt = st.text_area(
|
| 142 |
+
label="Enter your query.",
|
| 143 |
key="prompt_widget",
|
| 144 |
value=prompt_txt
|
| 145 |
)
|
|
|
|
| 154 |
tab1, tab2 = st.tabs(["Uploaded File", "Parsed File",])
|
| 155 |
|
| 156 |
with tab1:
|
|
|
|
| 157 |
if uploaded_file is not None: # Display the pdf
|
| 158 |
bytes_data = uploaded_file.getvalue()
|
| 159 |
pdf_viewer(input=bytes_data, width=700)
|
| 160 |
|
| 161 |
with tab2:
|
|
|
|
| 162 |
if parsed_document is not None: # Showed the raw parsing result
|
| 163 |
st.write(parsed_document)
|
| 164 |
|