from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
import streamlit as st
import tiktoken

from src.utils import load_config_values
from src.dev_llm import FakeLLM
from src.pdfParser import extract_text_from_pdf
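# NOTE: the three `src.*` helpers are project-local and not shown here. As a
# rough idea of the shape FakeLLM is assumed to have, langchain_core ships a
# built-in fake model that could stand in for it, e.g.:
#   from langchain_core.language_models import FakeListLLM
#   tmp_llm = FakeListLLM(responses=["stubbed response"])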
# Load in model and pipeline configuration values
system_message, context_message, model_id = load_config_values(
    config_keys=[
        "system_message",
        "context_message",
        "model_id",
    ]
)
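# `load_config_values` is assumed to read these keys from a project config
# file. A plausible (hypothetical) config.yaml, inferred from the prompt
# variables the chain supplies below, might look like:
#   system_message: "You are an assistant that extracts data from documents."
#   context_message: "{system_message}\n\nDocument:\n{pdf_text}\n\nExtract: {data_to_extract}"
#   model_id: "gpt-3.5-turbo"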
openai_tokeniser = tiktoken.encoding_for_model(model_id)
# TODO: Change this to reflect the prod model rather than dev models
# Initialise a fake LLM to test out the full pipeline
tmp_llm = FakeLLM()

prompt = ChatPromptTemplate.from_template(
    template=context_message,
)
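# from_template infers the input variables from the {placeholders} in
# context_message and wraps it as a single human message; e.g. (using the
# hypothetical template sketched above):
#   prompt.invoke({"system_message": "...", "pdf_text": "...", "data_to_extract": "..."})
# returns a ChatPromptValue ready to pass to the model.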
# The lambdas close over module-level names: `uploaded_text` does not exist
# yet at definition time, so the chain must only be invoked after a PDF has
# been uploaded and parsed below.
chain = (
    {
        "system_message": lambda x: system_message,
        "pdf_text": lambda x: uploaded_text,
        "data_to_extract": RunnablePassthrough(),
    }
    | prompt
    | tmp_llm
)
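# Invoking the chain fans the same input out to every entry of the dict
# (LCEL treats it as a RunnableParallel); RunnablePassthrough() forwards the
# input unchanged, so e.g.
#   chain.invoke("invoice number and total amount")
# fills {data_to_extract} with that string alongside the fixed
# system_message and the parsed PDF text.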
# Free-text description of the data to extract from the PDF
user_input_1 = st.text_input(label="input")
uploaded_doc = st.file_uploader(
    label="Upload PDF:",
    type="pdf",
)
if uploaded_doc is not None:
    uploaded_text = extract_text_from_pdf(uploaded_doc)
    st.text(chain.invoke(user_input_1))

    # Token counts for each prompt component, summed to approximate the
    # total prompt size sent to the model
    text_length = len(openai_tokeniser.encode(uploaded_text))
    system_message_length = len(openai_tokeniser.encode(system_message))
    context_message_length = len(openai_tokeniser.encode(context_message))
    st.text(text_length + system_message_length + context_message_length)
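    # NOTE: this total omits the user's `data_to_extract` input; for a
    # stricter prompt budget you could also add
    #   len(openai_tokeniser.encode(user_input_1))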