#%%
# processing functions
# lead ifixit infos
from langchain.chat_models import init_chat_model
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from langchain.prompts import ChatPromptTemplate
# import langchain groq package
from helper_functions.llm_client_initialization import llm_langchain_client_init
import re

# Fields extracted from the conversation, in the order they appear in the schema.
_EXTRACTED_FIELDS = ("device", "brand", "model", "issue")

# Matches either a complete double-quoted string (group 1) or a `//` comment
# running to the end of the line. Strings are matched first so that a `//`
# inside a value (e.g. "https://...") is never mistaken for a comment.
_STRING_OR_LINE_COMMENT = re.compile(r'("(?:\\.|[^"\\\n])*")|//[^\n]*')

# Hand-written format instructions.
# Theoretically, the parser is able to generate the parsing instructions
# automatically, but it cannot tell the client to remove the comments in the
# output, which leads to errors when parsing the JSON downstream.
_PARSING_INSTRUCTIONS = '''
The output should be a markdown code snippet formatted in the following schema, including the leading and trailing "```json" and "```", without any additional text or comments:

```json
{
    "device": string  // Device or appliance mentioned, if any
    "brand": string  // Brand of device or appliance mentioned, if any
    "model": string  // Model of device or appliance mentioned, if any
    "issue": string  // Main issue or concern with device or appliance, if any. Leave empty if none.
}
```
'''


def _strip_json_line_comments(text):
    """Remove `//` line comments from *text* while leaving string literals intact."""
    # When the match is a string literal, group 1 is set and is put back
    # unchanged; when it is a comment, group 1 is None and the match is dropped.
    return _STRING_OR_LINE_COMMENT.sub(lambda m: m.group(1) or "", text)


def information_extractor(conversation):
    """
    Extracts the device, brand, model and issue from the conversation using a language model.

    Only the last entry of the conversation is sent to the model: the text
    that is parsed is ``conversation[-1][1]``.

    Args:
        conversation: the diagnosis conversation between bot and user, as a
            sequence of entries whose element at index 1 is the message text.

    Returns:
        Dictionary with the keys "device", "brand", "model" and "issue".
        For an empty conversation every value is an empty string and the
        model is not called.

    Raises:
        Whatever ``StructuredOutputParser.parse`` raises when the model's
        answer is not valid JSON for the schema (not caught here).
    """
    # Nothing to extract from an empty conversation; avoid the IndexError that
    # `conversation[-1]` would raise and skip the LLM round-trip.
    if not conversation:
        return {field: "" for field in _EXTRACTED_FIELDS}

    # Initializing the Groq client
    llm = llm_langchain_client_init()

    # Defining the information we want to extract from the response
    # (one ResponseSchema per piece of information)
    schemas = [
        ResponseSchema(name="device", description="Device or appliance mentioned, if any"),
        ResponseSchema(name="brand", description="Brand of device or appliance mentioned, if any"),
        ResponseSchema(name="model", description="Model of device or appliance mentioned, if any"),
        ResponseSchema(
            name="issue",
            description="Main issue or concern with device or appliance, if any. Leave empty if none.",
        ),
    ]

    # Initialization of the parser
    parser = StructuredOutputParser.from_response_schemas(schemas)
    print(parser)

    # Defining the chat prompt template
    prompt = ChatPromptTemplate.from_messages([
        ("system", "Extract the following info from the message of the user."),
        ("user", "{text}\n\n{format_instructions}"),
    ])

    # Formatting the prompt with the last message in the conversation and the
    # instructions to parse the information from that message
    last_message = conversation[-1][1]
    formatted_prompt = prompt.format_prompt(
        text=last_message,
        format_instructions=_PARSING_INSTRUCTIONS,
    )
    print('Text parsed by LLM: ', last_message)

    # Calling the LLM with the formatted prompt (`invoke` replaces the
    # deprecated direct call of the chat model) and parsing the output
    output = llm.invoke(formatted_prompt.to_messages())
    print("Output Content: " + output.content)

    # The model tends to echo the `//` schema comments, which are not valid
    # JSON; drop them without touching `//` inside string values.
    cleaned_content = _strip_json_line_comments(output.content)
    print("Cleanded Content: " + cleaned_content)

    parsed_content = parser.parse(cleaned_content)
    print("Dictionary: ", parsed_content)
    return parsed_content