RepAIr / chat_logic /diagnosis.py
w3680's picture
updated version
6848c8b
#%%
# processing functions
# load iFixit info
from langchain.chat_models import init_chat_model
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from langchain.prompts import ChatPromptTemplate
# import langchain groq package
from helper_functions.llm_client_initialization import llm_langchain_client_init
import re
# Matches either a complete single-line double-quoted string or a `//` line
# comment. Strings are listed first so that a `//` occurring inside a value
# (for example "https://...") is consumed as part of the string and is never
# mistaken for the start of a comment.
_STRING_OR_LINE_COMMENT = re.compile(r'"(?:\\.|[^"\\\n])*"|//[^\n]*')

# Hand-written format instructions for the LLM.
# Theoretically, the parser can generate these instructions automatically,
# but it cannot tell the client to remove the comments in the output, which
# leads to errors when parsing the JSON downstream.
_PARSING_INSTRUCTIONS = '''
The output should be a markdown code snippet formatted in the following schema,
including the leading and trailing "```json" and "```", without any additional text or comments:
```json
{
"device": string // Device or appliance mentioned, if any
"brand": string // Brand of device or appliance mentioned, if any
"model": string // Model of device or appliance mentioned, if any
"issue": string // Main issue or concern with device or appliance, if any. Leave empty if none.
}
```
'''


def _strip_line_comments(text):
    """Remove `//` line comments from *text* while leaving quoted strings intact."""
    return _STRING_OR_LINE_COMMENT.sub(
        # Keep string literals verbatim; drop everything else the pattern
        # matched (i.e. the `//` comments).
        lambda match: match.group(0) if match.group(0).startswith('"') else "",
        text,
    )


def information_extractor(conversation):
    """
    Extracts the device, brand, model and issue from the latest message of the
    conversation using a language model.

    Only the final entry of the conversation is analysed: its element at
    index 1 is the text sent to the model (presumably the text part of a
    (speaker, text) or (user, bot) pair -- TODO confirm against the caller).

    Input: diagnosis conversation between bot and user; a non-empty sequence
        whose last item can be indexed at position 1.
    Output: the result of parsing the model reply with the structured output
        parser; expected to be a dictionary with the keys "device", "brand",
        "model" and "issue".
    Raises: IndexError if the conversation is empty. Errors raised by the LLM
        call or by the parser are not caught here and propagate to the caller.
    """
    # Initializing the LLM client (Groq, according to the comments in this file)
    llm = llm_langchain_client_init()

    # Defining the information we want to extract from the response:
    # one schema entry per field.
    schemas = [
        ResponseSchema(name="device", description="Device or appliance mentioned, if any"),
        ResponseSchema(name="brand", description="Brand of device or appliance mentioned, if any"),
        ResponseSchema(name="model", description="Model of device or appliance mentioned, if any"),
        ResponseSchema(name="issue", description="Main issue or concern with device or appliance, if any. Leave empty if none."),
    ]

    # Initialization of the parser
    parser = StructuredOutputParser.from_response_schemas(schemas)
    print(parser)

    # Defining the chat prompt template
    prompt = ChatPromptTemplate.from_messages([
        ("system", "Extract the following info from the message of the user."),
        ("user", "{text}\n\n{format_instructions}")
    ])

    # Formatting the prompt with the last message in the conversation and the
    # instructions to parse the information from that message
    latest_message = conversation[-1][1]
    formatted_prompt = prompt.format_prompt(
        text=latest_message,
        format_instructions=_PARSING_INSTRUCTIONS
    )
    print('Text parsed by LLM: ', latest_message)

    # Calling the LLM with the formatted prompt. `invoke` is used instead of
    # calling the model object directly, which LangChain has deprecated.
    output = llm.invoke(formatted_prompt.to_messages())
    print("Output Content: " + output.content)

    # The model tends to echo the `//` schema comments; strip them before
    # parsing, but only outside string values so that content such as URLs
    # inside a field is not truncated.
    cleaned_content = _strip_line_comments(output.content)
    print("Cleanded Content: " + cleaned_content)

    parsed_content = parser.parse(cleaned_content)
    print("Dictionary: ", parsed_content)
    return parsed_content