# File: ai_workflows/crew/til.py
# Last commit: "wip v2" (9487604), committed by theRealNG
# Size: 8.1 kB
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain_core.messages import SystemMessage
from pydantic import BaseModel, Field
from langchain_core.output_parsers import JsonOutputParser
from langchain_openai import ChatOpenAI
from typing import List
import pprint
# Minimum score (out of 10) a TIL must reach on the high-impact criteria
# (factuality, insightfulness) to pass review.
HIGH_IMPACT_THRESHOLD = 8
# Minimum score (out of 10) a TIL must reach on the low-impact criteria
# (simplicity, grammar) to pass review.
LOW_IMPACT_THRESHOLD = 7
# OPENAI_MODEL = "gpt-4o"
OPENAI_MODEL = "gpt-3.5-turbo"


class TilCrew:
    """Extract 'Today I Learned' (TIL) items from notes and review each one.

    The workflow has three stages, run in order by :meth:`kickoff`:

    1. ``_extract_tils`` asks the LLM to pull TILs out of the raw notes.
    2. ``_gather_feedback`` asks the LLM to score every TIL on four criteria.
    3. ``_final_call_on_feedback`` turns the scores into an ok / not_ok verdict.
    """

    def kickoff(self, inputs=None):
        """Run the full extract -> review -> verdict pipeline.

        Args:
            inputs: Mapping that must contain the key ``"notes"`` holding the
                user's notes. Defaults to an empty mapping.

        Returns:
            A list with one dict per TIL. Every dict has ``"til"`` and
            ``"feedback"`` (``"ok"`` or ``"not_ok"``); rejected TILs also carry
            ``"feedback_criteria"`` and ``"reason"``, plus ``"suggestion"``
            when the failing criterion is simplicity or grammar.

        Raises:
            KeyError: If ``inputs`` has no ``"notes"`` entry.
        """
        # A None sentinel avoids sharing one mutable dict across all calls.
        inputs = {} if inputs is None else inputs
        self.notes = inputs["notes"]
        self._extract_tils()
        self._gather_feedback()
        return self._final_call_on_feedback()

    def _extract_tils(self):
        """Ask the LLM to extract TILs from ``self.notes`` into ``self.content``."""
        tils_parser = JsonOutputParser(pydantic_object=TodayILearneds)
        few_shot_examples = [
            {
                "role": "HUMAN",
                "content": "I went through the following course on quantization of LLM: https://www.deeplearning.ai/short-courses/quantization-in-depth/ and here are my insights: \n"
                "Quantization is the process of reducing the size of LLM models by reducing the underlying weights. "
                "The weights are reduced by scaling down the datatypes from a datatype that takes larger space to a data type that takes a smaller space, this is also known as downcasting. "
                "Advantages: takes lesser space and increases compute speed. "
                "Disadvantages: Answers are less precise.\n\n"
            },
            {
                "role": "AI",
                # The example must itself be valid JSON (no trailing comma),
                # otherwise the model is taught to emit unparseable output.
                "content": """
                ```json
                {
                    "tils": [
                        "Quantization is the process of reducing the size of LLM models by reducing the underlying weights.",
                        "The weights are reduced by scaling down the datatypes from a datatype that takes larger space to a data type that takes a smaller space, this is also known as downcasting.",
                        "Advantages: takes lesser space and increases compute speed.",
                        "Disadvantages: Answers are less precise."
                    ]
                }
                ```
                """
            },
        ]

        # SystemMessage is a plain message, not a template, so the braces in
        # the interpolated examples / format instructions are sent verbatim.
        extract_tils_prompt = ChatPromptTemplate.from_messages([
            SystemMessage(
                "You are a 'Personal Today I Learned Extractor' who works in a Product Engineering Services company. "
                "Your responsibility is to extract the TILs from user notes. "
                "Your personal goal is to review a user's list of notes and extract Today I Learned from his notes "
                "without rephrasing user's sentences at all. Never rephrase user's words while extracting TILs.\n\n"
                f"Here are few examples:\n {few_shot_examples}\n\n"
                f"Formatting Instructions: {tils_parser.get_format_instructions()}"
            ),
            HumanMessagePromptTemplate.from_template("{notes}")
        ])

        llm = ChatOpenAI(model=OPENAI_MODEL, temperature=0.2)
        extraction_chain = extract_tils_prompt | llm | tils_parser
        self.content = extraction_chain.invoke({"notes": self.notes})
        pprint.pp(self.content)

    def _final_call_on_feedback(self):
        """Convert the per-criterion scores into a verdict for every TIL.

        Criteria are checked in priority order and the first one scoring
        below its threshold decides the rejection; later criteria are not
        looked at for that TIL.

        Returns:
            A list of result dicts, in the same order as
            ``self.feedback_results``.

        Raises:
            KeyError: If a feedback entry lacks a score/reason key that has to
                be read.
        """
        # (criterion, minimum passing score, attach the suggested rewrite?)
        # Order matters: high-impact criteria take precedence.
        checks = (
            ("factuality", HIGH_IMPACT_THRESHOLD, False),
            ("insightful", HIGH_IMPACT_THRESHOLD, False),
            ("simplicity", LOW_IMPACT_THRESHOLD, True),
            ("grammatical", LOW_IMPACT_THRESHOLD, True),
        )

        final_results = []
        for feedback in self.feedback_results:
            print("Final analysis of:")
            pprint.pp(feedback)

            result = {
                "til": feedback.get('til', ""),
                "feedback": "ok",
            }
            for criterion, threshold, with_suggestion in checks:
                if feedback[f"{criterion}_score"] < threshold:
                    result["feedback"] = "not_ok"
                    result["feedback_criteria"] = f"{criterion}_feedback"
                    result["reason"] = feedback[f"{criterion}_reason"]
                    if with_suggestion:
                        result["suggestion"] = feedback["final_suggestion"]
                    break
            final_results.append(result)

        print("Final Results:")
        pprint.pp(final_results)
        return final_results

    def _gather_feedback(self):
        """Score the extracted TILs and store the list in ``self.feedback_results``."""
        feedback_chain = self._build_feedback_chain()
        pprint.pp("Analysing the TIL.....")
        self.feedback_results = feedback_chain.invoke(
            {"til_content": self.content})['tils']
        print("Feedback: ")
        pprint.pp(self.feedback_results)

    def _build_feedback_chain(self):
        """Build the prompt | LLM | JSON-parser chain used to review TILs.

        Returns:
            A runnable chain that takes ``{"til_content": ...}`` and yields the
            parsed JSON feedback.
        """
        feedback_parser = JsonOutputParser(pydantic_object=TilFeedbackResults)
        feedback_prompt = ChatPromptTemplate.from_messages([
            SystemMessage(
                "You are a 'Personal Today I Learned Reviewer' who works in a Product Engineering Services company. Your responsibility is to guide the user to write better TILs. "
                "Your personal goal is to review a user's list of 'Today I Learned' and suggest edits based on the following criteria:\n"
                "1. Is the 'Today I Learned' insightful?\n"
                "2. Is the 'Today I Learned' factually correct and accurate?\n"
                "3. Is the 'Today I Learned' written in simple english?\n"
                "4. Is the 'Today I Learned' grammatically correct?\n"
                "Can you provide a score for on the scale of 10 for each of the 'Today I Learned' on each of these criteria and provide reasons for the score, "
                " the reason/feedback should be presented in the Point Of View of the Reviewer and the feedback should be direct.\n\n"
                f"Formatting Instructions: {feedback_parser.get_format_instructions()}"
            ),
            HumanMessagePromptTemplate.from_template("{til_content}")
        ])
        print("Prompt: ")
        pprint.pp(feedback_prompt, width=80)

        llm = ChatOpenAI(model=OPENAI_MODEL, temperature=0.2)
        analysis_chain = feedback_prompt | llm | feedback_parser
        return analysis_chain
# Review of a single TIL: one integer score plus a textual reason for each of
# the four criteria, and a suggested rewrite.
# NOTE: documented with comments rather than a class docstring on purpose;
# pydantic copies a model's docstring into its JSON schema, which would change
# the format instructions generated from this model.
class TilFeedbackResult(BaseModel):
    # The TIL under review, echoed back verbatim.
    til: str = Field(
        description=(
            "'Today I Learned' as exactly captured by the user "
            "without any modifications."
        ),
    )

    # --- insightfulness ---
    insightful_score: int = Field(
        description=(
            "'Today I Learned' scores should be based solely on insightful criteria, "
            "with no other factors considered. "
            "Don't consider conciseness while evaluating."
        ),
    )
    insightful_reason: str = Field(
        description="Feedback for low insightful_score if it is not 10",
    )

    # --- factuality ---
    factuality_score: int = Field(
        description=(
            "'Today I Learned' scores should be based solely on factuality criteria, "
            "with no other factors considered."
        ),
    )
    factuality_reason: str = Field(
        description="Feedback for low factuality_score if it is not 10",
    )

    # --- simplicity ---
    simplicity_score: int = Field(
        description=(
            "'Today I Learned' scores should be based solely on simplicity criteria, "
            "with no other factors considered."
        ),
    )
    simplicity_reason: str = Field(
        description="Feedback for low simplicity_score if it is not 10",
    )

    # --- grammar ---
    grammatical_score: int = Field(
        description=(
            "'Today I Learned' scores should be based solely on grammatical criteria, "
            "with no other factors considered."
        ),
    )
    grammatical_reason: str = Field(
        description="Feedback for low grammatical_score if it is not 10",
    )

    # Reviewer's proposed rewrite of the TIL.
    final_suggestion: str = Field(
        description="Final suggested version of the TIL",
    )
# Top-level shape of the reviewer's JSON reply: one TilFeedbackResult per TIL
# under the "tils" key. Passed as the pydantic_object of the feedback parser.
# (Comments instead of a docstring so the generated JSON schema is unchanged.)
class TilFeedbackResults(BaseModel):
    tils: List[TilFeedbackResult]
# Top-level shape of the extractor's JSON reply: the extracted TIL sentences
# under the "tils" key. Passed as the pydantic_object of the extraction parser.
# (Comments instead of a docstring so the generated JSON schema is unchanged.)
class TodayILearneds(BaseModel):
    tils: List[str]