File size: 8,099 Bytes
3f49c4a
 
 
 
 
514a39a
ec4a7eb
3f49c4a
 
 
9487604
 
 
3f49c4a
 
 
9487604
 
3f49c4a
 
 
9487604
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3f49c4a
514a39a
 
 
 
 
 
3f49c4a
 
514a39a
 
 
 
 
3f49c4a
514a39a
 
 
 
 
3f49c4a
514a39a
 
 
 
 
 
3f49c4a
514a39a
 
 
 
 
 
 
 
 
3f49c4a
514a39a
 
 
3f49c4a
 
 
ec4a7eb
9487604
 
ec4a7eb
514a39a
3f49c4a
 
514a39a
3f49c4a
 
9487604
 
 
 
 
 
 
 
e1c5a11
3f49c4a
 
 
 
ec4a7eb
 
3f49c4a
 
 
 
 
 
 
9487604
 
3f49c4a
9487604
 
 
3f49c4a
9487604
 
 
3f49c4a
9487604
 
 
3f49c4a
9487604
 
 
3f49c4a
 
514a39a
 
 
 
9487604
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain_core.messages import SystemMessage
from pydantic import BaseModel, Field
from langchain_core.output_parsers import JsonOutputParser
from langchain_openai import ChatOpenAI
from typing import List
import pprint

# Score cut-offs (out of 10) used by TilCrew._final_call_on_feedback.
# A TIL scoring below the threshold for a criterion triggers feedback.
HIGH_IMPACT_THRESHOLD = 8  # stricter bar: factuality and insightfulness
LOW_IMPACT_THRESHOLD = 7  # more lenient bar: simplicity and grammar
# OPENAI_MODEL = "gpt-4o"
OPENAI_MODEL = "gpt-3.5-turbo"


class TilCrew:
    """Extract 'Today I Learned' (TIL) entries from free-form notes and
    review them with an LLM.

    Pipeline (driven by ``kickoff``):
        1. ``_extract_tils``           - pull TILs verbatim out of the notes.
        2. ``_gather_feedback``        - score each TIL on four criteria.
        3. ``_final_call_on_feedback`` - turn scores into ok/not_ok verdicts.
    """

    def kickoff(self, inputs=None):
        """Run the full extraction + review pipeline.

        Args:
            inputs: Mapping that must contain a ``"notes"`` key holding the
                raw user notes (str).

        Returns:
            list[dict]: One verdict dict per extracted TIL
            (see ``_final_call_on_feedback``).

        Raises:
            KeyError: If ``"notes"`` is missing from ``inputs``.
        """
        # Fixed: the original used a mutable default argument (inputs={}),
        # which is shared across all calls of the method.
        inputs = {} if inputs is None else inputs
        self.notes = inputs["notes"]
        self._extract_tils()
        self._gather_feedback()
        return self._final_call_on_feedback()

    def _extract_tils(self):
        """Extract TILs verbatim from ``self.notes`` into ``self.content``."""
        tils_parser = JsonOutputParser(pydantic_object=TodayILearneds)
        # One human/AI example pair demonstrating verbatim extraction.
        # Fixed: the AI example previously had a trailing comma after the
        # last list item, which is invalid JSON and taught the model to
        # emit output the JSON parser could reject.
        few_shot_examples = [
            {
                "role": "HUMAN",
                "content": "I went through the following course on quantization of LLM: https://www.deeplearning.ai/short-courses/quantization-in-depth/ and here are my insights: \n"
                "Quantization is the process of reducing the size of LLM models by reducing the underlying weights. "
                "The weights are reduced by scaling down the datatypes from a datatype that takes larger space to a data type that takes a smaller space, this is also known as downcasting. "
                "Advantages: takes lesser space and increases compute speed. "
                "Disadvantages: Answers are less precise.\n\n"
            },
            {
                "role": "AI",
                "content": """
```json
  {
    "tils": [
      "Quantization is the process of reducing the size of LLM models by reducing the underlying weights.",
      "The weights are reduced by scaling down the datatypes from a datatype that takes larger space to a data type that takes a smaller space, this is also known as downcasting.",
      "Advantages: takes lesser space and increases compute speed.",
      "Disadvantages: Answers are less precise."
    ]
  }
```
"""
            },
        ]
        extract_tils_prompt = ChatPromptTemplate.from_messages([
            SystemMessage(
                "You are a 'Personal Today I Learned Extractor' who works in a Product Engineering Services company. "
                "Your responsibility is to extract the TILs from user notes."
                "Your personal goal is to review a user's list of notes and extract Today I Learned from his notes "
                "without rephrasing user's sentences at all. Never rephrase user's words while extracting TILs.\n\n"
                f"Here are few examples:\n {few_shot_examples}\n\n"
                f"Formatting Instructions: {tils_parser.get_format_instructions()}"
            ),
            HumanMessagePromptTemplate.from_template("{notes}")
        ])
        llm = ChatOpenAI(model=OPENAI_MODEL, temperature=0.2)
        extraction_chain = extract_tils_prompt | llm | tils_parser

        self.content = extraction_chain.invoke({"notes": self.notes})
        pprint.pp(self.content)

    def _final_call_on_feedback(self):
        """Convert per-criterion scores into a final verdict per TIL.

        Reads ``self.feedback_results`` (set by ``_gather_feedback``).

        Returns:
            list[dict]: For each TIL a dict with keys ``"til"``,
            ``"feedback"`` ("ok"/"not_ok") and, when not_ok,
            ``"feedback_criteria"``, ``"reason"`` and — for the
            simplicity/grammar criteria — ``"suggestion"``.
        """
        # (label, score key, reason key, threshold, include suggestion?)
        # Checked in priority order; the FIRST failing criterion wins and
        # later ones are not reported — same behavior as the original
        # copy-pasted if/continue chain.
        criteria = [
            ("factuality_feedback", "factuality_score", "factuality_reason",
             HIGH_IMPACT_THRESHOLD, False),
            ("insightful_feedback", "insightful_score", "insightful_reason",
             HIGH_IMPACT_THRESHOLD, False),
            ("simplicity_feedback", "simplicity_score", "simplicity_reason",
             LOW_IMPACT_THRESHOLD, True),
            ("grammatical_feedback", "grammatical_score", "grammatical_reason",
             LOW_IMPACT_THRESHOLD, True),
        ]
        final_results = []
        for feedback in self.feedback_results:
            print("Final analysis of:")
            pprint.pp(feedback)
            result = {
                "til": feedback.get('til', ""),
                "feedback": "not_ok",
            }
            for label, score_key, reason_key, threshold, with_suggestion in criteria:
                if feedback[score_key] < threshold:
                    result["feedback_criteria"] = label
                    result["reason"] = feedback[reason_key]
                    if with_suggestion:
                        result["suggestion"] = feedback["final_suggestion"]
                    break
            else:
                # No criterion fell below its threshold: the TIL is fine.
                result["feedback"] = "ok"
            # Fixed: original rebuilt the list each time
            # (final_results = final_results + [result]), which is quadratic.
            final_results.append(result)

        print("Final Results:")
        pprint.pp(final_results)
        return final_results

    def _gather_feedback(self):
        """Score the extracted TILs and store them in ``self.feedback_results``."""
        feedback_chain = self._build_feedback_chain()
        pprint.pp("Analysing the TIL.....")
        self.feedback_results = feedback_chain.invoke(
            {"til_content": self.content})['tils']
        print("Feedback: ")
        pprint.pp(self.feedback_results)

    def _build_feedback_chain(self):
        """Build the prompt | llm | parser chain that reviews TILs.

        Returns:
            A runnable chain producing a dict matching TilFeedbackResults.
        """
        feedback_parser = JsonOutputParser(pydantic_object=TilFeedbackResults)
        feedback_prompt = ChatPromptTemplate.from_messages([
            SystemMessage(
                # Fixed typo: "suggeste" -> "suggest".
                "You are a 'Personal Today I Learned Reviewer' who works in a Product Engineering Services company. Your responsibility is to guide the user to write better TILs. "
                "Your personal goal is to review a user's list of 'Today I Learned'  and suggest edits based on the following criteria:\n"
                "1. Is the 'Today I Learned' insightful?"
                "2. Is the 'Today I Learned' factually correct and accurate?"
                "3. Is the 'Today I Learned' written in simple english?"
                "4. Is the 'Today I Learned' grammatically correct?\n"

                "Can you provide a score for on the scale of 10 for each of the 'Today I Learned' on each of these criteria and provide reasons for the score, "
                " the reason/feedback should be presented in the Point Of View of the Reviewer and the feedback should be direct."
                f"Formatting Instructions: {feedback_parser.get_format_instructions()}"
            ),
            HumanMessagePromptTemplate.from_template("{til_content}")
        ])
        print("Prompt: ")
        pprint.pp(feedback_prompt, width=80)
        llm = ChatOpenAI(model=OPENAI_MODEL, temperature=0.2)
        analysis_chain = feedback_prompt | llm | feedback_parser

        return analysis_chain


class TilFeedbackResult(BaseModel):
    """Review of a single TIL: a 10-point score plus a reason for each of
    the four criteria (insightfulness, factuality, simplicity, grammar)
    and a final suggested rewrite.

    NOTE: the Field descriptions below are embedded verbatim in the
    JsonOutputParser format instructions sent to the LLM, so they are
    prompt text as much as schema documentation.
    """
    # The TIL text, reproduced verbatim from the user's notes.
    til: str = Field(
        description="'Today I Learned' as exactly captured by the user without any modifications.")
    # Insightfulness: gated by HIGH_IMPACT_THRESHOLD downstream.
    insightful_score: int = Field(
        description="'Today I Learned' scores should be based solely on insightful criteria, with no other factors considered. Don't consider conciseness while evaluating.")
    insightful_reason: str = Field(
        description="Feedback for low insightful_score if it is not 10")
    # Factuality: gated by HIGH_IMPACT_THRESHOLD downstream.
    factuality_score: int = Field(
        description="'Today I Learned' scores should be based solely on factuality criteria, with no other factors considered.")
    factuality_reason: str = Field(
        description="Feedback for low factuality_score if it is not 10")
    # Simplicity: gated by LOW_IMPACT_THRESHOLD downstream.
    simplicity_score: int = Field(
        description="'Today I Learned' scores should be based solely on simplicity criteria, with no other factors considered.")
    simplicity_reason: str = Field(
        description="Feedback for low simplicity_score if it is not 10")
    # Grammar: gated by LOW_IMPACT_THRESHOLD downstream.
    grammatical_score: int = Field(
        description="'Today I Learned' scores should be based solely on grammatical criteria, with no other factors considered.")
    grammatical_reason: str = Field(
        description="Feedback for low grammatical_score if it is not 10")
    # Suggested rewrite, surfaced when simplicity/grammar fall below threshold.
    final_suggestion: str = Field(
        description="Final suggested version of the TIL")


class TilFeedbackResults(BaseModel):
    """Top-level response schema for the feedback chain: one
    TilFeedbackResult per reviewed TIL."""
    tils: List[TilFeedbackResult]


class TodayILearneds(BaseModel):
    """Top-level response schema for the extraction chain: the list of
    TIL sentences pulled verbatim from the user's notes."""
    tils: List[str]