theRealNG committed on
Commit
9487604
·
1 Parent(s): 15a130b
Files changed (2) hide show
  1. crew/til.py +77 -21
  2. test.py +10 -12
crew/til.py CHANGED
@@ -8,15 +8,61 @@ import pprint
8
 
9
  HIGH_IMPACT_THRESHOLD = 8
10
  LOW_IMPACT_THRESHOLD = 7
11
- OPENAI_MODEL = "gpt-4o"
12
- # OPENAI_MODEL = "gpt-3.5-turbo"
 
13
 
14
  class TilCrew:
15
  def kickoff(self, inputs={}):
16
- self.content = inputs["content"]
 
17
  self._gather_feedback()
18
  return self._final_call_on_feedback()
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  def _final_call_on_feedback(self):
21
  final_results = []
22
  for feedback in self.feedback_results:
@@ -62,7 +108,8 @@ class TilCrew:
62
  def _gather_feedback(self):
63
  feedback_chain = self._build_feedback_chain()
64
  pprint.pp("Analysing the TIL.....")
65
- self.feedback_results = feedback_chain.invoke({"til_content": self.content})['tils']
 
66
  print("Feedback: ")
67
  pprint.pp(self.feedback_results)
68
 
@@ -70,14 +117,14 @@ class TilCrew:
70
  feedback_parser = JsonOutputParser(pydantic_object=TilFeedbackResults)
71
  feedback_prompt = ChatPromptTemplate.from_messages([
72
  SystemMessage(
73
- "You are a 'Personal TIL Reviewer' who works in a Product Engineering Services company. Your responsibility is to guide the user to write better TILs. "
74
- "Your personal goal is to review a user's list of TILs and suggeste edits based on the following criteria:\n"
75
- "1. Is the TIL insightful?"
76
- "2. Is the TIL factually correct and accurate?"
77
- "3. Is the TIL written in simple english?"
78
- "4. Is the TIL grammatically correct?\n"
79
-
80
- "Can you provide a score for on the scale of 10 for each of the TIL on each of these criteria and provide reasons for the score, "
81
  " the reason/feedback should be presented in the Point Of View of the Reviewer and the feedback should be direct."
82
  f"Formatting Instructions: {feedback_parser.get_format_instructions()}"
83
  ),
@@ -92,22 +139,31 @@ class TilCrew:
92
 
93
 
94
  class TilFeedbackResult(BaseModel):
95
- til: str = Field(description="TIL as exactly captured by the user without any modifications.")
 
96
  insightful_score: int = Field(
97
- description="TIL scores should be based solely on insightful criteria, with no other factors considered.")
98
- insightful_reason: str = Field(description="Feedback for low insightful_score if it is not 10")
 
99
  factuality_score: int = Field(
100
- description="TIL scores should be based solely on factuality criteria, with no other factors considered.")
101
- factuality_reason: str = Field(description="Feedback for low factuality_score if it is not 10")
 
102
  simplicity_score: int = Field(
103
- description="TIL scores should be based solely on simplicity criteria, with no other factors considered.")
104
- simplicity_reason: str = Field(description="Feedback for low simplicity_score if it is not 10")
 
105
  grammatical_score: int = Field(
106
- description="TIL scores should be based solely on grammatical criteria, with no other factors considered.")
107
- grammatical_reason: str = Field(description="Feedback for low grammatical_score if it is not 10")
 
108
  final_suggestion: str = Field(
109
  description="Final suggested version of the TIL")
110
 
111
 
112
  class TilFeedbackResults(BaseModel):
113
  tils: List[TilFeedbackResult]
 
 
 
 
 
8
 
9
  HIGH_IMPACT_THRESHOLD = 8
10
  LOW_IMPACT_THRESHOLD = 7
11
+ # OPENAI_MODEL = "gpt-4o"
12
+ OPENAI_MODEL = "gpt-3.5-turbo"
13
+
14
 
15
  class TilCrew:
16
  def kickoff(self, inputs={}):
17
+ self.notes = inputs["notes"]
18
+ self._extract_tils()
19
  self._gather_feedback()
20
  return self._final_call_on_feedback()
21
 
22
+ def _extract_tils(self):
23
+ tils_parser = JsonOutputParser(pydantic_object=TodayILearneds)
24
+ few_shot_examples = [
25
+ {
26
+ "role": "HUMAN",
27
+ "content": "I went through the following course on quantization of LLM: https://www.deeplearning.ai/short-courses/quantization-in-depth/ and here are my insights: \n"
28
+ "Quantization is the process of reducing the size of LLM models by reducing the underlying weights. "
29
+ "The weights are reduced by scaling down the datatypes from a datatype that takes larger space to a data type that takes a smaller space, this is also known as downcasting. "
30
+ "Advantages: takes lesser space and increases compute speed. "
31
+ "Disadvantages: Answers are less precise.\n\n"
32
+ },
33
+ {
34
+ "role": "AI",
35
+ "content": """
36
+ ```json
37
+ {
38
+ "tils": [
39
+ "Quantization is the process of reducing the size of LLM models by reducing the underlying weights.",
40
+ "The weights are reduced by scaling down the datatypes from a datatype that takes larger space to a data type that takes a smaller space, this is also known as downcasting.",
41
+ "Advantages: takes lesser space and increases compute speed.",
42
+ "Disadvantages: Answers are less precise.",
43
+ ]
44
+ }
45
+ ```
46
+ """
47
+ },
48
+ ]
49
+ extract_tils_prompt = ChatPromptTemplate.from_messages([
50
+ SystemMessage(
51
+ "You are a 'Personal Today I Learned Extractor' who works in a Product Engineering Services company. "
52
+ "Your responsibility is to extract the TILs from user notes."
53
+ "Your personal goal is to review a user's list of notes and extract Today I Learned from his notes "
54
+ "without rephrasing user's sentences at all. Never rephrase user's words while extracting TILs.\n\n"
55
+ f"Here are few examples:\n {few_shot_examples}\n\n"
56
+ f"Formatting Instructions: {tils_parser.get_format_instructions()}"
57
+ ),
58
+ HumanMessagePromptTemplate.from_template("{notes}")
59
+ ])
60
+ llm = ChatOpenAI(model=OPENAI_MODEL, temperature=0.2)
61
+ extraction_chain = extract_tils_prompt | llm | tils_parser
62
+
63
+ self.content = extraction_chain.invoke({"notes": self.notes})
64
+ pprint.pp(self.content)
65
+
66
  def _final_call_on_feedback(self):
67
  final_results = []
68
  for feedback in self.feedback_results:
 
108
  def _gather_feedback(self):
109
  feedback_chain = self._build_feedback_chain()
110
  pprint.pp("Analysing the TIL.....")
111
+ self.feedback_results = feedback_chain.invoke(
112
+ {"til_content": self.content})['tils']
113
  print("Feedback: ")
114
  pprint.pp(self.feedback_results)
115
 
 
117
  feedback_parser = JsonOutputParser(pydantic_object=TilFeedbackResults)
118
  feedback_prompt = ChatPromptTemplate.from_messages([
119
  SystemMessage(
120
+ "You are a 'Personal Today I Learned Reviewer' who works in a Product Engineering Services company. Your responsibility is to guide the user to write better TILs. "
121
+ "Your personal goal is to review a user's list of 'Today I Learned' and suggeste edits based on the following criteria:\n"
122
+ "1. Is the 'Today I Learned' insightful?"
123
+ "2. Is the 'Today I Learned' factually correct and accurate?"
124
+ "3. Is the 'Today I Learned' written in simple english?"
125
+ "4. Is the 'Today I Learned' grammatically correct?\n"
126
+
127
+ "Can you provide a score for on the scale of 10 for each of the 'Today I Learned' on each of these criteria and provide reasons for the score, "
128
  " the reason/feedback should be presented in the Point Of View of the Reviewer and the feedback should be direct."
129
  f"Formatting Instructions: {feedback_parser.get_format_instructions()}"
130
  ),
 
139
 
140
 
141
  class TilFeedbackResult(BaseModel):
142
+ til: str = Field(
143
+ description="'Today I Learned' as exactly captured by the user without any modifications.")
144
  insightful_score: int = Field(
145
+ description="'Today I Learned' scores should be based solely on insightful criteria, with no other factors considered. Don't consider conciseness while evaluating.")
146
+ insightful_reason: str = Field(
147
+ description="Feedback for low insightful_score if it is not 10")
148
  factuality_score: int = Field(
149
+ description="'Today I Learned' scores should be based solely on factuality criteria, with no other factors considered.")
150
+ factuality_reason: str = Field(
151
+ description="Feedback for low factuality_score if it is not 10")
152
  simplicity_score: int = Field(
153
+ description="'Today I Learned' scores should be based solely on simplicity criteria, with no other factors considered.")
154
+ simplicity_reason: str = Field(
155
+ description="Feedback for low simplicity_score if it is not 10")
156
  grammatical_score: int = Field(
157
+ description="'Today I Learned' scores should be based solely on grammatical criteria, with no other factors considered.")
158
+ grammatical_reason: str = Field(
159
+ description="Feedback for low grammatical_score if it is not 10")
160
  final_suggestion: str = Field(
161
  description="Final suggested version of the TIL")
162
 
163
 
164
  class TilFeedbackResults(BaseModel):
165
  tils: List[TilFeedbackResult]
166
+
167
+
168
+ class TodayILearneds(BaseModel):
169
+ tils: List[str]
test.py CHANGED
@@ -24,18 +24,15 @@ def main():
24
  unsafe_allow_html=True
25
  )
26
  til_content = st.text_area('Enter what you learnt today:',
27
- "I went through the following course on quantization of LLM: https://www.deeplearning.ai/short-courses/quantization-in-depth/ and here are my insights: \n"
28
- "* Quantization is the process of reducing the size of LLM models by reducing the underlying weights.\n"
29
- "* The weights are reduced by scaling down the datatypes from a datatype that takes larger space to a data type that takes a smaller space, this is also known as downcasting.\n"
30
- "* Advantages: takes lesser space and increases compute speed\n"
31
- "* Disadvantages: Answers are less precise\n\n"
32
  "My notes on Synthetic data:\n"
33
- "* Why is it needed? Product Testing, Training ML Algos, Reduced constraints when using reglated data.\n"
34
- "* Types: Fully Synthetic data, Partially Synthetic data\n"
35
- "* Varities: Text, Tabular, Media.\n"
36
- "* Benchmarks to consider: Accuracy, Privacy\n"
37
- "* Techniques: Statistical distribution, Agent to model, Using Deep Learning/Generative AI, Synthetic Minority Oversampling Technique.",
38
- key='til_content', help='Enter what you learnt today')
 
 
39
 
40
  if st.button("Get Feedback"):
41
  with st.status(
@@ -45,7 +42,7 @@ def main():
45
  log_container = st.empty()
46
  with stdout(log_container.code, terminator=""):
47
  feedback = TilCrew()
48
- inputs = {"content": til_content}
49
  results = feedback.kickoff(inputs=inputs)
50
  status.update(
51
  label="✅ Feedback ready!",
@@ -54,6 +51,7 @@ def main():
54
  )
55
 
56
  for result in results:
 
57
  st.markdown(f"#### TIL: {result['til']}")
58
  st.markdown(f"**Feedback:** {result['feedback']}")
59
  if result['feedback'] == "not_ok":
 
24
  unsafe_allow_html=True
25
  )
26
  til_content = st.text_area('Enter what you learnt today:',
 
 
 
 
 
27
  "My notes on Synthetic data:\n"
28
+ "What: Synthetic data is information that is not generated by real-world occurrences but is artificially generated. "
29
+ "Why is it needed? Product Testing, Training ML Algos, Reduced constraints when using reglated data."
30
+ "Types: Fully Synthetic data, Partially Synthetic data"
31
+ "Varities: Text, Tabular, Media."
32
+ "Benchmarks to consider: Accuracy, Privacy."
33
+ "Notes from https://www.turing.com/kb/synthetic-data-generation-techniques."
34
+ "Techniques: Statistical distribution, Agent to model, Using Deep Learning/Generative AI, Synthetic Minority Oversampling Technique.",
35
+ key='til_content', help='Enter what you learnt today', height=300)
36
 
37
  if st.button("Get Feedback"):
38
  with st.status(
 
42
  log_container = st.empty()
43
  with stdout(log_container.code, terminator=""):
44
  feedback = TilCrew()
45
+ inputs = {"notes": til_content}
46
  results = feedback.kickoff(inputs=inputs)
47
  status.update(
48
  label="✅ Feedback ready!",
 
51
  )
52
 
53
  for result in results:
54
+ # st.markdown(result)
55
  st.markdown(f"#### TIL: {result['til']}")
56
  st.markdown(f"**Feedback:** {result['feedback']}")
57
  if result['feedback'] == "not_ok":