Spaces:

beautiful-code
/

ai_workflows

Runtime error

App Files Files Community

til_v2

by theRealNG - opened Jun 25, 2024

base: refs/heads/main

←

from: refs/pr/1

Discussion Files changed

+87

-33

Files changed (2) hide show

crew/til.py +77 -21
test.py +10 -12

crew/til.py CHANGED Viewed

@@ -8,15 +8,61 @@ import pprint
 HIGH_IMPACT_THRESHOLD = 8
 LOW_IMPACT_THRESHOLD = 7
-OPENAI_MODEL = "gpt-4o"
-# OPENAI_MODEL = "gpt-3.5-turbo"
 class TilCrew:
     def kickoff(self, inputs={}):
-        self.content = inputs["content"]
         self._gather_feedback()
         return self._final_call_on_feedback()
     def _final_call_on_feedback(self):
         final_results = []
         for feedback in self.feedback_results:
@@ -62,7 +108,8 @@ class TilCrew:
     def _gather_feedback(self):
         feedback_chain = self._build_feedback_chain()
         pprint.pp("Analysing the TIL.....")
-        self.feedback_results = feedback_chain.invoke({"til_content": self.content})['tils']
         print("Feedback: ")
         pprint.pp(self.feedback_results)
@@ -70,14 +117,14 @@ class TilCrew:
         feedback_parser = JsonOutputParser(pydantic_object=TilFeedbackResults)
         feedback_prompt = ChatPromptTemplate.from_messages([
             SystemMessage(
-                "You are a 'Personal TIL Reviewer' who works in a Product Engineering Services company. Your responsibility is to guide the user to write better TILs. "
-                "Your personal goal is to review a user's list of TILs and suggeste edits based on the following criteria:\n"
-                "1. Is the TIL insightful?"
-                "2. Is the TIL factually correct and accurate?"
-                "3. Is the TIL written in simple english?"
-                "4. Is the TIL grammatically correct?\n"
-                "Can you provide a score for on the scale of 10 for each of the TIL on each of these criteria and provide reasons for the score, "
                 " the reason/feedback should be presented in the Point Of View of the Reviewer and the feedback should be direct."
                 f"Formatting Instructions: {feedback_parser.get_format_instructions()}"
             ),
@@ -92,22 +139,31 @@ class TilCrew:
 class TilFeedbackResult(BaseModel):
-    til: str = Field(description="TIL as exactly captured by the user without any modifications.")
     insightful_score: int = Field(
-        description="TIL scores should be based solely on insightful criteria, with no other factors considered.")
-    insightful_reason: str = Field(description="Feedback for low insightful_score if it is not 10")
     factuality_score: int = Field(
-        description="TIL scores should be based solely on factuality criteria, with no other factors considered.")
-    factuality_reason: str = Field(description="Feedback for low factuality_score if it is not 10")
     simplicity_score: int = Field(
-        description="TIL scores should be based solely on simplicity criteria, with no other factors considered.")
-    simplicity_reason: str = Field(description="Feedback for low simplicity_score if it is not 10")
     grammatical_score: int = Field(
-        description="TIL scores should be based solely on grammatical criteria, with no other factors considered.")
-    grammatical_reason: str = Field(description="Feedback for low grammatical_score if it is not 10")
     final_suggestion: str = Field(
         description="Final suggested version of the TIL")
 class TilFeedbackResults(BaseModel):
     tils: List[TilFeedbackResult]

 HIGH_IMPACT_THRESHOLD = 8
 LOW_IMPACT_THRESHOLD = 7
+# NOTE(review): the two thresholds above are not referenced in the visible
+# hunks; presumably they are compared against the per-criterion scores when
+# the final verdict is computed — confirm against _final_call_on_feedback.
+# OPENAI_MODEL = "gpt-4o"
+# Model name passed to ChatOpenAI(model=...) in _extract_tils.
+OPENAI_MODEL = "gpt-3.5-turbo"
 class TilCrew:
     def kickoff(self, inputs={}):
+        """Run the TIL pipeline on the user's notes and return its result.
+
+        Stores inputs["notes"] on the instance, extracts the TILs from it,
+        gathers feedback on them, and returns whatever
+        _final_call_on_feedback() produces.
+
+        Args:
+            inputs: Mapping that must contain a "notes" entry; with the
+                default empty dict the lookup below raises KeyError.
+        """
+        # NOTE(review): the mutable default is only read here, never mutated,
+        # so it is not shared state — but it also never yields a usable call.
+        self.notes = inputs["notes"]
+        self._extract_tils()
         self._gather_feedback()
         return self._final_call_on_feedback()
+    def _extract_tils(self):
+        """Extract the individual TILs from self.notes with one LLM call.
+
+        Builds a chat prompt from a fixed system message (role description,
+        one few-shot example, and the parser's format instructions) followed
+        by the user's notes, pipes it through ChatOpenAI and a
+        JsonOutputParser typed on TodayILearneds, and stores the parsed
+        output in self.content. The parsed output is also pretty-printed.
+        """
+        tils_parser = JsonOutputParser(pydantic_object=TodayILearneds)
+        # One worked example: free-form notes in, a JSON object with a
+        # "tils" list of verbatim sentences out.
+        few_shot_examples = [
+            {
+                "role": "HUMAN",
+                "content": "I went through the following course on quantization of LLM: https://www.deeplearning.ai/short-courses/quantization-in-depth/ and here are my insights: \n"
+                "Quantization is the process of reducing the size of LLM models by reducing the underlying weights. "
+                "The weights are reduced by scaling down the datatypes from a datatype that takes larger space to a data type that takes a smaller space, this is also known as downcasting. "
+                "Advantages: takes lesser space and increases compute speed. "
+                "Disadvantages: Answers are less precise.\n\n"
+            },
+            {
+                "role": "AI",
+                # The example must itself be valid JSON (no trailing comma
+                # after the last array element), otherwise it demonstrates
+                # output that a JSON parser cannot read.
+                "content": """
+```json
+  {
+    "tils": [
+      "Quantization is the process of reducing the size of LLM models by reducing the underlying weights.",
+      "The weights are reduced by scaling down the datatypes from a datatype that takes larger space to a data type that takes a smaller space, this is also known as downcasting.",
+      "Advantages: takes lesser space and increases compute speed.",
+      "Disadvantages: Answers are less precise."
+    ]
+  }
+```
+"""
+            },
+        ]
+        # The system message is a plain SystemMessage, not a template: the
+        # examples and format instructions are interpolated here, once, by
+        # the f-strings. Only "{notes}" is filled in at invoke time.
+        extract_tils_prompt = ChatPromptTemplate.from_messages([
+            SystemMessage(
+                "You are a 'Personal Today I Learned Extractor' who works in a Product Engineering Services company. "
+                "Your responsibility is to extract the TILs from user notes. "
+                "Your personal goal is to review a user's list of notes and extract Today I Learned from his notes "
+                "without rephrasing user's sentences at all. Never rephrase user's words while extracting TILs.\n\n"
+                f"Here are few examples:\n {few_shot_examples}\n\n"
+                f"Formatting Instructions: {tils_parser.get_format_instructions()}"
+            ),
+            HumanMessagePromptTemplate.from_template("{notes}")
+        ])
+        llm = ChatOpenAI(model=OPENAI_MODEL, temperature=0.2)
+        extraction_chain = extract_tils_prompt | llm | tils_parser
+        # The parsed output is later passed as "til_content" by
+        # _gather_feedback.
+        self.content = extraction_chain.invoke({"notes": self.notes})
+        pprint.pp(self.content)
     def _final_call_on_feedback(self):
         final_results = []
         for feedback in self.feedback_results:
     def _gather_feedback(self):
+        """Run the feedback chain on self.content and keep its 'tils' entry.
+
+        Invokes the chain returned by _build_feedback_chain() with
+        self.content as "til_content", stores the 'tils' value of the
+        response in self.feedback_results, and prints progress and the
+        stored results to stdout.
+        """
         feedback_chain = self._build_feedback_chain()
         pprint.pp("Analysing the TIL.....")
+        # The chain output is indexed with ['tils'], so it must be a mapping
+        # with that key; a response without it raises here.
+        self.feedback_results = feedback_chain.invoke(
+            {"til_content": self.content})['tils']
         print("Feedback: ")
         pprint.pp(self.feedback_results)
         feedback_parser = JsonOutputParser(pydantic_object=TilFeedbackResults)
         feedback_prompt = ChatPromptTemplate.from_messages([
             SystemMessage(
+                "You are a 'Personal Today I Learned Reviewer' who works in a Product Engineering Services company. Your responsibility is to guide the user to write better TILs. "
+                "Your personal goal is to review a user's list of 'Today I Learned'  and suggeste edits based on the following criteria:\n"
+                "1. Is the 'Today I Learned' insightful?"
+                "2. Is the 'Today I Learned' factually correct and accurate?"
+                "3. Is the 'Today I Learned' written in simple english?"
+                "4. Is the 'Today I Learned' grammatically correct?\n"
+                "Can you provide a score for on the scale of 10 for each of the 'Today I Learned' on each of these criteria and provide reasons for the score, "
                 " the reason/feedback should be presented in the Point Of View of the Reviewer and the feedback should be direct."
                 f"Formatting Instructions: {feedback_parser.get_format_instructions()}"
             ),
 class TilFeedbackResult(BaseModel):
+    # Feedback for a single TIL: the TIL text, an integer score and a reason
+    # for each of four criteria (insightful, factuality, simplicity,
+    # grammatical), and a suggested rewrite. The Field descriptions are
+    # runtime text: they are part of the schema given to the output parser.
+    # NOTE(review): documented with "#" rather than a docstring because
+    # pydantic may surface a class docstring in the generated schema, which
+    # would change the format instructions sent to the model — confirm
+    # before converting this to a docstring.
+    til: str = Field(
+        description="'Today I Learned' as exactly captured by the user without any modifications.")
     insightful_score: int = Field(
+        description="'Today I Learned' scores should be based solely on insightful criteria, with no other factors considered. Don't consider conciseness while evaluating.")
+    insightful_reason: str = Field(
+        description="Feedback for low insightful_score if it is not 10")
     factuality_score: int = Field(
+        description="'Today I Learned' scores should be based solely on factuality criteria, with no other factors considered.")
+    factuality_reason: str = Field(
+        description="Feedback for low factuality_score if it is not 10")
     simplicity_score: int = Field(
+        description="'Today I Learned' scores should be based solely on simplicity criteria, with no other factors considered.")
+    simplicity_reason: str = Field(
+        description="Feedback for low simplicity_score if it is not 10")
     grammatical_score: int = Field(
+        description="'Today I Learned' scores should be based solely on grammatical criteria, with no other factors considered.")
+    grammatical_reason: str = Field(
+        description="Feedback for low grammatical_score if it is not 10")
     final_suggestion: str = Field(
         description="Final suggested version of the TIL")
 class TilFeedbackResults(BaseModel):
+    # Wrapper holding one TilFeedbackResult per TIL under the "tils" key;
+    # used as the pydantic_object of the feedback JsonOutputParser.
     tils: List[TilFeedbackResult]
+class TodayILearneds(BaseModel):
+    # Output schema for the extraction step: a flat list of TIL strings
+    # under the "tils" key; used as the pydantic_object of the
+    # JsonOutputParser in TilCrew._extract_tils.
+    tils: List[str]

test.py CHANGED Viewed

@@ -24,18 +24,15 @@ def main():
         unsafe_allow_html=True
     )
     til_content = st.text_area('Enter what you learnt today:',
-                               "I went through the following course on quantization of LLM: https://www.deeplearning.ai/short-courses/quantization-in-depth/ and here are my insights: \n"
-                               "* Quantization is the process of reducing the size of LLM models by reducing the underlying weights.\n"
-                               "* The weights are reduced by scaling down the datatypes from a datatype that takes larger space to a data type that takes a smaller space, this is also known as downcasting.\n"
-                               "* Advantages: takes lesser space and increases compute speed\n"
-                               "* Disadvantages: Answers are less precise\n\n"
                                "My notes on Synthetic data:\n"
-                               "* Why is it needed? Product Testing, Training ML Algos, Reduced constraints when using reglated data.\n"
-                               "* Types: Fully Synthetic data, Partially Synthetic data\n"
-                               "* Varities: Text, Tabular, Media.\n"
-                               "* Benchmarks to consider: Accuracy, Privacy\n"
-                               "* Techniques: Statistical distribution, Agent to model, Using Deep Learning/Generative AI, Synthetic Minority Oversampling Technique.",
-                               key='til_content', help='Enter what you learnt today')
     if st.button("Get Feedback"):
         with st.status(
@@ -45,7 +42,7 @@ def main():
                 log_container = st.empty()
                 with stdout(log_container.code, terminator=""):
                     feedback = TilCrew()
-                    inputs = {"content": til_content}
                     results = feedback.kickoff(inputs=inputs)
             status.update(
                 label="✅ Feedback ready!",
@@ -54,6 +51,7 @@ def main():
             )
         for result in results:
             st.markdown(f"#### TIL: {result['til']}")
             st.markdown(f"**Feedback:** {result['feedback']}")
             if result['feedback'] == "not_ok":

         unsafe_allow_html=True
     )
     til_content = st.text_area('Enter what you learnt today:',
                                "My notes on Synthetic data:\n"
+                               "What: Synthetic data is information that is not generated by real-world occurrences but is artificially generated. "
+                               "Why is it needed? Product Testing, Training ML Algos, Reduced constraints when using reglated data."
+                               "Types: Fully Synthetic data, Partially Synthetic data"
+                               "Varities: Text, Tabular, Media."
+                               "Benchmarks to consider: Accuracy, Privacy."
+                               "Notes from https://www.turing.com/kb/synthetic-data-generation-techniques."
+                               "Techniques: Statistical distribution, Agent to model, Using Deep Learning/Generative AI, Synthetic Minority Oversampling Technique.",
+                               key='til_content', help='Enter what you learnt today', height=300)
     if st.button("Get Feedback"):
         with st.status(
                 log_container = st.empty()
                 with stdout(log_container.code, terminator=""):
                     feedback = TilCrew()
+                    inputs = {"notes": til_content}
                     results = feedback.kickoff(inputs=inputs)
             status.update(
                 label="✅ Feedback ready!",
             )
         for result in results:
+            # st.markdown(result)
             st.markdown(f"#### TIL: {result['til']}")
             st.markdown(f"**Feedback:** {result['feedback']}")
             if result['feedback'] == "not_ok":