Spaces:

beautiful-code
/

ai_workflows

Runtime error

App Files Files Community

theRealNG committed on Jul 4, 2024

Commit

0aba2e5

unverified ·

2 Parent(s): 3629877 4ad7a82

Merge pull request #19 from beautiful-code/course_lessons_extraction

Browse files

Files changed (24) hide show

.gitignore +1 -0
agents/article_evaluator.py +2 -2
agents/curiosity_catalyst.py +2 -2
agents/learning_curator.py +3 -3
agents/learning_profiler.py +2 -2
endpoints.py +3 -13
packages.txt +8 -0
requirements.txt +6 -0
tasks/create_article_pitch.py +1 -1
tools/scrape_website.py +0 -14
ui/article_recommendation.py +1 -1
ui/course_lessons_extractor.py +57 -0
ui/research_paper.py +2 -2
ui/til_feedback.py +1 -1
ui_main.py +16 -9
workflows/__init__.py +0 -0
{crew → workflows}/article_suggestion.py +0 -0
workflows/courses/lessons_extractor.py +90 -0
{crew → workflows}/research_article_suggester.py +1 -1
{crew → workflows}/til.py +13 -1
{tools → workflows/tools}/helpers.py +0 -0
workflows/tools/scrape_website.py +67 -0
{tools → workflows/tools}/search_web.py +0 -0
workflows/utils/feedback.py +8 -0

.gitignore CHANGED Viewed

@@ -7,3 +7,4 @@ evaluated_articles.json
 final_articles.json
 learning_profile.json
 pitched_articles.json

 final_articles.json
 learning_profile.json
 pitched_articles.json
+webpageScreenshot.png

agents/article_evaluator.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from crewai import Agent
 from llms.gpt import llm
-from tools.helpers import streamlit_callback
-from tools.scrape_website import scrape_tool
 article_evaluator = Agent(
     role="Recommended Article Evaluator",

 from crewai import Agent
 from llms.gpt import llm
+from workflows.tools.helpers import streamlit_callback
+from workflows.tools.scrape_website import scrape_tool
 article_evaluator = Agent(
     role="Recommended Article Evaluator",

agents/curiosity_catalyst.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from crewai import Agent
 from llms.gpt import llm
-from tools.helpers import streamlit_callback
-from tools.scrape_website import scrape_tool
 curiosity_catalyst = Agent(
   role="Curiosity Catalyst",

 from crewai import Agent
 from llms.gpt import llm
+from workflows.tools.helpers import streamlit_callback
+from workflows.tools.scrape_website import scrape_tool
 curiosity_catalyst = Agent(
   role="Curiosity Catalyst",

agents/learning_curator.py CHANGED Viewed

@@ -1,8 +1,8 @@
 from crewai import Agent
 from llms.gpt import llm
-from tools.helpers import streamlit_callback
-from tools.scrape_website import scrape_tool
-from tools.search_web import search_tool
 learning_curator = Agent(
     role="Personal Learning Curator",

 from crewai import Agent
 from llms.gpt import llm
+from workflows.tools.helpers import streamlit_callback
+from workflows.tools.scrape_website import scrape_tool
+from workflows.tools.search_web import search_tool
 learning_curator = Agent(
     role="Personal Learning Curator",

agents/learning_profiler.py CHANGED Viewed

@@ -1,7 +1,7 @@
 from crewai import Agent
 from llms.gpt import llm
-from tools.helpers import streamlit_callback
-from tools.scrape_website import scrape_tool
 learning_profiler = Agent(
     role="Personal Learning Profiler",

 from crewai import Agent
 from llms.gpt import llm
+from workflows.tools.helpers import streamlit_callback
+from workflows.tools.scrape_website import scrape_tool
 learning_profiler = Agent(
     role="Personal Learning Profiler",

endpoints.py CHANGED Viewed

@@ -1,7 +1,8 @@
 from dotenv import load_dotenv
 import uvicorn
 from fastapi import FastAPI, Query
-from .crew.til import TilCrew, TilFeedbackResponse
 from fastapi.middleware.cors import CORSMiddleware
 from langsmith import Client
 from typing import List, Optional
@@ -45,22 +46,11 @@ async def til_feedback_kickoff(content: List[str]) -> TilFeedbackResponse:
     result = TilCrew().kickoff(inputs)
     return result
-class Feedback(BaseModel):
-    helpful_score: Optional[float]
-    feedback_on: Optional[str]
 @app.post("/til_feedback/{run_id}/feedback", tags=["til_feedback"])
 async def capture_feedback(run_id: UUID4, feedback: Feedback) -> str:
     print("Helful Score: ", feedback.helpful_score)
     print("Feedback On: ", feedback.feedback_on)
-    client = Client()
-    client.create_feedback(
-        str(run_id),
-        key="helpful",
-        score=feedback.helpful_score,
-        source_info={"til": feedback.feedback_on},
-        type="api",
-    )
     return "ok"
 @app.get("/healthcheck")

 from dotenv import load_dotenv
 import uvicorn
 from fastapi import FastAPI, Query
+from workflows.til import TilCrew, TilFeedbackResponse
+from workflows.utils.feedback import Feedback
 from fastapi.middleware.cors import CORSMiddleware
 from langsmith import Client
 from typing import List, Optional
     result = TilCrew().kickoff(inputs)
     return result
 @app.post("/til_feedback/{run_id}/feedback", tags=["til_feedback"])
 async def capture_feedback(run_id: UUID4, feedback: Feedback) -> str:
     print("Helful Score: ", feedback.helpful_score)
     print("Feedback On: ", feedback.feedback_on)
+    TilCrew.post_feedback(run_id=run_id, feedback=feedback)
     return "ok"
 @app.get("/healthcheck")

packages.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+libnss3
+libnspr4
+libatk1.0-0
+libatk-bridge2.0-0
+libcups2
+libatspi2.0-0
+libxcomposite1
+libxdamage1

requirements.txt CHANGED Viewed

@@ -14,3 +14,9 @@ uvicorn
 fastapi_cors
 langsmith
 pytest

 fastapi_cors
 langsmith
 pytest
+playwright
+playwright-stealth
+unstructured
+asyncio
+psutil
+pyppeteer

tasks/create_article_pitch.py CHANGED Viewed

@@ -9,7 +9,7 @@ from pydantic import BaseModel
 from typing import List
 from agents.curiosity_catalyst import curiosity_catalyst
-from tools.scrape_website import scrape_tool
 from tasks.create_learning_profile import learning_profile_task
 from tasks.evaluate_articles import evaluation_task

 from typing import List
 from agents.curiosity_catalyst import curiosity_catalyst
+from workflows.tools.scrape_website import scrape_tool
 from tasks.create_learning_profile import learning_profile_task
 from tasks.evaluate_articles import evaluation_task

tools/scrape_website.py DELETED Viewed

@@ -1,14 +0,0 @@
-from crewai_tools import ScrapeWebsiteTool
-import requests
-from bs4 import BeautifulSoup
-scrape_tool = ScrapeWebsiteTool()
-def CustomScrapeWebsiteTool(url):
-    response = requests.get(url)
-    parsed = BeautifulSoup(response.content, "html.parser")
-    text = parsed.get_text()
-    text = '\n'.join([i for i in text.split('\n') if i.strip() != ''])
-    text = ' '.join([i for i in text.split(' ') if i.strip() != ''])
-    return text

ui/article_recommendation.py CHANGED Viewed

@@ -3,7 +3,7 @@ import json
 import streamlit as st
 import utils.settings as settings
-from crew.article_suggestion import article_recommendation_crew
 from utils.write_to_json import write_dict_to_json as write_dict_to_json
 load_dotenv()
 settings.init()

 import streamlit as st
 import utils.settings as settings
+from workflows.article_suggestion import article_recommendation_crew
 from utils.write_to_json import write_dict_to_json as write_dict_to_json
 load_dotenv()
 settings.init()

ui/course_lessons_extractor.py ADDED Viewed

	@@ -0,0 +1,57 @@

+import streamlit as st
+import asyncio
+from contextlib import contextmanager
+from dotenv import load_dotenv
+from workflows.courses.lessons_extractor import LessonsExtractor
+from streamlit_extras.capture import stdout
+# Call dotenv's load_dotenv() at import time, before the page code below runs.
+load_dotenv()
+@contextmanager
+def setup_event_loop():
+    """Provide a fresh asyncio event loop for the duration of a ``with`` block.
+
+    A new loop is created and installed as the current event loop, then
+    yielded to the caller. On exit (normal or via exception) the loop is
+    closed and the current event loop is reset to ``None``.
+    """
+    event_loop = asyncio.new_event_loop()
+    asyncio.set_event_loop(event_loop)
+    try:
+        yield event_loop
+    finally:
+        # Close first, then uninstall, so no closed loop stays registered.
+        event_loop.close()
+        asyncio.set_event_loop(None)
+def main():
+    """Render the "Course Lesson Extractor" Streamlit page.
+
+    Shows a text area for the course URL. When "Get Lessons" is pressed,
+    ``LessonsExtractor.kickoff`` is run with stdout captured into the page,
+    and each extracted lesson is then listed with its concepts.
+    """
+    st.markdown("<div class='container'>", unsafe_allow_html=True)
+    st.markdown(
+        """
+        <div class="centered">
+            <p class="title">Course Lesson Extractor</p>
+        </div>
+        """,
+        unsafe_allow_html=True
+    )
+    course_url = st.text_area('Enter the URL for the course:',
+                              "https://www.coursera.org/learn/google-data-analytics-capstone?specialization=google-data-analytics",
+                               key='course_url', help='Enter course you want to learn')
+    if st.button("Get Lessons"):
+        with st.status(
+            "🤖 **Extracting Lessons...**", state="running", expanded=True
+        ) as status:
+            with st.container(height=500, border=False):
+                log_container = st.empty()
+                with stdout(log_container.code, terminator=""):
+                    with setup_event_loop() as loop:
+                        extractor = LessonsExtractor()
+                        # A text area can carry stray newlines/spaces around the URL.
+                        inputs = {"course_url": course_url.strip()}
+                        results = extractor.kickoff(inputs=inputs)["lessons"]
+            status.update(
+                label="✅ Extracted Lessons!",
+                state="complete",
+                expanded=False,
+            )
+        # Number lessons from 1 for display, and tolerate a lesson whose
+        # "concepts" entry is missing or null in the model output.
+        for idx, lesson in enumerate(results, start=1):
+            st.markdown(f"#### Lesson {idx}: {lesson['name']}")
+            st.markdown(f"Concepts: {', '.join(lesson.get('concepts') or [])}")
+if __name__ == "__main__":
+    main()

ui/research_paper.py CHANGED Viewed

@@ -1,11 +1,11 @@
 import streamlit as st
-from crew.research_article_suggester import RecentArticleSuggester
 from streamlit_extras.capture import stdout
 def main():
     st.markdown(
         """
         <style>

 import streamlit as st
+from workflows.research_article_suggester import RecentArticleSuggester
 from streamlit_extras.capture import stdout
 def main():
     st.markdown(
         """
         <style>

ui/til_feedback.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import streamlit as st
 from dotenv import load_dotenv
-from crew.til import TilCrew
 from streamlit_extras.capture import stdout
 load_dotenv()

 import streamlit as st
 from dotenv import load_dotenv
+from workflows.til import TilCrew
 from streamlit_extras.capture import stdout
 load_dotenv()

ui_main.py CHANGED Viewed

@@ -1,16 +1,20 @@
-import streamlit as st
-import math
 from dotenv import load_dotenv
 from streamlit_extras.stylable_container import stylable_container
-from PIL import Image
 from ui.article_recommendation import main as article_recommendor_main
 from ui.research_paper import main as research_article_suggester_main
 from ui.til_feedback import main as feedback_main
 load_dotenv()
-st.set_page_config(page_title='Multi-Page App', page_icon='📰', layout='wide')
 def load_css(file_name):
     with open(file_name) as f:
@@ -29,24 +33,27 @@ def main():
         research_article_suggester_main()
     elif st.session_state.page == "feedback":
         feedback_main()
 def show_main_page():
     css = load_css("ui/main.css")
     st.markdown(f"<style>{css}</style>", unsafe_allow_html=True)
-    st.markdown('<div class="main-title">Welcome to the Multi-Page App!</div>', unsafe_allow_html=True)
     st.markdown("---")
-    st.markdown('<div class="sub-header">Navigate to Specific Pages:</div>', unsafe_allow_html=True)
     card_info = [
         {"title": "Article Recommender", "description": "Discover articles tailored to your interests.", "key": "article_recommendor"},
         {"title": "Recent Article Suggester", "description": "Get suggestions for recent research articles.", "key": "research_article_suggester"},
-        {"title": "Feedback", "description": "Provide your valuable feedback.", "key": "feedback"},
     ]
     num_cols = 3
-    num_rows = math.ceil(len(card_info) / num_cols)
     for row in range(num_rows):
@@ -59,7 +66,7 @@ def show_main_page():
                     with stylable_container(
                         key="inside_container_with_border",
                         css_styles="""
-                                {
                                     background-color: #f8f9fa;
                                     border-radius: 10px;
                                     box-shadow: 0 4px 8px 0 rgba(0, 0, 0, 0.1);

 from dotenv import load_dotenv
 from streamlit_extras.stylable_container import stylable_container
 from ui.article_recommendation import main as article_recommendor_main
+from ui.course_lessons_extractor import main as lessons_extractor_main
 from ui.research_paper import main as research_article_suggester_main
 from ui.til_feedback import main as feedback_main
+import math
+import streamlit as st
+import subprocess
 load_dotenv()
+# Running required system commands
+subprocess.run(["playwright", "install", "chromium"])
+st.set_page_config(page_title='Growthy AI Workflows', page_icon='📰', layout='wide')
 def load_css(file_name):
     with open(file_name) as f:
         research_article_suggester_main()
     elif st.session_state.page == "feedback":
         feedback_main()
+    elif st.session_state.page == "lessons_extractor":
+        lessons_extractor_main()
 def show_main_page():
     css = load_css("ui/main.css")
     st.markdown(f"<style>{css}</style>", unsafe_allow_html=True)
+    st.markdown('<div class="main-title">Welcome to Growthy AI Workflows!</div>', unsafe_allow_html=True)
     st.markdown("---")
+    st.markdown('<div class="sub-header">Navigate to Specific Workflow:</div>', unsafe_allow_html=True)
     card_info = [
+        {"title": "TIL Feedback", "description": "Provide your valuable feedback.", "key": "feedback"},
+        {"title": "Course Lesson Extractor", "description": "Extract lessons for a given course", "key": "lessons_extractor"},
         {"title": "Article Recommender", "description": "Discover articles tailored to your interests.", "key": "article_recommendor"},
         {"title": "Recent Article Suggester", "description": "Get suggestions for recent research articles.", "key": "research_article_suggester"},
     ]
     num_cols = 3
+    num_rows = math.ceil(len(card_info) / num_cols)
     for row in range(num_rows):
                     with stylable_container(
                         key="inside_container_with_border",
                         css_styles="""
+                                {
                                     background-color: #f8f9fa;
                                     border-radius: 10px;
                                     box-shadow: 0 4px 8px 0 rgba(0, 0, 0, 0.1);

workflows/__init__.py ADDED Viewed

File without changes

{crew → workflows}/article_suggestion.py RENAMED Viewed

File without changes

workflows/courses/lessons_extractor.py ADDED Viewed

	@@ -0,0 +1,90 @@

+from langchain import callbacks
+from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
+from langchain_core.messages import SystemMessage, HumanMessage
+from langchain_core.output_parsers import JsonOutputParser
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_openai import ChatOpenAI
+from pydantic import BaseModel, Field
+from typing import List
+from workflows.tools.scrape_website import WebpageScreenshot
+from crewai import Agent, Task, Crew
+from crewai_tools import ScrapeWebsiteTool
+import base64
+import os
+import pprint
+class LessonsExtractor:
+    """Extract the lessons of a course from a screenshot of its web page.
+
+    ``kickoff`` takes a screenshot of the course URL, sends it to an OpenAI
+    chat model together with JSON formatting instructions derived from the
+    ``Course`` schema, and returns the parsed lessons plus the traced run id.
+    """
+    def kickoff(self, inputs=None):
+        """Run the extraction.
+
+        Args:
+            inputs: mapping that must contain the key ``"course_url"``.
+
+        Returns:
+            ``{"run_id": <id of the first traced run>, "lessons": <parsed lessons>}``.
+
+        Raises:
+            KeyError: if ``"course_url"`` is missing from ``inputs``.
+        """
+        # A None default avoids sharing one mutable dict between calls.
+        inputs = {} if inputs is None else inputs
+        self.course_url = inputs["course_url"]
+        self._extract_lessons()
+        return {"run_id": self.run_id, "lessons": self.lessons}
+    def _extract_lessons(self):
+        """Screenshot the page, invoke the chain, and store lessons and run id."""
+        self.course_webpage_content = self._scrape_webpage_content()
+        extractor_chain = self._build_lessons_extractor_chain()
+        pprint.pp("Extracting Lessons....")
+        with callbacks.collect_runs() as cb:
+            # The prompt messages are fully built already (the screenshot is
+            # embedded in _build_lessons_extractor_chain).
+            self.lessons = extractor_chain.invoke(
+                {"screenshot": self.course_webpage_content})["lessons"]
+            self.run_id = cb.traced_runs[0].id
+            print("Run ID: ", self.run_id)
+        print("Lessons")
+        pprint.pp(self.lessons)
+    def _scrape_webpage_content(self):
+        """Return the base64-encoded PNG screenshot of ``self.course_url``."""
+        pprint.pp("Scraping Courses....")
+        webpage_content = WebpageScreenshot(self.course_url)
+        # To inspect the screenshot while debugging, base64-decode
+        # webpage_content and write the bytes to "webpageScreenshot.png".
+        print("Webpage Content:")
+        # Log only the size: dumping the whole base64 image floods stdout
+        # (and the UI log panel that captures it).
+        pprint.pp(f"<base64 PNG screenshot, {len(webpage_content)} characters>")
+        return webpage_content
+    def _build_lessons_extractor_chain(self):
+        """Build the ``prompt | llm | parser`` chain with tracing config.
+
+        Reads ``OPENAI_MODEL`` and ``ENV`` from the environment and raises
+        ``KeyError`` if either is unset.
+        """
+        course_parser = JsonOutputParser(pydantic_object=Course)
+        prompt = ChatPromptTemplate.from_messages([
+            SystemMessage(
+                "You are an expert in understanding a course webpage. "
+                "Your goal is to extract the course content that will be covered as part of the course from the screenshot of the course webpage. "
+                f"Formatting Instructions: {course_parser.get_format_instructions()}"
+            ),
+            HumanMessage(
+                content=[
+                    {"type": "text", "text": "Here is the course webpage screenshot"},
+                    {"type": "image_url", "image_url": {
+                        "url": f"data:image/png;base64,{self.course_webpage_content}",
+                        "detail": "auto",
+                    }}
+                ]
+            )
+        ])
+        llm = ChatOpenAI(model=os.environ['OPENAI_MODEL'], temperature=0.2)
+        extractor_chain = (prompt | llm | course_parser).with_config({
+            "tags": ["courses"], "run_name": "Extracting Lessons",
+            "metadata": {
+                # Was misspelled "versoin", which hid the value from any
+                # trace filter on "version".
+                # NOTE(review): confirm no dashboard relies on the old key.
+                "version": "v1.0.0",
+                "growth_activity": "courses",
+                "env": os.environ["ENV"],
+                "model": os.environ["OPENAI_MODEL"],
+            }
+        })
+        return extractor_chain
+# Schema for one extracted lesson. The Field descriptions end up in the
+# formatting instructions sent to the model, so they are prompt text.
+# (Kept as a comment rather than a docstring so the schema is unchanged.)
+class Lesson(BaseModel):
+    name: str = Field(description="Lesson name mentioned in the screenshot.")
+    # The field is a list, so the "nothing found" instruction must ask for an
+    # empty list; asking for an empty string contradicts the schema.
+    concepts: List[str] = Field(description="What are the concepts mentioned in the screenshot "
+                                "that the user will learn as part of this lesson. "
+                                "If nothing is mentioned return an empty list."
+                                )
+# Top-level schema handed to JsonOutputParser: a course is a list of lessons.
+class Course(BaseModel):
+    lessons: List[Lesson]

{crew → workflows}/research_article_suggester.py RENAMED Viewed

@@ -12,7 +12,7 @@ from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.messages import SystemMessage, AIMessage, HumanMessage
 from langchain_core.output_parsers import JsonOutputParser
-from tools.scrape_website import scrape_tool, CustomScrapeWebsiteTool
 MAX_RESULTS = 2
 AGE_OF_RESEARCH_PAPER = 60

 from langchain_core.messages import SystemMessage, AIMessage, HumanMessage
 from langchain_core.output_parsers import JsonOutputParser
+from workflows.tools.scrape_website import scrape_tool, CustomScrapeWebsiteTool
 MAX_RESULTS = 2
 AGE_OF_RESEARCH_PAPER = 60

{crew → workflows}/til.py RENAMED Viewed

@@ -6,8 +6,10 @@ from langchain_core.messages import SystemMessage
 from langchain_core.output_parsers import JsonOutputParser
 from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, PromptTemplate
 from langchain_openai import ChatOpenAI
 from pydantic import BaseModel, Field, UUID4
 from typing import List, Optional
 import os
 import pprint
@@ -16,10 +18,20 @@ class TilCrew:
         print("Human Message:")
         pprint.pp(inputs)
         self.content = inputs["content"]
-        # self._gather_facts()
         self._gather_feedback()
         return self._final_call_on_feedback()
     def _final_call_on_feedback(self):
         final_results = []
         for feedback in self.feedback_results:

 from langchain_core.output_parsers import JsonOutputParser
 from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, PromptTemplate
 from langchain_openai import ChatOpenAI
+from langsmith import Client
 from pydantic import BaseModel, Field, UUID4
 from typing import List, Optional
+from workflows.utils.feedback import Feedback
 import os
 import pprint
         print("Human Message:")
         pprint.pp(inputs)
         self.content = inputs["content"]
         self._gather_feedback()
         return self._final_call_on_feedback()
+    @staticmethod
+    def post_feedback(run_id: UUID4, feedback: Feedback):
+        """Record user feedback for a traced run through the LangSmith client.
+
+        Declared static because it takes no ``self``: without the decorator
+        it only works when called on the class, and fails when called on an
+        instance.
+
+        Args:
+            run_id: id of the run the feedback refers to.
+            feedback: payload whose ``metric_type`` becomes the feedback key,
+                ``metric_score`` the score, and ``feedback_on`` the source info.
+        """
+        client = Client()
+        client.create_feedback(
+            str(run_id),
+            key=feedback.metric_type,
+            score=feedback.metric_score,
+            source_info={"til": feedback.feedback_on},
+            type="api",
+        )
     def _final_call_on_feedback(self):
         final_results = []
         for feedback in self.feedback_results:

{tools → workflows/tools}/helpers.py RENAMED Viewed

File without changes

workflows/tools/scrape_website.py ADDED Viewed

	@@ -0,0 +1,67 @@

+from bs4 import BeautifulSoup
+from crewai_tools import ScrapeWebsiteTool
+from playwright.async_api import async_playwright
+from playwright_stealth import stealth_async
+from pyppeteer import launch
+import asyncio
+import base64
+import requests
+# Module-level ScrapeWebsiteTool instance; other modules import it by name
+# (e.g. `from workflows.tools.scrape_website import scrape_tool`).
+scrape_tool = ScrapeWebsiteTool()
+def CustomScrapeWebsiteTool(url, timeout=30):
+    """Fetch *url* with browser-like headers and return its text content.
+
+    The HTML is parsed with BeautifulSoup, and empty lines and repeated
+    spaces are removed from the extracted text.
+
+    Args:
+        url: address of the page to fetch.
+        timeout: seconds to wait for the server before giving up. Without a
+            timeout ``requests.get`` can block indefinitely.
+
+    Returns:
+        The page text as a single string.
+
+    Raises:
+        requests.exceptions.RequestException: on connection errors/timeouts.
+    """
+    headers = {
+        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36',
+        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
+        'Accept-Language': 'en-US,en;q=0.9',
+        'Referer': 'https://www.google.com/',
+        'Connection': 'keep-alive',
+        'Upgrade-Insecure-Requests': '1',
+        # "br" is not advertised: brotli responses are only decoded when an
+        # optional brotli package is installed, otherwise the body arrives
+        # as undecoded bytes. NOTE(review): re-add if brotli is a dependency.
+        'Accept-Encoding': 'gzip, deflate'
+    }
+    response = requests.get(url, headers=headers, timeout=timeout)
+    parsed = BeautifulSoup(response.content, "html.parser")
+    text = parsed.get_text()
+    # Drop blank lines, then collapse runs of spaces.
+    text = '\n'.join(line for line in text.split('\n') if line.strip())
+    text = ' '.join(word for word in text.split(' ') if word.strip())
+    return text
+async def AsyncWebpageScreenshot(url):
+    """Take a full-page screenshot of *url* with headless Chromium.
+
+    Args:
+        url: address of the page to capture.
+
+    Returns:
+        The PNG screenshot as a base64-encoded UTF-8 string.
+    """
+    async with async_playwright() as p:
+        browser = await p.chromium.launch(headless=True)
+        try:
+            page = await browser.new_page()
+            # stealth_async is a coroutine function: without `await` the
+            # stealth patches are never applied to the page.
+            await stealth_async(page)
+            await page.goto(url)
+            screenshot_bytes = await page.screenshot(full_page=True)
+        finally:
+            # Close the browser even when navigation or capture fails.
+            await browser.close()
+    base64_image = base64.b64encode(screenshot_bytes).decode("utf-8")
+    return base64_image
+def WebpageScreenshot(url):
+    """Synchronous wrapper: run ``AsyncWebpageScreenshot(url)`` to completion.
+
+    Prints the URL being captured and returns whatever the coroutine returns.
+    """
+    print("Taking screenshot: ", url)
+    return asyncio.run(AsyncWebpageScreenshot(url))
+async def AsyncPyppeteerWebpageScreenshot(url):
+    """Take a screenshot of *url* using pyppeteer.
+
+    Args:
+        url: address of the page to capture.
+
+    Returns:
+        The screenshot as a base64-encoded UTF-8 string.
+    """
+    browser = await launch()
+    try:
+        page = await browser.newPage()
+        await page.goto(url)
+        screenshot_bytes = await page.screenshot()
+    finally:
+        # Always close the browser so a failed navigation or capture does
+        # not leave it open.
+        await browser.close()
+    base64_image = base64.b64encode(screenshot_bytes).decode("utf-8")
+    return base64_image
+def PyppeteerWebpageScreenshot(url):
+    """Synchronous wrapper: run ``AsyncPyppeteerWebpageScreenshot(url)`` to completion.
+
+    Prints the URL being captured and returns whatever the coroutine returns.
+    """
+    print("Taking screenshot: ", url)
+    return asyncio.run(AsyncPyppeteerWebpageScreenshot(url))

{tools → workflows/tools}/search_web.py RENAMED Viewed

File without changes

workflows/utils/feedback.py ADDED Viewed

	@@ -0,0 +1,8 @@

+from pydantic import BaseModel
+from typing import List, Optional
+# Feedback payload. Every field is optional and defaults to None: under
+# pydantic v2 an `Optional[...]` annotation without a default is still a
+# required field, so clients that omit a field would be rejected.
+# (Kept as a comment rather than a docstring so the generated schema is unchanged.)
+class Feedback(BaseModel):
+    helpful_score: Optional[float] = None
+    metric_type: Optional[str] = None
+    metric_score: Optional[float] = None
+    feedback_on: Optional[str] = None