Spaces:
Runtime error
Runtime error
Merge pull request #19 from beautiful-code/course_lessons_extraction
Browse files- .gitignore +1 -0
- agents/article_evaluator.py +2 -2
- agents/curiosity_catalyst.py +2 -2
- agents/learning_curator.py +3 -3
- agents/learning_profiler.py +2 -2
- endpoints.py +3 -13
- packages.txt +8 -0
- requirements.txt +6 -0
- tasks/create_article_pitch.py +1 -1
- tools/scrape_website.py +0 -14
- ui/article_recommendation.py +1 -1
- ui/course_lessons_extractor.py +57 -0
- ui/research_paper.py +2 -2
- ui/til_feedback.py +1 -1
- ui_main.py +16 -9
- workflows/__init__.py +0 -0
- {crew → workflows}/article_suggestion.py +0 -0
- workflows/courses/lessons_extractor.py +90 -0
- {crew → workflows}/research_article_suggester.py +1 -1
- {crew → workflows}/til.py +13 -1
- {tools → workflows/tools}/helpers.py +0 -0
- workflows/tools/scrape_website.py +67 -0
- {tools → workflows/tools}/search_web.py +0 -0
- workflows/utils/feedback.py +8 -0
.gitignore
CHANGED
|
@@ -7,3 +7,4 @@ evaluated_articles.json
|
|
| 7 |
final_articles.json
|
| 8 |
learning_profile.json
|
| 9 |
pitched_articles.json
|
|
|
|
|
|
| 7 |
final_articles.json
|
| 8 |
learning_profile.json
|
| 9 |
pitched_articles.json
|
| 10 |
+
webpageScreenshot.png
|
agents/article_evaluator.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
from crewai import Agent
|
| 2 |
from llms.gpt import llm
|
| 3 |
-
from tools.helpers import streamlit_callback
|
| 4 |
-
from tools.scrape_website import scrape_tool
|
| 5 |
|
| 6 |
article_evaluator = Agent(
|
| 7 |
role="Recommended Article Evaluator",
|
|
|
|
| 1 |
from crewai import Agent
|
| 2 |
from llms.gpt import llm
|
| 3 |
+
from workflows.tools.helpers import streamlit_callback
|
| 4 |
+
from workflows.tools.scrape_website import scrape_tool
|
| 5 |
|
| 6 |
article_evaluator = Agent(
|
| 7 |
role="Recommended Article Evaluator",
|
agents/curiosity_catalyst.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
from crewai import Agent
|
| 2 |
from llms.gpt import llm
|
| 3 |
-
from tools.helpers import streamlit_callback
|
| 4 |
-
from tools.scrape_website import scrape_tool
|
| 5 |
|
| 6 |
curiosity_catalyst = Agent(
|
| 7 |
role="Curiosity Catalyst",
|
|
|
|
| 1 |
from crewai import Agent
|
| 2 |
from llms.gpt import llm
|
| 3 |
+
from workflows.tools.helpers import streamlit_callback
|
| 4 |
+
from workflows.tools.scrape_website import scrape_tool
|
| 5 |
|
| 6 |
curiosity_catalyst = Agent(
|
| 7 |
role="Curiosity Catalyst",
|
agents/learning_curator.py
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
from crewai import Agent
|
| 2 |
from llms.gpt import llm
|
| 3 |
-
from tools.helpers import streamlit_callback
|
| 4 |
-
from tools.scrape_website import scrape_tool
|
| 5 |
-
from tools.search_web import search_tool
|
| 6 |
|
| 7 |
learning_curator = Agent(
|
| 8 |
role="Personal Learning Curator",
|
|
|
|
| 1 |
from crewai import Agent
|
| 2 |
from llms.gpt import llm
|
| 3 |
+
from workflows.tools.helpers import streamlit_callback
|
| 4 |
+
from workflows.tools.scrape_website import scrape_tool
|
| 5 |
+
from workflows.tools.search_web import search_tool
|
| 6 |
|
| 7 |
learning_curator = Agent(
|
| 8 |
role="Personal Learning Curator",
|
agents/learning_profiler.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
from crewai import Agent
|
| 2 |
from llms.gpt import llm
|
| 3 |
-
from tools.helpers import streamlit_callback
|
| 4 |
-
from tools.scrape_website import scrape_tool
|
| 5 |
|
| 6 |
learning_profiler = Agent(
|
| 7 |
role="Personal Learning Profiler",
|
|
|
|
| 1 |
from crewai import Agent
|
| 2 |
from llms.gpt import llm
|
| 3 |
+
from workflows.tools.helpers import streamlit_callback
|
| 4 |
+
from workflows.tools.scrape_website import scrape_tool
|
| 5 |
|
| 6 |
learning_profiler = Agent(
|
| 7 |
role="Personal Learning Profiler",
|
endpoints.py
CHANGED
|
@@ -1,7 +1,8 @@
|
|
| 1 |
from dotenv import load_dotenv
|
| 2 |
import uvicorn
|
| 3 |
from fastapi import FastAPI, Query
|
| 4 |
-
from .
|
|
|
|
| 5 |
from fastapi.middleware.cors import CORSMiddleware
|
| 6 |
from langsmith import Client
|
| 7 |
from typing import List, Optional
|
|
@@ -45,22 +46,11 @@ async def til_feedback_kickoff(content: List[str]) -> TilFeedbackResponse:
|
|
| 45 |
result = TilCrew().kickoff(inputs)
|
| 46 |
return result
|
| 47 |
|
| 48 |
-
class Feedback(BaseModel):
|
| 49 |
-
helpful_score: Optional[float]
|
| 50 |
-
feedback_on: Optional[str]
|
| 51 |
-
|
| 52 |
@app.post("/til_feedback/{run_id}/feedback", tags=["til_feedback"])
|
| 53 |
async def capture_feedback(run_id: UUID4, feedback: Feedback) -> str:
|
| 54 |
print("Helful Score: ", feedback.helpful_score)
|
| 55 |
print("Feedback On: ", feedback.feedback_on)
|
| 56 |
-
|
| 57 |
-
client.create_feedback(
|
| 58 |
-
str(run_id),
|
| 59 |
-
key="helpful",
|
| 60 |
-
score=feedback.helpful_score,
|
| 61 |
-
source_info={"til": feedback.feedback_on},
|
| 62 |
-
type="api",
|
| 63 |
-
)
|
| 64 |
return "ok"
|
| 65 |
|
| 66 |
@app.get("/healthcheck")
|
|
|
|
| 1 |
from dotenv import load_dotenv
|
| 2 |
import uvicorn
|
| 3 |
from fastapi import FastAPI, Query
|
| 4 |
+
from workflows.til import TilCrew, TilFeedbackResponse
|
| 5 |
+
from workflows.utils.feedback import Feedback
|
| 6 |
from fastapi.middleware.cors import CORSMiddleware
|
| 7 |
from langsmith import Client
|
| 8 |
from typing import List, Optional
|
|
|
|
| 46 |
result = TilCrew().kickoff(inputs)
|
| 47 |
return result
|
| 48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
@app.post("/til_feedback/{run_id}/feedback", tags=["til_feedback"])
|
| 50 |
async def capture_feedback(run_id: UUID4, feedback: Feedback) -> str:
|
| 51 |
print("Helful Score: ", feedback.helpful_score)
|
| 52 |
print("Feedback On: ", feedback.feedback_on)
|
| 53 |
+
TilCrew.post_feedback(run_id=run_id, feedback=feedback)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
return "ok"
|
| 55 |
|
| 56 |
@app.get("/healthcheck")
|
packages.txt
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
libnss3
|
| 2 |
+
libnspr4
|
| 3 |
+
libatk1.0-0
|
| 4 |
+
libatk-bridge2.0-0
|
| 5 |
+
libcups2
|
| 6 |
+
libatspi2.0-0
|
| 7 |
+
libxcomposite1
|
| 8 |
+
libxdamage1
|
requirements.txt
CHANGED
|
@@ -14,3 +14,9 @@ uvicorn
|
|
| 14 |
fastapi_cors
|
| 15 |
langsmith
|
| 16 |
pytest
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
fastapi_cors
|
| 15 |
langsmith
|
| 16 |
pytest
|
| 17 |
+
playwright
|
| 18 |
+
playwright-stealth
|
| 19 |
+
unstructured
|
| 20 |
+
asyncio
|
| 21 |
+
psutil
|
| 22 |
+
pyppeteer
|
tasks/create_article_pitch.py
CHANGED
|
@@ -9,7 +9,7 @@ from pydantic import BaseModel
|
|
| 9 |
from typing import List
|
| 10 |
|
| 11 |
from agents.curiosity_catalyst import curiosity_catalyst
|
| 12 |
-
from tools.scrape_website import scrape_tool
|
| 13 |
from tasks.create_learning_profile import learning_profile_task
|
| 14 |
from tasks.evaluate_articles import evaluation_task
|
| 15 |
|
|
|
|
| 9 |
from typing import List
|
| 10 |
|
| 11 |
from agents.curiosity_catalyst import curiosity_catalyst
|
| 12 |
+
from workflows.tools.scrape_website import scrape_tool
|
| 13 |
from tasks.create_learning_profile import learning_profile_task
|
| 14 |
from tasks.evaluate_articles import evaluation_task
|
| 15 |
|
tools/scrape_website.py
DELETED
|
@@ -1,14 +0,0 @@
|
|
| 1 |
-
from crewai_tools import ScrapeWebsiteTool
|
| 2 |
-
import requests
|
| 3 |
-
from bs4 import BeautifulSoup
|
| 4 |
-
|
| 5 |
-
scrape_tool = ScrapeWebsiteTool()
|
| 6 |
-
|
| 7 |
-
def CustomScrapeWebsiteTool(url):
|
| 8 |
-
response = requests.get(url)
|
| 9 |
-
parsed = BeautifulSoup(response.content, "html.parser")
|
| 10 |
-
text = parsed.get_text()
|
| 11 |
-
text = '\n'.join([i for i in text.split('\n') if i.strip() != ''])
|
| 12 |
-
text = ' '.join([i for i in text.split(' ') if i.strip() != ''])
|
| 13 |
-
|
| 14 |
-
return text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ui/article_recommendation.py
CHANGED
|
@@ -3,7 +3,7 @@ import json
|
|
| 3 |
import streamlit as st
|
| 4 |
import utils.settings as settings
|
| 5 |
|
| 6 |
-
from
|
| 7 |
from utils.write_to_json import write_dict_to_json as write_dict_to_json
|
| 8 |
load_dotenv()
|
| 9 |
settings.init()
|
|
|
|
| 3 |
import streamlit as st
|
| 4 |
import utils.settings as settings
|
| 5 |
|
| 6 |
+
from workflows.article_suggestion import article_recommendation_crew
|
| 7 |
from utils.write_to_json import write_dict_to_json as write_dict_to_json
|
| 8 |
load_dotenv()
|
| 9 |
settings.init()
|
ui/course_lessons_extractor.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import asyncio
|
| 3 |
+
from contextlib import contextmanager
|
| 4 |
+
from dotenv import load_dotenv
|
| 5 |
+
from workflows.courses.lessons_extractor import LessonsExtractor
|
| 6 |
+
from streamlit_extras.capture import stdout
|
| 7 |
+
load_dotenv()
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
@contextmanager
def setup_event_loop():
    """Provide a fresh asyncio event loop for the current thread.

    A new loop is created and installed as the thread's current event loop
    for the duration of the ``with`` block. On exit (normal or exceptional)
    the loop is closed and the thread's current loop is reset to ``None``.

    Yields:
        The newly created event loop.
    """
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        yield loop
    finally:
        try:
            loop.close()
        finally:
            # Detach the loop even if close() raises, so the thread is never
            # left pointing at a loop that is being torn down.
            asyncio.set_event_loop(None)
|
| 19 |
+
|
| 20 |
+
def main():
    """Render the "Course Lesson Extractor" Streamlit page.

    Shows a text area for the course URL and a "Get Lessons" button. When the
    button is pressed, ``LessonsExtractor`` is run for that URL while its
    stdout is mirrored into a log container, and the extracted lessons are
    then listed with their concepts.
    """
    st.markdown("<div class='container'>", unsafe_allow_html=True)

    st.markdown(
        """
        <div class="centered">
            <p class="title">Course Lesson Extractor</p>
        </div>
        """,
        unsafe_allow_html=True
    )
    course_url = st.text_area(
        'Enter the URL for the course:',
        "https://www.coursera.org/learn/google-data-analytics-capstone?specialization=google-data-analytics",
        key='course_url', help='Enter course you want to learn')

    if st.button("Get Lessons"):
        with st.status(
            "🤖 **Extracting Lessons...**", state="running", expanded=True
        ) as status:
            with st.container(height=500, border=False):
                log_container = st.empty()
                # Mirror everything the workflow prints into the page.
                with stdout(log_container.code, terminator=""):
                    # The loop object itself is not needed here; the context
                    # manager only installs it for this thread while the
                    # extractor runs.
                    with setup_event_loop():
                        extractor = LessonsExtractor()
                        inputs = {"course_url": course_url}
                        results = extractor.kickoff(inputs=inputs)["lessons"]
            status.update(
                label="✅ Extracted Lessons!",
                state="complete",
                expanded=False,
            )

        # Number lessons from 1 for display.
        for idx, lesson in enumerate(results, start=1):
            # The lessons come from unvalidated model output, so tolerate a
            # missing or empty "concepts" entry.
            concepts = lesson.get('concepts') or []
            st.markdown(f"#### Lesson {idx}: {lesson['name']}")
            st.markdown(f"Concepts: {', '.join(concepts)}")
|
| 55 |
+
|
| 56 |
+
if __name__ == "__main__":
|
| 57 |
+
main()
|
ui/research_paper.py
CHANGED
|
@@ -1,11 +1,11 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
-
from
|
| 3 |
from streamlit_extras.capture import stdout
|
| 4 |
|
| 5 |
|
| 6 |
|
| 7 |
def main():
|
| 8 |
-
|
| 9 |
st.markdown(
|
| 10 |
"""
|
| 11 |
<style>
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
+
from workflows.research_article_suggester import RecentArticleSuggester
|
| 3 |
from streamlit_extras.capture import stdout
|
| 4 |
|
| 5 |
|
| 6 |
|
| 7 |
def main():
|
| 8 |
+
|
| 9 |
st.markdown(
|
| 10 |
"""
|
| 11 |
<style>
|
ui/til_feedback.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
from dotenv import load_dotenv
|
| 3 |
-
from
|
| 4 |
from streamlit_extras.capture import stdout
|
| 5 |
load_dotenv()
|
| 6 |
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
from dotenv import load_dotenv
|
| 3 |
+
from workflows.til import TilCrew
|
| 4 |
from streamlit_extras.capture import stdout
|
| 5 |
load_dotenv()
|
| 6 |
|
ui_main.py
CHANGED
|
@@ -1,16 +1,20 @@
|
|
| 1 |
-
import streamlit as st
|
| 2 |
-
import math
|
| 3 |
from dotenv import load_dotenv
|
| 4 |
from streamlit_extras.stylable_container import stylable_container
|
| 5 |
-
from PIL import Image
|
| 6 |
from ui.article_recommendation import main as article_recommendor_main
|
|
|
|
| 7 |
from ui.research_paper import main as research_article_suggester_main
|
| 8 |
from ui.til_feedback import main as feedback_main
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
load_dotenv()
|
| 11 |
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
-
st.set_page_config(page_title='
|
| 14 |
|
| 15 |
def load_css(file_name):
|
| 16 |
with open(file_name) as f:
|
|
@@ -29,24 +33,27 @@ def main():
|
|
| 29 |
research_article_suggester_main()
|
| 30 |
elif st.session_state.page == "feedback":
|
| 31 |
feedback_main()
|
|
|
|
|
|
|
| 32 |
|
| 33 |
def show_main_page():
|
| 34 |
|
| 35 |
css = load_css("ui/main.css")
|
| 36 |
st.markdown(f"<style>{css}</style>", unsafe_allow_html=True)
|
| 37 |
|
| 38 |
-
st.markdown('<div class="main-title">Welcome to
|
| 39 |
st.markdown("---")
|
| 40 |
-
st.markdown('<div class="sub-header">Navigate to Specific
|
| 41 |
|
| 42 |
card_info = [
|
|
|
|
|
|
|
| 43 |
{"title": "Article Recommender", "description": "Discover articles tailored to your interests.", "key": "article_recommendor"},
|
| 44 |
{"title": "Recent Article Suggester", "description": "Get suggestions for recent research articles.", "key": "research_article_suggester"},
|
| 45 |
-
{"title": "Feedback", "description": "Provide your valuable feedback.", "key": "feedback"},
|
| 46 |
]
|
| 47 |
|
| 48 |
num_cols = 3
|
| 49 |
-
num_rows = math.ceil(len(card_info) / num_cols)
|
| 50 |
|
| 51 |
|
| 52 |
for row in range(num_rows):
|
|
@@ -59,7 +66,7 @@ def show_main_page():
|
|
| 59 |
with stylable_container(
|
| 60 |
key="inside_container_with_border",
|
| 61 |
css_styles="""
|
| 62 |
-
{
|
| 63 |
background-color: #f8f9fa;
|
| 64 |
border-radius: 10px;
|
| 65 |
box-shadow: 0 4px 8px 0 rgba(0, 0, 0, 0.1);
|
|
|
|
|
|
|
|
|
|
| 1 |
from dotenv import load_dotenv
|
| 2 |
from streamlit_extras.stylable_container import stylable_container
|
|
|
|
| 3 |
from ui.article_recommendation import main as article_recommendor_main
|
| 4 |
+
from ui.course_lessons_extractor import main as lessons_extractor_main
|
| 5 |
from ui.research_paper import main as research_article_suggester_main
|
| 6 |
from ui.til_feedback import main as feedback_main
|
| 7 |
+
import math
|
| 8 |
+
import streamlit as st
|
| 9 |
+
import subprocess
|
| 10 |
|
| 11 |
load_dotenv()
|
| 12 |
|
| 13 |
+
# Running required system commands
|
| 14 |
+
subprocess.run(["playwright", "install", "chromium"])
|
| 15 |
+
|
| 16 |
|
| 17 |
+
st.set_page_config(page_title='Growthy AI Workflows', page_icon='📰', layout='wide')
|
| 18 |
|
| 19 |
def load_css(file_name):
|
| 20 |
with open(file_name) as f:
|
|
|
|
| 33 |
research_article_suggester_main()
|
| 34 |
elif st.session_state.page == "feedback":
|
| 35 |
feedback_main()
|
| 36 |
+
elif st.session_state.page == "lessons_extractor":
|
| 37 |
+
lessons_extractor_main()
|
| 38 |
|
| 39 |
def show_main_page():
|
| 40 |
|
| 41 |
css = load_css("ui/main.css")
|
| 42 |
st.markdown(f"<style>{css}</style>", unsafe_allow_html=True)
|
| 43 |
|
| 44 |
+
st.markdown('<div class="main-title">Welcome to Growthy AI Workflows!</div>', unsafe_allow_html=True)
|
| 45 |
st.markdown("---")
|
| 46 |
+
st.markdown('<div class="sub-header">Navigate to Specific Workflow:</div>', unsafe_allow_html=True)
|
| 47 |
|
| 48 |
card_info = [
|
| 49 |
+
{"title": "TIL Feedback", "description": "Provide your valuable feedback.", "key": "feedback"},
|
| 50 |
+
{"title": "Course Lesson Extractor", "description": "Extract lessons for a given course", "key": "lessons_extractor"},
|
| 51 |
{"title": "Article Recommender", "description": "Discover articles tailored to your interests.", "key": "article_recommendor"},
|
| 52 |
{"title": "Recent Article Suggester", "description": "Get suggestions for recent research articles.", "key": "research_article_suggester"},
|
|
|
|
| 53 |
]
|
| 54 |
|
| 55 |
num_cols = 3
|
| 56 |
+
num_rows = math.ceil(len(card_info) / num_cols)
|
| 57 |
|
| 58 |
|
| 59 |
for row in range(num_rows):
|
|
|
|
| 66 |
with stylable_container(
|
| 67 |
key="inside_container_with_border",
|
| 68 |
css_styles="""
|
| 69 |
+
{
|
| 70 |
background-color: #f8f9fa;
|
| 71 |
border-radius: 10px;
|
| 72 |
box-shadow: 0 4px 8px 0 rgba(0, 0, 0, 0.1);
|
workflows/__init__.py
ADDED
|
File without changes
|
{crew → workflows}/article_suggestion.py
RENAMED
|
File without changes
|
workflows/courses/lessons_extractor.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from langchain import callbacks
|
| 2 |
+
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate
|
| 3 |
+
from langchain_core.messages import SystemMessage, HumanMessage
|
| 4 |
+
from langchain_core.output_parsers import JsonOutputParser
|
| 5 |
+
from langchain_google_genai import ChatGoogleGenerativeAI
|
| 6 |
+
from langchain_openai import ChatOpenAI
|
| 7 |
+
from pydantic import BaseModel, Field
|
| 8 |
+
from typing import List
|
| 9 |
+
from workflows.tools.scrape_website import WebpageScreenshot
|
| 10 |
+
from crewai import Agent, Task, Crew
|
| 11 |
+
from crewai_tools import ScrapeWebsiteTool
|
| 12 |
+
import base64
|
| 13 |
+
import os
|
| 14 |
+
import pprint
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class LessonsExtractor:
    """Extract the lessons of a course from a screenshot of its webpage.

    The workflow takes a screenshot of the course URL, sends it to a chat
    model together with JSON formatting instructions derived from ``Course``,
    and stores the parsed lessons and the traced run id on the instance.
    """

    def kickoff(self, inputs=None):
        """Run the extraction for the course at ``inputs["course_url"]``.

        Args:
            inputs: Mapping that must contain the key ``"course_url"``.

        Returns:
            A dict with ``"run_id"`` (id of the first traced run) and
            ``"lessons"`` (the ``"lessons"`` entry of the parsed model output).

        Raises:
            KeyError: If ``"course_url"`` is missing from ``inputs``.
        """
        # ``None`` replaces the former shared mutable ``{}`` default.
        inputs = {} if inputs is None else inputs
        self.course_url = inputs["course_url"]
        self._extract_lessons()

        return {"run_id": self.run_id, "lessons": self.lessons}

    def _extract_lessons(self):
        """Scrape the page, run the extraction chain and record the results."""
        self.course_webpage_content = self._scrape_webpage_content()
        extractor_chain = self._build_lessons_extractor_chain()
        pprint.pp("Extracting Lessons....")
        # collect_runs() captures the traced runs so the run id can be
        # returned to the caller.
        with callbacks.collect_runs() as cb:
            # The screenshot is already embedded in the prompt when the chain
            # is built; it is passed again here as the chain input.
            self.lessons = extractor_chain.invoke(
                {"screenshot": self.course_webpage_content})["lessons"]
            self.run_id = cb.traced_runs[0].id
            print("Run ID: ", self.run_id)
            print("Lessons")
            pprint.pp(self.lessons)

    def _scrape_webpage_content(self):
        """Return the base64-encoded screenshot of ``self.course_url``."""
        pprint.pp("Scraping Courses....")
        webpage_content = WebpageScreenshot(self.course_url)
        # To inspect the capture locally, base64-decode ``webpage_content``
        # and write the bytes to "webpageScreenshot.png".

        # Log only the payload size: the value is a whole base64-encoded
        # image, and dumping it would flood stdout.
        print("Webpage Content: base64 screenshot,", len(webpage_content), "characters")
        return webpage_content

    def _build_lessons_extractor_chain(self):
        """Build the ``prompt | llm | parser`` chain for lesson extraction.

        Reads ``OPENAI_MODEL`` and ``ENV`` from the environment and embeds
        ``self.course_webpage_content`` in the prompt as a PNG data URL.

        Raises:
            KeyError: If ``OPENAI_MODEL`` or ``ENV`` is not set.
        """
        course_parser = JsonOutputParser(pydantic_object=Course)
        prompt = ChatPromptTemplate.from_messages([
            SystemMessage(
                "You are an expert in understanding a course webpage. "
                "Your goal is to extract the course content that will be covered as part of the course from the screenshot of the course webpage. "
                f"Formatting Instructions: {course_parser.get_format_instructions()}"
            ),
            HumanMessage(
                content=[
                    {"type": "text", "text": "Here is the course webpage screenshot"},
                    {"type": "image_url", "image_url": {
                        "url": f"data:image/png;base64,{self.course_webpage_content}",
                        "detail": "auto",
                    }}
                ]
            )
        ])
        llm = ChatOpenAI(model=os.environ['OPENAI_MODEL'], temperature=0.2)

        extractor_chain = (prompt | llm | course_parser).with_config({
            "tags": ["courses"], "run_name": "Extracting Lessons",
            "metadata": {
                # Key was previously misspelled as "versoin".
                "version": "v1.0.0",
                "growth_activity": "courses",
                "env": os.environ["ENV"],
                "model": os.environ["OPENAI_MODEL"],
            }
        })

        return extractor_chain
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
# Schema for a single extracted lesson. The field descriptions are part of
# the schema, so they are kept precise: ``concepts`` is a list, and the
# fallback it asks for is therefore an empty list (it previously asked for
# "an empty string"). Plain comments are used instead of docstrings so the
# generated schema text is not altered by documentation.
class Lesson(BaseModel):
    name: str = Field(description="Lesson name mentioned in the screenshot.")
    concepts: List[str] = Field(
        description="What are the concepts mentioned in the screenshot "
                    "that the user will learn as part of this lesson. "
                    "If nothing is mentioned return an empty list."
    )


# Top-level schema handed to JsonOutputParser: the lessons of one course.
class Course(BaseModel):
    lessons: List[Lesson]
|
{crew → workflows}/research_article_suggester.py
RENAMED
|
@@ -12,7 +12,7 @@ from langchain_core.prompts import ChatPromptTemplate
|
|
| 12 |
from langchain_core.messages import SystemMessage, AIMessage, HumanMessage
|
| 13 |
from langchain_core.output_parsers import JsonOutputParser
|
| 14 |
|
| 15 |
-
from tools.scrape_website import scrape_tool, CustomScrapeWebsiteTool
|
| 16 |
|
| 17 |
MAX_RESULTS = 2
|
| 18 |
AGE_OF_RESEARCH_PAPER = 60
|
|
|
|
| 12 |
from langchain_core.messages import SystemMessage, AIMessage, HumanMessage
|
| 13 |
from langchain_core.output_parsers import JsonOutputParser
|
| 14 |
|
| 15 |
+
from workflows.tools.scrape_website import scrape_tool, CustomScrapeWebsiteTool
|
| 16 |
|
| 17 |
MAX_RESULTS = 2
|
| 18 |
AGE_OF_RESEARCH_PAPER = 60
|
{crew → workflows}/til.py
RENAMED
|
@@ -6,8 +6,10 @@ from langchain_core.messages import SystemMessage
|
|
| 6 |
from langchain_core.output_parsers import JsonOutputParser
|
| 7 |
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, PromptTemplate
|
| 8 |
from langchain_openai import ChatOpenAI
|
|
|
|
| 9 |
from pydantic import BaseModel, Field, UUID4
|
| 10 |
from typing import List, Optional
|
|
|
|
| 11 |
import os
|
| 12 |
import pprint
|
| 13 |
|
|
@@ -16,10 +18,20 @@ class TilCrew:
|
|
| 16 |
print("Human Message:")
|
| 17 |
pprint.pp(inputs)
|
| 18 |
self.content = inputs["content"]
|
| 19 |
-
# self._gather_facts()
|
| 20 |
self._gather_feedback()
|
| 21 |
return self._final_call_on_feedback()
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
def _final_call_on_feedback(self):
|
| 24 |
final_results = []
|
| 25 |
for feedback in self.feedback_results:
|
|
|
|
| 6 |
from langchain_core.output_parsers import JsonOutputParser
|
| 7 |
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, PromptTemplate
|
| 8 |
from langchain_openai import ChatOpenAI
|
| 9 |
+
from langsmith import Client
|
| 10 |
from pydantic import BaseModel, Field, UUID4
|
| 11 |
from typing import List, Optional
|
| 12 |
+
from workflows.utils.feedback import Feedback
|
| 13 |
import os
|
| 14 |
import pprint
|
| 15 |
|
|
|
|
| 18 |
print("Human Message:")
|
| 19 |
pprint.pp(inputs)
|
| 20 |
self.content = inputs["content"]
|
|
|
|
| 21 |
self._gather_feedback()
|
| 22 |
return self._final_call_on_feedback()
|
| 23 |
|
| 24 |
+
@staticmethod
def post_feedback(run_id: UUID4, feedback: Feedback):
    """Record user feedback for a traced run via the LangSmith client.

    Declared as a static method because it takes no ``self``; this keeps
    ``TilCrew.post_feedback(run_id=..., feedback=...)`` working and also
    makes the call valid on an instance.

    Args:
        run_id: Id of the run the feedback refers to.
        feedback: Payload whose ``metric_type`` is used as the feedback key,
            ``metric_score`` as the score and ``feedback_on`` as source info.
    """
    client = Client()
    client.create_feedback(
        str(run_id),
        key=feedback.metric_type,
        score=feedback.metric_score,
        source_info={"til": feedback.feedback_on},
        type="api",
    )
|
| 33 |
+
|
| 34 |
+
|
| 35 |
def _final_call_on_feedback(self):
|
| 36 |
final_results = []
|
| 37 |
for feedback in self.feedback_results:
|
{tools → workflows/tools}/helpers.py
RENAMED
|
File without changes
|
workflows/tools/scrape_website.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from bs4 import BeautifulSoup
|
| 2 |
+
from crewai_tools import ScrapeWebsiteTool
|
| 3 |
+
from playwright.async_api import async_playwright
|
| 4 |
+
from playwright_stealth import stealth_async
|
| 5 |
+
from pyppeteer import launch
|
| 6 |
+
import asyncio
|
| 7 |
+
import base64
|
| 8 |
+
import requests
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
scrape_tool = ScrapeWebsiteTool()
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def CustomScrapeWebsiteTool(url, timeout=30):
    """Fetch ``url`` and return its text content with blank runs collapsed.

    The page is requested with browser-like headers, parsed with
    BeautifulSoup's ``html.parser``, and the extracted text is cleaned by
    dropping blank lines and then empty space-separated chunks.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on an unresponsive
            host.

    Returns:
        The cleaned text of the page.

    Raises:
        requests.exceptions.RequestException: On connection errors or when
            the timeout expires.
    """
    # No explicit Accept-Encoding: requests then advertises only the
    # encodings it can actually decode. The previous hard-coded value
    # included "br", which yields undecodable content when no Brotli decoder
    # is installed.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Accept-Language': 'en-US,en;q=0.9',
        'Referer': 'https://www.google.com/',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    parsed = BeautifulSoup(response.content, "html.parser")
    text = parsed.get_text()
    # Drop blank lines, then drop empty chunks between single spaces.
    text = '\n'.join(line for line in text.split('\n') if line.strip() != '')
    text = ' '.join(chunk for chunk in text.split(' ') if chunk.strip() != '')

    return text
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
async def AsyncWebpageScreenshot(url):
    """Take a full-page screenshot of ``url`` with headless Chromium.

    Args:
        url: Address of the page to capture.

    Returns:
        The screenshot bytes, base64-encoded and decoded to a UTF-8 string.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            page = await browser.new_page()
            # stealth_async is a coroutine function: it must be awaited, or
            # the stealth patches are never applied to the page.
            await stealth_async(page)
            await page.goto(url)

            screenshot_bytes = await page.screenshot(full_page=True)
        finally:
            # Close the browser even when navigation or capture fails.
            await browser.close()

    base64_image = base64.b64encode(screenshot_bytes).decode("utf-8")
    return base64_image
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def WebpageScreenshot(url):
    """Blocking wrapper around ``AsyncWebpageScreenshot``.

    Logs the target URL, runs the coroutine to completion with
    ``asyncio.run`` and returns its result.
    """
    print("Taking screenshot: ", url)
    return asyncio.run(AsyncWebpageScreenshot(url))
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
async def AsyncPyppeteerWebpageScreenshot(url):
    """Take a screenshot of ``url`` using a pyppeteer-launched browser.

    Args:
        url: Address of the page to capture.

    Returns:
        The screenshot bytes, base64-encoded and decoded to a UTF-8 string.
    """
    browser = await launch()
    try:
        page = await browser.newPage()
        await page.goto(url)
        screenshot_bytes = await page.screenshot()
    finally:
        # Close the browser even when navigation or capture fails, so the
        # browser process is not left running.
        await browser.close()

    base64_image = base64.b64encode(screenshot_bytes).decode("utf-8")
    return base64_image
|
| 63 |
+
|
| 64 |
+
def PyppeteerWebpageScreenshot(url):
    """Blocking wrapper around ``AsyncPyppeteerWebpageScreenshot``.

    Logs the target URL, runs the coroutine to completion with
    ``asyncio.run`` and returns its result.
    """
    print("Taking screenshot: ", url)
    return asyncio.run(AsyncPyppeteerWebpageScreenshot(url))
|
{tools → workflows/tools}/search_web.py
RENAMED
|
File without changes
|
workflows/utils/feedback.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydantic import BaseModel
|
| 2 |
+
from typing import List, Optional
|
| 3 |
+
|
| 4 |
+
# Feedback payload for a traced run. Every field is optional and now has an
# explicit ``None`` default: under pydantic v2 an ``Optional[...]`` field
# without a default is still required, so a request omitting any one of
# them would be rejected.
class Feedback(BaseModel):
    helpful_score: Optional[float] = None
    # Feedback key and score (read by TilCrew.post_feedback).
    metric_type: Optional[str] = None
    metric_score: Optional[float] = None
    # Text the feedback refers to; stored as source info under "til".
    feedback_on: Optional[str] = None
|