Spaces:
Runtime error
Runtime error
| from langchain import callbacks | |
| from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate | |
| from langchain_core.messages import SystemMessage, HumanMessage | |
| from langchain_core.output_parsers import JsonOutputParser | |
| from langchain_google_genai import ChatGoogleGenerativeAI | |
| from langchain_openai import ChatOpenAI | |
| from pydantic import BaseModel, Field | |
| from typing import List | |
| from workflows.tools.scrape_website import WebpageScreenshot | |
| from crewai import Agent, Task, Crew | |
| from crewai_tools import ScrapeWebsiteTool | |
| import base64 | |
| import os | |
| import pprint | |
class LessonsExtractor:
    """Extract the lessons of a course from a screenshot of its webpage.

    The workflow captures the course page with ``WebpageScreenshot``, sends
    the capture to an OpenAI chat model together with JSON formatting
    instructions derived from the ``Course`` schema, and stores the parsed
    ``"lessons"`` entry plus the id of the traced run on the instance.

    Attributes set while running:
        course_url: URL taken from ``inputs["course_url"]``.
        course_webpage_content: Value returned by ``WebpageScreenshot``.
        lessons: ``"lessons"`` entry of the parsed model output.
        run_id: ``id`` of the first run collected while invoking the chain.
    """

    def kickoff(self, inputs=None):
        """Run the extraction for a single course page.

        Args:
            inputs: Mapping that must contain the key ``"course_url"``.
                ``None`` (the default) is treated as an empty mapping.

        Returns:
            A dict with the keys ``"run_id"`` and ``"lessons"``.

        Raises:
            KeyError: If ``"course_url"`` is missing from ``inputs``.
        """
        # A literal ``{}`` default would be one dict shared by every call;
        # use a ``None`` sentinel and build a fresh mapping instead.
        if inputs is None:
            inputs = {}
        self.course_url = inputs["course_url"]
        self._extract_lessons()
        return {"run_id": self.run_id, "lessons": self.lessons}

    def _extract_lessons(self):
        """Scrape the page, invoke the extraction chain and record the result.

        Sets ``course_webpage_content``, ``lessons`` and ``run_id``.
        """
        # The chain builder reads ``self.course_webpage_content``, so the
        # scrape has to happen before the chain is built.
        self.course_webpage_content = self._scrape_webpage_content()
        extractor_chain = self._build_lessons_extractor_chain()
        pprint.pp("Extracting Lessons....")
        with callbacks.collect_runs() as cb:
            # NOTE(review): the prompt is built from concrete message objects
            # with the screenshot already embedded, so this input mapping does
            # not appear to be consumed by the prompt - confirm before removing.
            self.lessons = extractor_chain.invoke(
                {"screenshot": self.course_webpage_content}
            )["lessons"]
            # The first collected run is used as the id reported to callers.
            self.run_id = cb.traced_runs[0].id
        print("Run ID: ", self.run_id)
        print("Lessons")
        pprint.pp(self.lessons)

    def _scrape_webpage_content(self):
        """Capture ``self.course_url`` with ``WebpageScreenshot`` and return it.

        The result is later embedded in a ``data:image/png;base64,...`` URL,
        so it is presumably a base64-encoded PNG - confirm against
        ``workflows.tools.scrape_website``.
        """
        pprint.pp("Scraping Courses....")
        webpage_content = WebpageScreenshot(self.course_url)
        # Debugging aid: to inspect the capture locally, base64-decode
        # ``webpage_content`` and write the bytes to a ``.png`` file.
        print("Webpage Content:")
        pprint.pp(webpage_content)
        return webpage_content

    def _build_lessons_extractor_chain(self):
        """Build the ``prompt | llm | parser`` chain used to extract lessons.

        Reads ``self.course_webpage_content`` and the environment variables
        ``OPENAI_MODEL`` and ``ENV``.

        Returns:
            The configured chain; its parsed output is indexed by
            ``"lessons"`` in ``_extract_lessons``.

        Raises:
            KeyError: If ``OPENAI_MODEL`` or ``ENV`` is not set.
        """
        course_parser = JsonOutputParser(pydantic_object=Course)
        system_message = SystemMessage(
            "You are an expert in understanding a course webpage. "
            "Your goal is to extract the course content that will be covered as part of the course from the screenshot of the course webpage. "
            f"Formatting Instructions: {course_parser.get_format_instructions()}"
        )
        # The screenshot travels as an inline data URL in the human message.
        human_message = HumanMessage(
            content=[
                {"type": "text", "text": "Here is the course webpage screenshot"},
                {"type": "image_url", "image_url": {
                    "url": f"data:image/png;base64,{self.course_webpage_content}",
                    "detail": "auto",
                }},
            ]
        )
        prompt = ChatPromptTemplate.from_messages([system_message, human_message])

        # Read the model name once; it configures the LLM and is also
        # recorded in the run metadata.
        model_name = os.environ["OPENAI_MODEL"]
        llm = ChatOpenAI(model=model_name, temperature=0.2)
        extractor_chain = (prompt | llm | course_parser).with_config({
            "tags": ["courses"],
            "run_name": "Extracting Lessons",
            "metadata": {
                "version": "v1.0.0",
                "growth_activity": "courses",
                "env": os.environ["ENV"],
                "model": model_name,
            },
        })
        return extractor_chain
# Schema for one lesson listed on the course page.
# Documented with comments rather than a class docstring on purpose: pydantic
# copies a model's docstring into its JSON schema, and that schema is what the
# parser's format instructions send to the model.
class Lesson(BaseModel):
    # Lesson title as shown in the screenshot.
    name: str = Field(description="Lesson name mentioned in the screenshot.")
    # Concepts taught in the lesson. The field is a list, so the instruction
    # for the "nothing mentioned" case asks for an empty list (it previously
    # asked for an empty string, contradicting the declared type).
    concepts: List[str] = Field(
        description=(
            "What are the concepts mentioned in the screeshot "
            "that the user will learn as part of this lesson. "
            "If nothing is mentioned return an empty list."
        )
    )
# Top-level shape of the model output: this class is handed to
# JsonOutputParser as ``pydantic_object`` and the parsed result is read via
# its "lessons" key.
# Kept free of a class docstring because pydantic would copy it into the JSON
# schema used for the prompt's format instructions.
class Course(BaseModel):
    # Every lesson extracted from the course page.
    lessons: List[Lesson]