theRealNG committed on
Commit
b59748f
·
1 Parent(s): 1c30130

Added support for research articles suggestion

Browse files
Files changed (3) hide show
  1. crew/research_article_suggester.py +150 -0
  2. requirements.txt +1 -0
  3. test.py +5 -0
crew/research_article_suggester.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from crewai import Agent, Task, Crew
2
+ from langchain_openai import ChatOpenAI
3
+ from tavily import TavilyClient
4
+ import os
5
+ import json
6
+ from pydantic import BaseModel, Field
7
+ from crewai.tasks.task_output import TaskOutput
8
+ from datetime import datetime, timedelta
9
+
10
+ from tools.scrape_website import scrape_tool
11
+
12
# Maximum number of search results requested from Tavily for one topic.
MAX_RESULTS = 5
# Maximum accepted age of a research paper, in days; older papers are rejected
# by the evaluator callback of the information gathering task.
AGE_OF_RESEARCH_PAPER = 60


class RecentArticleSuggester:
    """
    Suggests recent research articles based on a given topic.

    The topic is searched with Tavily, and every search result is passed
    through a two-task crew: the first task gathers the publication date and
    author of the paper, the second one writes a short pitch for it.
    """

    def __init__(self):
        """Create the Tavily client using the TAVILY_API_KEY environment variable."""
        self.tavily_client = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))

    def kickoff(self, inputs=None):
        """
        Run the suggestion pipeline for a topic.

        Args:
            inputs: Mapping that must contain the key "topic".

        Returns:
            A list with one crew result per successfully processed article.

        Raises:
            KeyError: If "topic" is missing from ``inputs``.
        """
        # A None default replaces the shared mutable ``{}`` default.
        inputs = {} if inputs is None else inputs
        self.topic = inputs["topic"]
        return self._suggest_research_papers()

    def _suggest_research_papers(self):
        """
        Search for papers on ``self.topic`` and build a pitch for each result.

        Articles whose processing fails (including those rejected by the
        evaluator callback as too old) are reported on stdout and skipped.

        Returns:
            A list of the crew outputs for the articles that were processed
            successfully, in search-result order.
        """
        # NOTE(review): the query asks for "the last week" while the evaluator
        # accepts papers up to AGE_OF_RESEARCH_PAPER days old - confirm which
        # time window is intended.
        query = f"research papers on {self.topic} published in the last week"
        results = self.tavily_client.search(query, max_results=MAX_RESULTS)['results']
        print("Search Results: ", results)
        pitch_crew = self._create_pitch_crew()
        research_paper_suggestions = []
        for result in results:
            try:
                info = pitch_crew.kickoff(inputs={
                    "title": result["title"],
                    "url": result["url"],
                    "content": result["content"]
                })
            except (KeyboardInterrupt, SystemExit):
                # Never swallow an interpreter shutdown or a user interrupt.
                raise
            except BaseException as e:
                # BaseException is caught on purpose: the evaluator callback
                # raises a bare BaseException to reject papers that are too old.
                print(f"Error processing article '{result.get('title')}': {e}")
            else:
                research_paper_suggestions.append(info)

        return research_paper_suggestions

    def _create_pitch_crew(self):
        """
        Build the crew that enriches a search result and writes its pitch.

        Returns:
            A Crew with two tasks: information gathering (output validated by
            the ``evaluator`` callback) followed by pitch creation.
        """
        information_gatherer = Agent(
            role="Research Paper Information Retriever",
            goal="Gather required information for the given research papers.",
            verbose=True,
            backstory=(
                "You are an expert in gathering required details "
                "about the given research paper."
            ),
            llm=ChatOpenAI(model="gpt-3.5-turbo", temperature=0.2),
            tools=[scrape_tool],
        )

        def evaluator(output: TaskOutput):
            """
            Reject papers published before the accepted time window.

            Raises:
                BaseException: If the paper is older than
                    AGE_OF_RESEARCH_PAPER days.
            """
            article_info = json.loads(output.exported_output)
            try:
                date_obj = datetime.strptime(
                    article_info['published_on'], "%d/%m/%Y")

                # Oldest publication date that is still accepted.
                start_date = datetime.now() - timedelta(days=AGE_OF_RESEARCH_PAPER)

                # A bare BaseException is raised here; it is caught by the
                # per-article handler in _suggest_research_papers.
                # NOTE(review): presumably chosen so that framework-level
                # ``except Exception`` handlers do not swallow it - confirm.
                if date_obj < start_date:
                    raise BaseException(f"{date_obj} Older than given timeframe {start_date}")

            except ValueError:
                # An unparseable date is only reported; the paper is kept.
                print("Invalid date format. Please use dd/mm/yyyy.")
                return False

        information_gathering_task = Task(
            description=(
                "Here is the information of a research paper: title {title}, "
                "url: {url} and content: {content}.\n"
                "Gather following information about the research paper: "
                "1. When was the research paper published and present it in dd/mm/yyyy format. "
                "2. Who is the author of the research paper. "
            ),
            expected_output=(
                "Following details of the research paper: title, url, "
                "content/summary, date it was published and author."
            ),
            agent=information_gatherer,
            # Fixed keyword spelling (was "async_exection").
            async_execution=False,
            output_json=ResearchPaper,
            callback=evaluator,
        )

        pitcher = Agent(
            role="Curiosity Catalyst",
            goal="To pique the user's curiosity to read the research paper.",
            verbose=True,
            backstory=(
                "As a Curiosity Catalyst, you know exactly how to pique the user's curiosity "
                "to read the research paper."
            ),
            llm=ChatOpenAI(model="gpt-3.5-turbo", temperature=0.2),
            tools=[scrape_tool],
        )

        create_pitch = Task(
            description=(
                "Craft the pitch so to that it teases the research paper's most intriguing aspects, "
                "by posing questions that the research paper might answer or "
                "highlighting surprising facts to pique the user's curiosity "
                " to read the research paper so that he is up-to-date with latest research."
            ),
            expected_output=(
                "All the details of the research paper along with the pitch."
            ),
            tools=[scrape_tool],
            agent=pitcher,
            context=[information_gathering_task],
            output_json=ResearchPaperWithPitch,
        )

        crew = Crew(
            agents=[information_gatherer, pitcher],
            tasks=[information_gathering_task, create_pitch],
            verbose=True,
            max_rpm=4,
        )

        return crew
132
+
133
+
134
class ResearchPaper(BaseModel):
    """Details of a research paper; used as the JSON output schema of the
    information gathering task."""

    title: str
    url: str
    summary: str
    author: str = Field(description="author of the article")
    # Kept as a string; the evaluator callback parses it with "%d/%m/%Y".
    published_on: str = Field(
        description="Date the article was published on in format dd/mm/yyyy")
141
+
142
+
143
class ResearchPaperWithPitch(ResearchPaper):
    """Research paper details plus the generated pitch; used as the JSON
    output schema of the pitch creation task.

    Inherits title, url, summary, author and published_on from ResearchPaper
    instead of re-declaring them, so both schemas stay in sync.
    """

    pitch: str
requirements.txt CHANGED
@@ -5,3 +5,4 @@ langchain_community
5
  langchain_google_genai
6
  langchain_openai
7
  streamlit
 
 
5
  langchain_google_genai
6
  langchain_openai
7
  streamlit
8
+ tavily-python
test.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
# Manual smoke run: suggest recent research articles for the "GenAI" topic
# and print whatever the suggester returns.
from crew.research_article_suggester import RecentArticleSuggester

print(RecentArticleSuggester().kickoff(inputs={"topic": "GenAI"}))