# ContiAI/agents/design_phase/source_finder.py
# ziadsameh32's picture
# Initial commit for ContiAI Space
# 432377b
from crewai import Agent, Task, Crew, Process, LLM
import os
from modules import llm
from tools import search_engine_tool
# Agent that runs the suggested search queries through `search_engine_tool`
# and curates scholarly/educational sources for course design.
# `{topic}` in the goal is a template placeholder; presumably it is filled
# from the crew's inputs at kickoff — confirm against the caller.
search_engine_agent = Agent(
    role="search engine agent",
    goal="\n".join(
        [
            "Search for diverse, high-quality, and reliable information related to {topic} using the suggested search queries.",
            "Prioritize **scholarly and educational resources**: academic journals, books, research articles, and trusted institutional websites.",
            "Include Arabic sources when contextually valuable, but prioritize English academic sources for broader coverage.",
            "Ensure all results are concise, relevant, and aligned with the outline headings to directly support course design and educational content.",
            "For each query, return at most 5 results with structured metadata (title, url, content summary, score, search_query).",
        ]
    ),
    # Adjacent string literals are concatenated verbatim, so every fragment
    # that is followed by another one must end with an explicit space;
    # otherwise the sentences fuse together ("curator.It executes ...").
    backstory=(
        "This agent acts like a scientific researcher and educational content curator. "
        "It executes the generated queries and gathers high-value, trustworthy information. "
        "The agent is trained to filter out irrelevant, commercial, or low-quality content, ensuring that "
        "only academic and pedagogically useful resources "
        "for building interactive e-learning content are retrieved. "
        "It balances global knowledge with local cultural enrichment by including both English and Arabic sources."
    ),
    llm=llm,
    verbose=True,
    tools=[search_engine_tool],
)
# Task handed to `search_engine_agent`: run the supplied queries and return
# the filtered hits as a single Python-dict literal.
# Template placeholders used in the description: {topic}, {queries},
# {no_links}, {score_th}; presumably supplied as crew inputs — confirm
# against the caller.
search_engine_task = Task(
    description="\n".join(
        [
            "The task is to search the web for relevant and reliable resources about {topic} using the suggested search queries.",
            "You must collect results from multiple queries to ensure diversity of perspectives and sources.",
            "Here are the queries you can use to search:",
            "{queries}",
            "For each query, retrieve at least 1 link and a maximum of {no_links} links only.",
            "Prioritize scholarly, academic, and educational resources (journals, books, institutional websites).",
            "Filter out irrelevant, suspicious, or low-quality results (e.g., personal blogs, spam, commercial ads).",
            "Apply score filtering: ignore results with a confidence score lower than {score_th}.",
            # One sentence kept as a single list element (it was previously
            # split across a newline and its second half was repeated).
            "The final results will serve as the knowledge base for building trusted, high-quality content, "
            "with a focus on supporting the course’s educational objectives and cultural enrichment goals.",
        ]
    ),
    # NOTE(review): the format example below contains literal braces. This
    # relies on the framework's input interpolation leaving non-placeholder
    # braces untouched — confirm for the installed CrewAI version.
    expected_output=(
        "Return ONLY a valid Python dictionary.\n"
        "- Do not include explanations, markdown, or code fences.\n"
        "- The dictionary must be UTF-8 safe and directly usable in Python with ast.literal_eval.\n"
        "- Keys must be wrapped in double quotes.\n\n"
        "Format example:\n"
        "{\n"
        ' "results": [\n'
        " {\n"
        ' "url": "...",\n'
        ' "title": "...",\n'
        ' "content": "...",\n'
        ' "score": 0.85,\n'
        ' "search_query": "..."\n'
        " }\n"
        " ]\n"
        "}\n\n"
        # Each trailing instruction ends with a newline so it is not glued
        # to the closing brace of the example or to the next instruction.
        "Make the out compatible with ast python library because i use r1 = result.dict()['raw'] f_result = ast.literal_eval(r1)\n"
        "Make sure there is no unterminated string literal and no invalid syntax, errors will happen when parsing\n"
        "Make sure that the output is only as the format example i give you before, dont include Thoughts or anything else"
    ),
    agent=search_engine_agent,
)