# ResearchSpark / app.py
# Latest commit ea60fd3 (verified) by prasannahf: "Update app.py"
import os
import pathlib
# Fix directory permissions for Hugging Face / Docker: every home-relative
# location is pointed at /tmp. MEM0_DIR and EMBEDCHAIN_DIR are presumably
# consulted by the mem0 / embedchain packages pulled in by the third-party
# imports below (verify against those packages), so they are set first.
os.environ["MEM0_DIR"] = "/tmp/.mem0"
os.environ["EMBEDCHAIN_DIR"] = "/tmp/.embedchain"
os.environ["HOME"] = "/tmp"


def _expanduser_to_tmp(path):
    """Stand-in for ``os.path.expanduser`` that treats /tmp as the home dir.

    Only a leading ``~`` path component is expanded (``"~"`` or ``"~/..."``).
    A tilde anywhere else in the path is left alone, and ``~user`` forms are
    returned unchanged. Accepts ``str``, ``bytes`` and ``os.PathLike`` input
    and returns ``str`` or ``bytes`` to match.
    """
    raw = os.fspath(path)
    if isinstance(raw, bytes):
        tilde, sep, tmp_home = b"~", b"/", b"/tmp"
    else:
        tilde, sep, tmp_home = "~", "/", "/tmp"
    if raw == tilde or raw.startswith(tilde + sep):
        # Drop the single leading "~" and graft the remainder onto /tmp.
        return tmp_home + raw[1:]
    return raw


# Patch path functions to use /tmp
os.path.expanduser = _expanduser_to_tmp
# classmethod keeps both ``Path.home()`` and ``some_path.home()`` callable.
pathlib.Path.home = classmethod(lambda cls: pathlib.Path("/tmp"))
import streamlit as st
from crewai import Agent, Task, Crew
from crewai_tools import SerperDevTool
from crewai.tools import BaseTool
import arxiv
import os
# Custom ArxivSearchTool
class ArxivSearchTool(BaseTool):
    """CrewAI tool that searches arXiv with a free-text query."""

    name: str = "ArxivSearch"
    description: str = "Tool to search scientific papers from arXiv"

    def _run(self, query: str) -> str:
        """Search arXiv and list the matches.

        Args:
            query: Search expression passed to ``arxiv.Search``.

        Returns:
            One ``"<title> - <entry_id>"`` line per paper (at most three),
            or an explicit "no results" message when nothing matched, so the
            calling agent never receives an empty string.
        """
        search = arxiv.Search(query=query, max_results=3)
        # Client.results() is the supported entry point; Search.results()
        # is deprecated in current arxiv releases.
        papers = list(arxiv.Client().results(search))
        if not papers:
            return f"No arXiv results found for query: {query}"
        return "\n".join(f"{paper.title} - {paper.entry_id}" for paper in papers)
# Custom FileIOTool
class FileIOTool(BaseTool):
    """CrewAI tool that lets an agent read or overwrite a text file."""

    name: str = "FileIOTool"
    description: str = "Tool to read from and write to files"

    def _run(self, action: str, filename: str, content: str = None) -> str:
        """Read or write *filename* and report the outcome as text.

        Failures are returned as ``"Error: ..."`` strings rather than raised,
        so the calling agent can react to them.

        Args:
            action: ``"read"`` or ``"write"``.
            filename: Path of the file to read or overwrite.
            content: Text to write; required when ``action`` is ``"write"``.

        Returns:
            The file's text for a successful read, a confirmation message for
            a successful write, or an error message otherwise.
        """
        # NOTE(review): filename is used exactly as supplied by the agent, so
        # any path the process can access is reachable — consider confining
        # it to a working directory.
        if action == "read":
            try:
                with open(filename, "r", encoding="utf-8") as handle:
                    return handle.read()
            except FileNotFoundError:
                return f"Error: File {filename} not found."
            except (OSError, UnicodeDecodeError) as exc:
                return f"Error: Could not read {filename}: {exc}"

        if action == "write":
            # Check before opening: mode "w" truncates the file immediately,
            # so a missing payload would otherwise wipe existing content.
            if content is None:
                return "Error: No content provided for write."
            try:
                with open(filename, "w", encoding="utf-8") as handle:
                    handle.write(content)
            except OSError as exc:
                return f"Error: Could not write {filename}: {exc}"
            return f"Content written to {filename}"

        return "Error: Invalid action. Use 'read' or 'write'."
import os
# Report each API key that is missing from the environment. An unset and an
# empty variable are both treated as missing. Nothing is written back to
# os.environ: a key that is present is already where it needs to be.
for _required_key in ("SERPER_API_KEY", "OPENAI_API_KEY"):
    if not os.getenv(_required_key):
        st.error(f"{_required_key} not found in environment.")
# User Inputs Form
# (label, default value) for each text box, in the order they are rendered.
_FORM_FIELDS = (
    ("Field of Study", "Biology"),
    ("Specific Interest", "Genetics, CRISPR"),
    ("Academic Level", "Undergraduate"),
    ("Available Resources", "Python, bioinformatics tools, open-source datasets"),
    ("Project Scope", "3-month project"),
    ("Preference", "Climate change solutions"),
)

with st.form(key="user_inputs_form"):
    # Unpacking drains the generator right here, so every widget is created
    # inside the form, top to bottom.
    field, interest, academic_level, resources, scope, preference = (
        st.text_input(label, value=default) for label, default in _FORM_FIELDS
    )
    submit_button = st.form_submit_button(label="Generate Problem Statement")
# Heading shown in the UI paired with the file the matching task writes
# through its ``output_file`` setting, in display order.
_OUTPUT_SECTIONS = (
    ("Problem Statement", "problem_statement.txt"),
    ("Validation Result", "validation_result.txt"),
    ("Summaries (References)", "summaries.txt"),
)


def _build_crew(user_inputs):
    """Assemble the four-agent crew for one set of form inputs.

    Args:
        user_inputs: Mapping with the keys ``field``, ``interest``,
            ``academic_level``, ``resources``, ``scope`` and ``preference``.

    Returns:
        A ``Crew`` whose tasks run in the order research, analysis, writing,
        validation.
    """
    field_name = user_inputs["field"]
    topic = user_inputs["interest"]
    level = user_inputs["academic_level"]
    available = user_inputs["resources"]
    project_scope = user_inputs["scope"]
    wish = user_inputs["preference"]

    # The preference box is optional; only mention it when it has content.
    preference_note = (
        f" Where possible, align with the stated preference: {wish}."
        if wish.strip()
        else ""
    )

    # Instantiate tools
    search_tool = SerperDevTool()
    file_io_tool = FileIOTool()
    arxiv_tool = ArxivSearchTool()

    # Define Agents (goals follow the user's topic and level instead of
    # assuming genetics / undergraduate).
    researcher = Agent(
        role='Researcher',
        goal=f'Find open-access {topic} papers from arXiv and Semantic Scholar',
        backstory='Expert in sourcing academic literature from archives.',
        tools=[search_tool, arxiv_tool, file_io_tool],
        llm="openai/gpt-4o-mini",
        verbose=True,
    )
    analyst = Agent(
        role='Analyst',
        goal=f'Identify novel research gaps for {level} projects',
        backstory='Skilled at spotting underexplored areas in research.',
        tools=[file_io_tool],
        llm="openai/gpt-4o-mini",
        verbose=True,
    )
    writer = Agent(
        role='Writer',
        goal='Craft clear, novel problem statements for students',
        backstory='Expert in translating research gaps into actionable project aims.',
        tools=[file_io_tool],
        llm="openai/gpt-4o-mini",
        verbose=True,
    )
    validator = Agent(
        role='Validator',
        goal='Ensure the novelty of the problem statement',
        backstory='Expert in verifying originality by cross-checking with existing research.',
        tools=[search_tool, arxiv_tool],
        llm="openai/gpt-4o-mini",
        verbose=True,
    )

    # Define Tasks
    research_task = Task(
        description=f'Search arXiv and Semantic Scholar for open-access papers on {topic} from 2024–2025. Save abstracts to a file.',
        expected_output='A text file with 3–5 paper summaries.',
        agent=researcher,
        output_file='summaries.txt',
    )
    analysis_task = Task(
        description=(
            f'Analyze summaries.txt to identify a novel research gap suitable for an {level} in {field_name}.'
            + preference_note
        ),
        expected_output='A clear description of a research gap.',
        agent=analyst,
    )
    writing_task = Task(
        description=(
            f'Generate a problem statement for an {level} in {field_name} interested in {topic}, using the identified gap. Include feasibility for {available} and {project_scope}.'
            + preference_note
        ),
        expected_output='A problem statement saved to a file in the format: "This project aims to [goal] by [approach], addressing [gap] in [context]."',
        agent=writer,
        output_file='problem_statement.txt',
    )
    validation_task = Task(
        description='Search arXiv and Semantic Scholar to ensure the problem statement in problem_statement.txt is novel and not duplicated in existing research.',
        expected_output='A confirmation that the problem statement is novel, or suggestions for refinement if duplicates are found.',
        agent=validator,
        output_file='validation_result.txt',
    )

    # Assemble Crew
    return Crew(
        agents=[researcher, analyst, writer, validator],
        tasks=[research_task, analysis_task, writing_task, validation_task],
        verbose=True,
    )


def _read_output(path):
    """Return the text stored at *path*, or ``None`` if it cannot be read."""
    try:
        with open(path, "r", encoding="utf-8") as handle:
            return handle.read()
    except (OSError, UnicodeDecodeError):
        return None


if submit_button:
    # User inputs dictionary
    user_inputs = {
        "field": field,
        "interest": interest,
        "academic_level": academic_level,
        "resources": resources,
        "scope": scope,
        "preference": preference,
    }
    crew = _build_crew(user_inputs)

    # Clear leftovers from an earlier run so that old text is never shown as
    # the result of this one. Best effort: a file that cannot be removed is
    # simply left in place.
    for _heading, _path in _OUTPUT_SECTIONS:
        try:
            os.remove(_path)
        except OSError:
            pass

    # Run Crew and display results
    failure = None
    with st.spinner("Generating Problem Statement..."):
        try:
            crew.kickoff()
        except Exception as exc:  # UI boundary: report instead of a traceback
            failure = exc

    if failure is not None:
        st.error(f"Generation failed: {failure}")
    else:
        # Display results
        missing = []
        for _heading, _path in _OUTPUT_SECTIONS:
            st.subheader(_heading)
            text = _read_output(_path)
            if text is None:
                missing.append(_path)
                st.warning(f"{_path} was not produced by this run.")
            else:
                st.write(text)
        if missing:
            st.warning("Generation finished, but some outputs are missing.")
        else:
            st.success("Generation complete!")