File size: 6,378 Bytes
c80bad5
ea60fd3
a40df4c
ea60fd3
 
 
 
 
 
 
 
da8b47d
c80bad5
 
b3c66c7
 
21d1f77
 
 
b3c66c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ab1d389
b3c66c7
ab1d389
 
b3c66c7
ab1d389
b3c66c7
ab1d389
 
b3c66c7
ab1d389
 
b3c66c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
import os
import pathlib

# Redirect cache/home directories to /tmp so the app can write files inside
# read-only containers (Hugging Face Spaces / Docker running as non-root).
os.environ["MEM0_DIR"] = "/tmp/.mem0"
os.environ["EMBEDCHAIN_DIR"] = "/tmp/.embedchain"
os.environ["HOME"] = "/tmp"

# Patch path helpers so any library resolving "~" lands in /tmp.
# NOTE: only a *leading* tilde is expanded — the previous lambda replaced
# every "~" in the path, mangling filenames that legitimately contain one.
os.path.expanduser = lambda path: "/tmp" + path[1:] if path.startswith("~") else path
pathlib.Path.home = lambda: pathlib.Path("/tmp")

import streamlit as st
from crewai import Agent, Task, Crew
from crewai_tools import SerperDevTool
from crewai.tools import BaseTool

import arxiv
import os

# Custom ArxivSearchTool
class ArxivSearchTool(BaseTool):
    """CrewAI tool that searches arXiv and returns matching titles and IDs."""

    name: str = "ArxivSearch"
    description: str = "Tool to search scientific papers from arXiv"

    def _run(self, query: str) -> str:
        """Return up to 3 arXiv hits for *query*, one "title - entry_id" per line."""
        # arxiv.Search.results() is deprecated; fetch through a Client instead.
        search = arxiv.Search(query=query, max_results=3)
        results = list(arxiv.Client().results(search))
        if not results:
            # An explicit message is more useful to the LLM agent than "".
            return f"No arXiv results found for: {query}"
        return "\n".join(f"{r.title} - {r.entry_id}" for r in results)

# Custom FileIOTool
class FileIOTool(BaseTool):
    """CrewAI tool for simple file I/O.

    Supported actions:
        read  -- return the file's full contents.
        write -- overwrite the file with *content*.

    Errors are returned as strings (not raised) so the calling agent can
    read and react to them.
    """

    name: str = "FileIOTool"
    description: str = "Tool to read from and write to files"

    def _run(self, action: str, filename: str, content: str = None) -> str:
        if action == "read":
            try:
                with open(filename, 'r') as f:
                    return f.read()
            except FileNotFoundError:
                # Include the path so the agent can correct a bad filename.
                return f"Error: File {filename} not found."
        elif action == "write":
            if content is None:
                # Guard: f.write(None) would raise a TypeError.
                return "Error: 'content' is required for the write action."
            with open(filename, 'w') as f:
                f.write(content)
            return f"Content written to {filename}"
        else:
            return "Error: Invalid action. Use 'read' or 'write'."

# Surface missing credentials in the UI before any agent work starts.
# (The previous version copied each key back into os.environ, which is a
# no-op: os.getenv already reads from os.environ. It also re-imported os.)
if not os.getenv("SERPER_API_KEY"):
    st.error("SERPER_API_KEY not found in environment.")

if not os.getenv("OPENAI_API_KEY"):
    st.error("OPENAI_API_KEY not found in environment.")


# User Inputs Form
with st.form(key='user_inputs_form'):
    field = st.text_input("Field of Study", value="Biology")
    interest = st.text_input("Specific Interest", value="Genetics, CRISPR")
    academic_level = st.text_input("Academic Level", value="Undergraduate")
    resources = st.text_input("Available Resources", value="Python, bioinformatics tools, open-source datasets")
    scope = st.text_input("Project Scope", value="3-month project")
    preference = st.text_input("Preference", value="Climate change solutions")
    submit_button = st.form_submit_button(label="Generate Problem Statement")

if submit_button:
    # Collected form values, interpolated into the task prompts below.
    user_inputs = {
        "field": field,
        "interest": interest,
        "academic_level": academic_level,
        "resources": resources,
        "scope": scope,
        "preference": preference
    }

    # Instantiate tools shared by the agents.
    search_tool = SerperDevTool()
    file_io_tool = FileIOTool()
    arxiv_tool = ArxivSearchTool()

    # Define Agents — a four-stage pipeline: research -> analysis -> writing -> validation.
    researcher = Agent(
        role='Researcher',
        goal='Find open-access genetics papers from arXiv and Semantic Scholar',
        backstory='Expert in sourcing academic literature from archives.',
        tools=[search_tool, arxiv_tool, file_io_tool],
        llm="openai/gpt-4o-mini",
        verbose=True
    )

    analyst = Agent(
        role='Analyst',
        goal='Identify novel research gaps for undergraduate projects',
        backstory='Skilled at spotting underexplored areas in research.',
        tools=[file_io_tool],
        llm="openai/gpt-4o-mini",
        verbose=True
    )

    writer = Agent(
        role='Writer',
        goal='Craft clear, novel problem statements for students',
        backstory='Expert in translating research gaps into actionable project aims.',
        tools=[file_io_tool],
        llm="openai/gpt-4o-mini",
        verbose=True
    )

    validator = Agent(
        role='Validator',
        goal='Ensure the novelty of the problem statement',
        backstory='Expert in verifying originality by cross-checking with existing research.',
        tools=[search_tool, arxiv_tool],
        llm="openai/gpt-4o-mini",
        verbose=True
    )

    # Define Tasks. Tasks communicate through the output files named below.
    research_task = Task(
        description=f'Search arXiv and Semantic Scholar for open-access papers on {user_inputs["interest"]} from 2024–2025. Save abstracts to a file.',
        expected_output='A text file with 3–5 paper summaries.',
        agent=researcher,
        output_file='summaries.txt'
    )

    analysis_task = Task(
        description=f'Analyze summaries.txt to identify a novel research gap suitable for an {user_inputs["academic_level"]} in {user_inputs["field"]}.',
        expected_output='A clear description of a research gap.',
        agent=analyst
    )

    writing_task = Task(
        description=f'Generate a problem statement for an {user_inputs["academic_level"]} in {user_inputs["field"]} interested in {user_inputs["interest"]}, using the identified gap. Include feasibility for {user_inputs["resources"]} and {user_inputs["scope"]}.',
        expected_output='A problem statement saved to a file in the format: "This project aims to [goal] by [approach], addressing [gap] in [context]."',
        agent=writer,
        output_file='problem_statement.txt'
    )

    validation_task = Task(
        description='Search arXiv and Semantic Scholar to ensure the problem statement in problem_statement.txt is novel and not duplicated in existing research.',
        expected_output='A confirmation that the problem statement is novel, or suggestions for refinement if duplicates are found.',
        agent=validator,
        output_file='validation_result.txt'
    )

    # Assemble Crew (default sequential process: tasks run in list order).
    crew = Crew(
        agents=[researcher, analyst, writer, validator],
        tasks=[research_task, analysis_task, writing_task, validation_task],
        verbose=True
    )

    def _show_output(title: str, path: str) -> None:
        """Render a task output file in the UI; warn instead of crashing if missing.

        A task that errors out may never write its output_file, so an
        unguarded open() here would take down the whole page with an
        unhandled FileNotFoundError.
        """
        st.subheader(title)
        try:
            with open(path, 'r') as f:
                st.write(f.read())
        except FileNotFoundError:
            st.warning(f"Expected output file '{path}' was not generated.")

    # Run Crew and display results
    with st.spinner("Generating Problem Statement..."):
        crew.kickoff()

        _show_output("Problem Statement", 'problem_statement.txt')
        _show_output("Validation Result", 'validation_result.txt')
        _show_output("Summaries (References)", 'summaries.txt')

        st.success("Generation complete!")