File size: 9,598 Bytes
e7577ef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ce2de92
 
 
fb255ba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ce2de92
fb255ba
 
 
e7577ef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
983c703
5801db0
e7577ef
 
 
ce2de92
e7577ef
 
 
 
 
 
 
 
 
 
 
6cc84f1
 
 
ce2de92
6cc84f1
 
ce2de92
e7577ef
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
import streamlit as st
from docx import Document
import re
import io
import os
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.base import MIMEBase
from email import encoders
from email.mime.text import MIMEText
from fpdf import FPDF
from dotenv import load_dotenv
from retrying import retry
from funtions import *
import logging
import random
import time
from newspaper import Article

# Load environment variables (API keys) from a local .env file.
load_dotenv()

# Exa web-search client, used to backfill missing proposal fields.
# NOTE(review): `Exa` is presumably provided by the `funtions` star import — confirm.
exa = Exa(api_key=os.getenv("EXA_API_KEY"))

# Groq chat-completion client and the model name used for all generations.
# NOTE(review): `Groq` is presumably provided by the `funtions` star import — confirm.
client = Groq(api_key=os.getenv("GROQ_API_KEY"))
utilized_model = "llama3-70b-8192"

# Route LLM/search errors to a log file instead of the console.
logging.basicConfig(filename="llm_errors.log", level=logging.ERROR)

# Parameters for Exa highlight extraction: one 7-sentence highlight per URL.
highlights_options = {
    "num_sentences": 7,
    "highlights_per_url": 1,
}

# Page header: title, banner image and author contact details.
st.title("Academic PhD Proposal Generator")
st.image(
    "https://i.sstatic.net/jUkkO0Fd.jpg",
    caption="PhD Proposal Generator",
    use_column_width=True
)
st.write("For collaboration, please contact the author 👇")
st.write("Email: chatgpt4compas@gmail.com")
st.markdown("[WhatsApp contact 📞](https://web.whatsapp.com/send?phone=12085033653)")

def sanitize_filename(filename, max_length=10):
    """Remove filesystem-unsafe characters from *filename* and truncate it.

    Strips the characters ``< > : " / \\ | ? *`` (illegal on Windows), then
    keeps at most *max_length* characters of what remains.

    :param filename: Raw candidate filename.
    :param max_length: Maximum number of characters to keep (default 10).
    :return: Cleaned, length-limited filename string.
    """
    cleaned = re.sub(r'[<>:"/\\|?*]', '', filename)
    return cleaned[:max_length]

@retry(wait_exponential_multiplier=1000, wait_exponential_max=10000, stop_max_attempt_number=5)
def call_llm(prompt, data, history, section_name):
    """Generate the text of one proposal section via the Groq chat API.

    Any empty fields in *data* are first backfilled with web-search context
    from Exa, which is prepended to the prompt.  The whole call retries with
    exponential backoff (up to 5 attempts) on failure.

    :param prompt: Base user prompt for this section.
    :param data: Dict of user-supplied proposal fields (may contain blanks).
    :param history: List of previously generated sections (kept for
        interface compatibility; not passed to the model here).
    :param section_name: Section to generate; selects a specialised system
        prompt when it matches a known section.
    :return: The model's generated text for the section.
    """
    missing_fields = [key for key, value in data.items() if not value]

    if missing_fields:
        search_queries = [
            f"Provide context for {field} in relation to {data.get('research_topic', 'this research topic')}."
            for field in missing_fields
        ]
        info = []
        for query in search_queries:
            search_response = exa.search_and_contents(
                query=query, highlights=highlights_options, num_results=3, use_autoprompt=True
            )
            # BUGFIX: the original indexed results[0].highlights[0] blindly,
            # raising IndexError whenever Exa returned no results or a result
            # without highlights. Take the first available highlight instead.
            for result in search_response.results:
                if result.highlights:
                    info.append(result.highlights[0])
                    break

        prompt = f"Missing fields: {', '.join(missing_fields)}\nSource: {info}\nOriginal Prompt: {prompt}"

    system_prompt = "You are an academic PhD proposal generator. Use the context and history to answer the user's question."
    section_prompts = {
        "Executive Summary": "Generate a concise, high-level summary of the research problem, methodology, and expected contribution.",
        "Research Objectives": "Write SMART research objectives (Specific, Measurable, Achievable, Relevant, Time-bound).",
        "Research Methodology": "Describe the research design, including data collection and analysis methods, and justify their suitability.",
        "Literature Review Outline": "Provide a literature review outline covering key authors, developments, and gaps.",
        "Hypotheses": "Generate hypotheses based on research questions, providing a basis for exploration.",
        "Contribution Statement": "Explain the unique contributions this research makes to the field.",
        "Research Timeline": "Create a detailed research timeline outlining phases and milestones.",
        "Limitations": "Analyze potential research limitations, including weaknesses in methodology or data collection.",
        "Future Work": "Discuss potential areas of future research building on the findings."
    }
    if section_name in section_prompts:
        system_prompt = section_prompts[section_name]

    completion = client.chat.completions.create(
        model=utilized_model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ]
    )
    return completion.choices[0].message.content

def extract_and_summarize_article(url):
    """Download the article at *url* and return its auto-generated summary.

    Runs newspaper's full pipeline (download, parse, nlp).  On any failure
    the error is logged and a human-readable error string is returned
    instead of raising.

    :param url: Web address of the article to summarize.
    :return: Summary text, or an error message string on failure.
    """
    try:
        piece = Article(url)
        for step in (piece.download, piece.parse, piece.nlp):
            step()
        return piece.summary
    except Exception as exc:
        logging.error(f"Error summarizing article from URL {url}: {str(exc)}")
        return f"Error fetching or summarizing content from {url}"

def update_data_with_summaries(data):
    """Return a copy of *data* where every URL value is replaced by a summary.

    String values starting with ``http://`` or ``https://`` are fetched and
    summarized via :func:`extract_and_summarize_article`; everything else is
    copied through unchanged.  The input dict is not mutated.

    :param data: Dict of proposal fields keyed by field name.
    :return: New dict with URL values swapped for their summaries.
    """
    result = dict(data)
    for field, content in data.items():
        if isinstance(content, str) and re.match(r'http[s]?://', content):
            st.write(f"Fetching and summarizing content for URL in '{field}'...")
            result[field] = extract_and_summarize_article(content)
    return result

def create_document():
    """Create a fresh Word document headed "PhD Research Proposal".

    :return: A new ``docx.Document`` with the title heading already added.
    """
    proposal_doc = Document()
    proposal_doc.add_heading("PhD Research Proposal", 0)
    return proposal_doc

def add_section_to_doc(doc, section_name, section_content):
    """Append one proposal section to the document.

    Writes *section_name* as a level-1 heading, then *section_content* as a
    body paragraph.

    :param doc: Document object exposing ``add_heading`` / ``add_paragraph``.
    :param section_name: Heading text for the section.
    :param section_content: Body text for the section.
    :return: The same ``doc`` object, to allow chaining.
    """
    doc.add_heading(section_name, level=1)
    doc.add_paragraph(section_content)
    return doc

def add_suggested_title(section_name, section_content):
    """Prefix *section_content* with a "Suggested Title" line for the section.

    :param section_name: Name placed after the "Suggested Title:" label.
    :param section_content: Section body text.
    :return: ``"Suggested Title: <name>\\n\\n<content>"``.
    """
    return "\n\n".join((f"Suggested Title: {section_name}", section_content))
def suggest_title(data):
    """Ask the LLM for a concise, compelling title for the PhD proposal.

    :param data: Dict that may contain ``research_topic``,
        ``research_question``, ``objectives`` and ``contribution`` keys;
        missing keys are treated as empty strings.
    :return: Suggested title string, or an error message if the call fails.
    """
    try:
        # Assemble the exact prompt the model sees, field by field.
        prompt_lines = [
            "Based on the following information, suggest a concise and compelling title for a PhD research proposal:",
            "",
            f"Research Topic: {data.get('research_topic', '')}",
            f"Research Question: {data.get('research_question', '')}",
            f"Objectives: {data.get('objectives', '')}",
            f"Contribution: {data.get('contribution', '')}",
            "",
            "The title should be less than 15 words, clear, and indicative of the research focus.",
        ]
        system_prompt = "You are a title generator for academic PhD proposals. Suggest a concise, impactful title."

        completion = client.chat.completions.create(
            model=utilized_model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": "\n".join(prompt_lines)},
            ]
        )
        return completion.choices[0].message.content.strip()

    except Exception as exc:
        logging.error(f"Error generating title: {str(exc)}")
        return "Error: Unable to generate a title at this time."
def get_docx_bytes(doc):
    """Serialize *doc* into an in-memory binary buffer.

    :param doc: Document object exposing a ``save(file_like)`` method.
    :return: ``io.BytesIO`` holding the serialized document, rewound to 0.
    """
    buffer = io.BytesIO()
    doc.save(buffer)
    buffer.seek(0)
    return buffer

def collect_basic_info():
    """Render the proposal input form and, on submit, generate the proposal.

    Collects all research fields from the Streamlit UI, optionally replaces
    URL values with article summaries, generates each proposal section via
    the LLM (streaming results to the page as they arrive), then writes the
    assembled ``.docx`` to disk and exposes it through a download button.
    """
    st.write("## Basic Research Information")
    summarize_urls = st.checkbox("Summarize URLs in data", value=False)

    research_topic = st.text_input("Research Topic")
    research_question = st.text_area("Research Question")
    objectives = st.text_area("Research Objectives (SMART)")
    methodology = st.text_area("Research Methodology")
    data_collection = st.text_area("Data Collection Methods")
    data_analysis = st.text_area("Data Analysis Methods")
    justification = st.text_area("Justification for Methodology")
    key_authors = st.text_area("Key Authors in the Field")
    recent_developments = st.text_area("Recent Developments in the Field")
    contribution = st.text_area("Contribution to the Field")
    literature_gap = st.text_area("Literature Gaps")
    timeline = st.text_area("Research Timeline (Phases and Deadlines)")

    email = st.text_input("Email")

    if st.button('Submit'):
        data = {
            "research_topic": research_topic,
            "research_question": research_question,
            "objectives": objectives,
            "methodology": methodology,
            "data_collection": data_collection,
            "data_analysis": data_analysis,
            "justification": justification,
            "key_authors": key_authors,
            "recent_developments": recent_developments,
            "contribution": contribution,
            "literature_gap": literature_gap,
            "timeline": timeline,
            "email": email
        }

        history = []
        if summarize_urls:
            st.write("Summarizing URLs in the data...")
            data = update_data_with_summaries(data)

        sections = [
            "Executive Summary", "Research Objectives", "Research Methodology",
            "Literature Review Outline", "Hypotheses", "Contribution Statement",
            "Research Timeline", "Limitations", "Future Work"
        ]

        sanitized_topic = sanitize_filename(research_topic, max_length=50)

        doc = create_document()
        suggested_title = suggest_title(data)
        add_section_to_doc(doc, '', suggested_title)
        for section_name in sections:
            prompt = f"Generate content for {section_name}: {data}"
            section_content = call_llm(prompt, data, history, section_name)
            section_content = add_suggested_title(section_name, section_content)
            history.append(f"{section_name}: {section_content}")

            st.subheader(section_name)
            st.write(section_content)

            doc = add_section_to_doc(doc, section_name, section_content)

        # PERF: serialize once after all sections are added — the original
        # re-serialized the entire document on every loop iteration and only
        # ever used the final buffer.
        doc_bytes = get_docx_bytes(doc)

        filename = f"PhD_Proposal_for_{sanitized_topic}.docx"
        # Keep a server-side copy, then offer the same bytes for download.
        with open(filename, 'wb') as f:
            f.write(doc_bytes.getbuffer())
        st.download_button(
            label="Download final report as DOCX",
            data=doc_bytes,
            file_name=filename,
            mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
        )

collect_basic_info()