File size: 17,252 Bytes
022961b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5a16870
022961b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5a16870
022961b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b2b56d5
 
022961b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3447bed
 
 
 
d326d22
022961b
fd99134
0c5315f
 
 
 
 
 
3447bed
022961b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
import streamlit as st
from docx import Document
import re
import io
import os
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.base import MIMEBase
from email import encoders
from email.mime.text import MIMEText
from fpdf import FPDF
from dotenv import load_dotenv
from retrying import retry
from funtions import *
import logging 
import random
import time 
import newspaper
from newspaper import Article

max_prompt_lenth=6000
# Load environment variables from .env file
load_dotenv()

# Declare the exa search API
exa = Exa(api_key=os.getenv("EXA_API_KEY"))

# Define your API Model and key
client = Groq(api_key=os.getenv("GROQ_API_KEY"))
utilized_model = "llama3-70b-8192"

# Set up logging
logging.basicConfig(filename="llm_errors.log", level=logging.ERROR)

# Functions for the Exa Search content & Parameters for Highlights search
highlights_options = {
    "num_sentences": 7,  # Length of highlights
    "highlights_per_url": 1,  # Get the best highlight for each URL
}

# Add title and author contact
st.title("Academic PhD Proposal Generator")

# Display the image using st.image
st.image("https://i.sstatic.net/jUkkO0Fd.jpg", caption="PhD Proposal Generator", use_column_width=True)
#st.markdown("""
#**Website:** [Academic Resource](https://youruniversity.edu)
#""")
st.write("For collaboration, please contact the author πŸ‘‡")
st.write("Email: chatgpt4compas@gmail.com")
st.markdown("[WhatsApp contact πŸ“ž](https://web.whatsapp.com/send?phone=12085033653)")

def sanitize_filename(filename, max_length=10):
    """
    Make *filename* safe to use as a file name.

    Strips the characters Windows forbids in file names (<>:"/\\|?*) and
    truncates the result to the first *max_length* characters.

    :param filename: Candidate file name.
    :param max_length: Maximum length of the returned name (default 10).
    :return: Sanitized, length-limited name.
    """
    cleaned = re.sub(r'[<>:"/\\|?*]', '', filename)
    return cleaned[:max_length]

@retry(wait_exponential_multiplier=1000, wait_exponential_max=10000, stop_max_attempt_number=5)
def call_llm_old(prompt):
    """
    Legacy single-shot generator: run an Exa web search for context, then
    make one Groq chat call grounded on the top highlight of each result.

    :param prompt: The user's question / generation request.
    :return: The model's generated text.
    """
    # Gather one highlight per result as grounding context.
    search_response = exa.search_and_contents(query=prompt, highlights=highlights_options, num_results=3, use_autoprompt=True)
    context_snippets = [result.highlights[0] for result in search_response.results]

    messages = [
        {
            "role": "system",
            "content": "You are an academic PhD proposal generator. Read the provided contexts and use them to generate the proposal.",
        },
        {
            "role": "user",
            "content": f"Sources: {context_snippets}\nQuestion: {prompt}",
        },
    ]
    response = client.chat.completions.create(model=utilized_model, messages=messages)
    return response.choices[0].message.content

@retry(wait_exponential_multiplier=1000, wait_exponential_max=10000, stop_max_attempt_number=5)
def call_llm(prompt, data, history, section_name):
    """
    Calls the LLM model to generate one proposal section, compensating for
    missing user-supplied fields by folding search queries and history into
    the prompt.

    :param prompt: The current prompt to generate content.
    :param data: The dictionary of input fields collected from the user.
    :param history: A list of previous prompts and responses to enhance the model's understanding.
    :param section_name: Name of the section being generated; selects a
        section-specific system prompt (generic fallback for unknown names).
    :return: Generated content based on the prompt and available data.
    """
    # Identify any fields the user left blank.
    missing_fields = [key for key, value in data.items() if not value]

    if missing_fields:
        # Phrase a context request for each missing field, anchored on the topic.
        search_queries = [
            f"Provide context for {field} in relation to {data.get('research_topic', 'this research topic')}."
            for field in missing_fields
        ]

        # Fold the missing-field queries and conversation history into the prompt.
        search_prompt = f"Missing fields: {', '.join(missing_fields)}\n" \
                        f"History: {history}\n" \
                        f"Search Queries: {search_queries}\n" \
                        f"Original Prompt: {prompt}"
        # Truncate so the combined prompt stays within the model's context budget.
        prompt = search_prompt[:max_prompt_lenth-1]

    # Section-specific system prompts; dict lookup replaces the former if/elif chain.
    section_system_prompts = {
        "Executive Summary": "You are an expert in PhD proposals. Generate a concise, high-level summary of the research, focusing on the overall research problem, methodology, and expected contribution.",
        "Research Objectives": "You are an expert in PhD proposals. Write detailed research objectives, ensuring they follow SMART criteria (Specific, Measurable, Achievable, Relevant, Time-bound).",
        "Research Methodology": "You are an expert in research methodology. Generate a detailed description of the research design, including data collection and analysis methods, and justify their suitability.",
        "Literature Review Outline": "You are an academic expert in literature reviews. Provide a comprehensive literature review outline that covers the key authors, recent developments, and gaps in the research field.",
        "Hypotheses": "Generate clear and concise hypotheses for the research. These should be based on the research questions and provide a basis for further exploration.",
        "Contribution Statement": "Write a statement explaining the unique contributions this research will make to the field, focusing on how it fills gaps or advances current understanding.",
        "Research Timeline": "Create a detailed research timeline, outlining the different phases and milestones over the total timeframe.",
        "Limitations": "Provide an analysis of the limitations of the research, including potential weaknesses in methodology, data collection, or external factors.",
        "Future Work": "Write a section discussing potential areas of future work that could build on the current research findings.",
    }
    system_prompt = section_system_prompts.get(
        section_name,
        "You are an academic PhD proposal generator. Use the context and history to answer the user's question and fill in any missing fields.",
    )

    completion = client.chat.completions.create(
        model=utilized_model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ]
    )
    return completion.choices[0].message.content

def delay_with_backoff(attempt):
    """
    Sleep before a retry attempt.

    The base delay is a random 7-9 seconds, doubled for each prior attempt,
    but never more than 10 seconds total.

    NOTE(review): the 10-second cap saturates on the second attempt
    (base * 2 >= 14 > 10), so the "exponential" growth is effectively a
    one-step jump to 10 s — confirm the cap is intentional.
    """
    base = random.uniform(7, 9)
    scaled = base * (2 ** (attempt - 1))
    time.sleep(min(scaled, 10))

def call_llm_with_retries(prompt, data, history, section_name, max_retries=3):
    """
    Generate a section via call_llm, retrying on failure with backoff.

    Each failed attempt is logged and surfaced in the Streamlit UI; after
    *max_retries* failures an error message (not an exception) is returned.

    :param prompt: The current prompt to generate content.
    :param data: The dictionary of input fields collected from the user.
    :param history: A list of previous prompts and responses to enhance the model's understanding.
    :param section_name: The name of the current section being generated.
    :param max_retries: Maximum number of retry attempts (default: 3).
    :return: Generated content, or an error message after all retries fail.
    """
    attempt = 0
    while attempt < max_retries:
        attempt += 1
        try:
            return call_llm(prompt, data, history, section_name)
        except Exception as exc:
            # Record the failure both in the log file and on the page.
            logging.error(f"Attempt {attempt}: Error calling LLM model for section '{section_name}': {str(exc)}")
            st.write(f"Attempt {attempt}: There was a problem generating '{section_name}'. Retrying...")

            if attempt == max_retries:
                return f"Failed to generate the section '{section_name}' after {max_retries} attempts. Please try again later."

            # Wait with exponential backoff before the next attempt.
            delay_with_backoff(attempt)
            st.write(f"Retrying {section_name} after delay...")

    return f"Error: Maximum retry attempts exceeded for {section_name}."
    
def extract_and_summarize_article(url):
    """
    Fetch and summarize content from a URL using the newspaper3k module.

    :param url: The URL to be scraped.
    :return: The article summary, or an error string if fetching/NLP fails.
    """
    try:
        page = Article(url)
        page.download()
        page.parse()
        # newspaper3k requires .nlp() before .summary is populated.
        page.nlp()
    except Exception as exc:
        logging.error(f"Error summarizing article from URL {url}: {str(exc)}")
        return f"Error fetching or summarizing content from {url}"
    return page.summary

def update_data_with_summaries(data):
    """
    Return a copy of *data* in which every URL-valued field is replaced by a
    summary of the page it points to.

    :param data: The original data dictionary.
    :return: A new dictionary with URL contents summarized in place.
    """
    data_updated = dict(data)
    for field, value in data.items():
        # A string starting with http:// or https:// is treated as a URL.
        is_url = isinstance(value, str) and re.match(r'http[s]?://', value)
        if is_url:
            st.write(f"Fetching and summarizing content for URL in '{field}'...")
            data_updated[field] = extract_and_summarize_article(value)
    return data_updated
def strip_md(text):
    """
    Remove Markdown emphasis/heading markers (**, *, #) and backslash-escape
    the remaining punctuation characters !*_=~-.

    NOTE(review): add_section_to_doc strips all backslashes right after
    calling this, which undoes the escaping step — confirm that is intended.
    """
    for marker in ("**", "*", "#"):
        text = text.replace(marker, "")
    return re.sub(r'([!*_=~-])', r'\\\1', text)

def create_document():
    """Create a new Word document seeded with the proposal title heading."""
    proposal_doc = Document()
    proposal_doc.add_heading("PhD Research Proposal", 0)
    return proposal_doc

def add_section_to_doc(doc, section_name, section_content):
    """
    Append one proposal section (level-1 heading plus body paragraph) to *doc*.

    The content is run through strip_md first; the backslashes strip_md
    inserts are then removed so the paragraph reads cleanly.
    """
    cleaned = strip_md(section_content)
    cleaned = cleaned.replace("\\", "")  # drop strip_md's escape backslashes
    doc.add_heading(section_name, level=1)
    doc.add_paragraph(cleaned)
    return doc

def get_docx_bytes(doc):
    """Serialize a python-docx Document into an in-memory buffer, rewound to byte 0."""
    buffer = io.BytesIO()
    doc.save(buffer)
    buffer.seek(0)
    return buffer

def send_email_with_attachment(to_email, subject, body, filename, section_content):
    """
    Email a generated proposal section with its DOCX file attached.

    Credentials are read from the EMAIL_USER / EMAIL_PASSWORD environment
    variables and the message is sent via Gmail with STARTTLS.

    :param to_email: Recipient address.
    :param subject: Subject line (the section name).
    :param body: Plain-text body; the section content is appended below it.
    :param filename: Path of the DOCX file to attach.
    :param section_content: Generated text of the section, included in the body.
    :return: A human-readable success or failure message (never raises).
    """
    from_email = os.getenv("EMAIL_USER")
    email_password = os.getenv("EMAIL_PASSWORD")

    msg = MIMEMultipart()
    msg['From'] = from_email
    msg['To'] = to_email
    msg['Subject'] = subject

    # Attach the body of the email
    msg.attach(MIMEText(body + f"\n\nContent of the section:\n\n{section_content}", 'plain'))

    # Attach the DOCX file
    try:
        with open(filename, 'rb') as attachment:
            part = MIMEBase('application', 'octet-stream')
            part.set_payload(attachment.read())
            encoders.encode_base64(part)
            # BUG FIX: the header previously contained the literal text
            # 'filename=(unknown)' (a broken f-string), so mail clients had
            # no real attachment name. Use the actual file's base name.
            part.add_header('Content-Disposition', f'attachment; filename="{os.path.basename(filename)}"')
            msg.attach(part)

        # Send the email
        with smtplib.SMTP('smtp.gmail.com', 587) as server:
            server.starttls()
            server.login(from_email, email_password)
            server.send_message(msg)

        # Return success message
        return f"Email sent successfully to {to_email} for section '{subject}'."

    except Exception as e:
        # Best-effort delivery: report the failure rather than raising.
        return f"Failed to send email to {to_email}: {str(e)}"

def sanitize_filename_old(filename, max_length=100):
    """Older variant of sanitize_filename: same character filter, 100-char default cap."""
    return re.sub(r'[<>:"/\\|?*]', '', filename)[:max_length]

def collect_basic_info():
    """
    Render the Streamlit input form; on submit, generate every proposal
    section with the LLM, build a DOCX incrementally with per-section
    download buttons, save the full proposal to disk, and upload it to
    transfer.sh, showing the resulting links.
    """
    st.title("PhD Proposal Generator")

    # Basic Research Information
    # Checkbox to allow URL summarization
    summarize_urls = st.checkbox("Summarize URLs in data", value=False)

    research_topic = st.text_input("Research Topic")
    research_question = st.text_area("Research Question")
    objectives = st.text_area("Research Objectives (SMART)")
    methodology = st.text_area("Research Methodology")
    data_collection = st.text_area("Data Collection Methods")
    data_analysis = st.text_area("Data Analysis Methods")
    justification = st.text_area("Justification for Methodology")
    key_authors = st.text_area("Key Authors in the Field")
    recent_developments = st.text_area("Recent Developments in the Field")
    contribution = st.text_area("Contribution to the Field")
    literature_gap = st.text_area("Literature Gaps")
    timeline = st.text_area("Research Timeline (Phases and Deadlines)")
    total_timeframe = st.text_area("Total Timeframe (e.g., 3 years)")

    # Contact information
    st.write("## Contact Information")
    email = st.text_input("Email")
    whatsapp_number = st.text_input("WhatsApp Number")

    if st.button('Submit'):
        # Collect everything into one dict; blank values are treated as
        # "missing fields" downstream in call_llm.
        data = {
            "research_topic": research_topic,
            "research_question": research_question,
            "objectives": objectives,
            "methodology": methodology,
            "data_collection": data_collection,
            "data_analysis": data_analysis,
            "justification": justification,
            "key_authors": key_authors,
            "recent_developments": recent_developments,
            "contribution": contribution,
            "literature_gap": literature_gap,
            "timeline": timeline,
            "total_timeframe": total_timeframe,
            "email": email,
            "whatsapp_number": whatsapp_number
        }

        # History of prompt/response pairs, fed back to the LLM each section.
        history = []

        # Summarize URLs if the user selected the option
        if summarize_urls:
            st.write("Summarizing URLs in the data...")
            data_updated = update_data_with_summaries(data)
        else:
            data_updated = data.copy()

        # Sections to generate, each paired with its prompt-builder
        # (prompt builders come from the `funtions` star import).
        sections_to_process = [
            ("Executive Summary", generate_executive_summary),
            ("Research Objectives", generate_research_objectives),
            ("Research Methodology", generate_methodology_section),
            ("Literature Review Outline", generate_literature_review_outline),
            ("Hypotheses", generate_hypotheses),
            ("Contribution Statement", generate_contribution_statement),
            ("Research Timeline", generate_research_timeline),
            ("Limitations", generate_limitations_section),
            ("Future Work", generate_future_work_section)
        ]

        # Sanitize the research topic for file names
        sanitized_topic = sanitize_filename(research_topic, max_length=50)

        # Build the document section by section.
        doc = create_document()
        doc_bytes = get_docx_bytes(doc)  # ensure defined even if the loop is empty
        for section_name, generate_prompt_func in sections_to_process:
            prompt = generate_prompt_func(data_updated)

            # BUG FIX: previously this called call_llm with the raw `data`,
            # silently discarding the URL summaries, and without retries.
            section_content = call_llm_with_retries(prompt, data_updated, history, section_name)

            # Add the current prompt and response to the history
            history.append(f"{section_name}: {section_content}")

            # Display the generated content for this section
            st.subheader(section_name)
            st.write(section_content)

            # Update document and create download link
            doc = add_section_to_doc(doc, section_name, section_content)
            doc_bytes = get_docx_bytes(doc)

            st.download_button(
                label=f"Download {section_name} as DOCX",
                data=doc_bytes,
                file_name=f"{section_name.replace(' ', '_').lower()}.docx",
                mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
            )

        # Save the complete proposal to disk.
        filename = f"PhD_Proposal_for_{sanitized_topic}.docx"
        with open(filename, 'wb') as f:
            f.write(doc_bytes.getbuffer())

        # Upload the finished file to transfer.sh (moved out of the `with`
        # block so the file is closed/flushed before upload).
        file_paths = [filename]
        urls, html_content = upload_files_to_transfer_sh(file_paths)

        # BUG FIX: previously referenced undefined names `filepath` and
        # `upload_urls`, raising NameError at runtime.
        print(f"Proposal saved as {filename}. Uploaded to transfer.sh: {', '.join(urls)}")

        # Display each link in Streamlit using st.markdown
        st.subheader("Uploaded File Links:")
        for url in urls:
            st.markdown(f"[Click to download your file]({url})")


# Streamlit executes this module top-to-bottom on every interaction.
collect_basic_info()