File size: 17,252 Bytes
022961b 5a16870 022961b 5a16870 022961b b2b56d5 022961b 3447bed d326d22 022961b fd99134 0c5315f 3447bed 022961b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 |
import streamlit as st
from docx import Document
import re
import io
import os
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.base import MIMEBase
from email import encoders
from email.mime.text import MIMEText
from fpdf import FPDF
from dotenv import load_dotenv
from retrying import retry
from funtions import *
import logging
import random
import time
import newspaper
from newspaper import Article
# Maximum number of characters for a prompt sent to the LLM; call_llm
# truncates combined prompts to this budget.
# NOTE(review): name contains a typo ("lenth"); kept as-is because
# call_llm references it by this name.
max_prompt_lenth=6000
# Load environment variables from .env file
load_dotenv()
# Declare the exa search API
exa = Exa(api_key=os.getenv("EXA_API_KEY"))
# Define your API Model and key
client = Groq(api_key=os.getenv("GROQ_API_KEY"))
utilized_model = "llama3-70b-8192"
# Set up logging (errors only, appended to llm_errors.log)
logging.basicConfig(filename="llm_errors.log", level=logging.ERROR)
# Functions for the Exa Search content & Parameters for Highlights search
highlights_options = {
    "num_sentences": 7, # Length of highlights
    "highlights_per_url": 1, # Get the best highlight for each URL
}
# Add title and author contact
st.title("Academic PhD Proposal Generator")
# Display the image using st.image
st.image("https://i.sstatic.net/jUkkO0Fd.jpg", caption="PhD Proposal Generator", use_column_width=True)
#st.markdown("""
#**Website:** [Academic Resource](https://youruniversity.edu)
#""")
st.write("For collaboration, please contact the author π")
st.write("Email: chatgpt4compas@gmail.com")
st.markdown("[WhatsApp contact π](https://web.whatsapp.com/send?phone=12085033653)")
def sanitize_filename(filename, max_length=10):
    """
    Return *filename* with file-system-invalid characters stripped and the
    result truncated to at most *max_length* characters.

    Only the characters <>:"/\\|?* are removed; alphanumerics, spaces, and
    every other character are preserved.

    :param filename: the raw name to sanitize.
    :param max_length: maximum number of characters to keep (default 10).
    :return: the sanitized, truncated name.
    """
    cleaned = re.sub(r'[<>:"/\\|?*]', '', filename)
    return cleaned[:max_length]
@retry(wait_exponential_multiplier=1000, wait_exponential_max=10000, stop_max_attempt_number=5)
def call_llm_old(prompt):
    """
    Legacy generation path: pull supporting highlights from Exa, then ask
    the chat model to write the proposal from those sources.

    Retries with exponential backoff (via @retry) on any exception.

    :param prompt: the user question / generation prompt.
    :return: the model's generated text.
    """
    search = exa.search_and_contents(
        query=prompt,
        highlights=highlights_options,
        num_results=3,
        use_autoprompt=True,
    )
    # One best highlight per returned URL (see highlights_options).
    info = [result.highlights[0] for result in search.results]
    messages = [
        {
            "role": "system",
            "content": "You are an academic PhD proposal generator. Read the provided contexts and use them to generate the proposal.",
        },
        {"role": "user", "content": f"Sources: {info}\nQuestion: {prompt}"},
    ]
    response = client.chat.completions.create(model=utilized_model, messages=messages)
    return response.choices[0].message.content
@retry(wait_exponential_multiplier=1000, wait_exponential_max=10000, stop_max_attempt_number=5)
def call_llm(prompt, data, history, section_name):
    """
    Calls the LLM model to generate content, handling missing data fields by searching for context.

    :param prompt: The current prompt to generate content.
    :param data: The dictionary of input fields collected from the user.
    :param history: A list of previous prompts and responses to enhance the model's understanding.
    :param section_name: The proposal section being generated; selects a specialized system prompt.
    :return: Generated content based on the prompt and available data.
    """
    # Identify any missing fields
    missing_fields = [key for key, value in data.items() if not value]
    if missing_fields:
        # Create search queries for missing fields based on the research topic or related data
        search_queries = [
            f"Provide context for {field} in relation to {data.get('research_topic', 'this research topic')}."
            for field in missing_fields
        ]
        # Combine the search queries with the history and current prompt
        search_prompt = f"Missing fields: {', '.join(missing_fields)}\n" \
                        f"History: {history}\n" \
                        f"Search Queries: {search_queries}\n" \
                        f"Original Prompt: {prompt}"
        # Keep the combined prompt within the model's context budget.
        prompt = search_prompt[:max_prompt_lenth-1]
    # Section-specific system prompts. A dispatch table replaces the former
    # nine-branch if/elif chain; unknown sections fall back to the generic prompt.
    section_prompts = {
        "Executive Summary": "You are an expert in PhD proposals. Generate a concise, high-level summary of the research, focusing on the overall research problem, methodology, and expected contribution.",
        "Research Objectives": "You are an expert in PhD proposals. Write detailed research objectives, ensuring they follow SMART criteria (Specific, Measurable, Achievable, Relevant, Time-bound).",
        "Research Methodology": "You are an expert in research methodology. Generate a detailed description of the research design, including data collection and analysis methods, and justify their suitability.",
        "Literature Review Outline": "You are an academic expert in literature reviews. Provide a comprehensive literature review outline that covers the key authors, recent developments, and gaps in the research field.",
        "Hypotheses": "Generate clear and concise hypotheses for the research. These should be based on the research questions and provide a basis for further exploration.",
        "Contribution Statement": "Write a statement explaining the unique contributions this research will make to the field, focusing on how it fills gaps or advances current understanding.",
        "Research Timeline": "Create a detailed research timeline, outlining the different phases and milestones over the total timeframe.",
        "Limitations": "Provide an analysis of the limitations of the research, including potential weaknesses in methodology, data collection, or external factors.",
        "Future Work": "Write a section discussing potential areas of future work that could build on the current research findings.",
    }
    system_prompt = section_prompts.get(
        section_name,
        "You are an academic PhD proposal generator. Use the context and history to answer the user's question and fill in any missing fields.",
    )
    completion = client.chat.completions.create(
        model=utilized_model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ]
    )
    return completion.choices[0].message.content
def delay_with_backoff(attempt):
    """
    Sleep for an exponentially growing, jittered interval.

    The base delay is drawn uniformly from [7, 9] seconds and doubled once
    per prior attempt, but the actual sleep never exceeds 10 seconds.

    :param attempt: 1-based retry attempt number.
    """
    base = random.uniform(7, 9)
    wait = base * 2 ** (attempt - 1)
    time.sleep(min(wait, 10))  # Cap the delay at 10 seconds
def call_llm_with_retries(prompt, data, history, section_name, max_retries=3):
    """
    Wrap call_llm with up to *max_retries* attempts, applying a jittered
    exponential backoff between failures.

    :param prompt: The current prompt to generate content.
    :param data: The dictionary of input fields collected from the user.
    :param history: A list of previous prompts and responses to enhance the model's understanding.
    :param section_name: The name of the current section being generated.
    :param max_retries: Maximum number of retry attempts (default: 3).
    :return: Generated content, or an error message once every attempt has failed.
    """
    attempt = 0
    while attempt < max_retries:
        attempt += 1
        try:
            return call_llm(prompt, data, history, section_name)
        except Exception as e:
            # Record the failure, surface it in the UI, then back off and retry.
            logging.error(f"Attempt {attempt}: Error calling LLM model for section '{section_name}': {str(e)}")
            st.write(f"Attempt {attempt}: There was a problem generating '{section_name}'. Retrying...")
            if attempt == max_retries:
                return f"Failed to generate the section '{section_name}' after {max_retries} attempts. Please try again later."
            delay_with_backoff(attempt)
            st.write(f"Retrying {section_name} after delay...")
    # Unreachable in practice (the final failed attempt returns above);
    # kept as a defensive fallback.
    return f"Error: Maximum retry attempts exceeded for {section_name}."
def extract_and_summarize_article(url):
    """
    Fetch and summarize content from a URL using the newspaper3k module.

    :param url: The URL to be scraped.
    :return: The article summary, or an error string when the fetch or
             summarization fails (the error is also logged).
    """
    try:
        page = Article(url)
        page.download()
        page.parse()
        page.nlp()  # NLP pass is required before .summary is populated
        return page.summary
    except Exception as e:
        logging.error(f"Error summarizing article from URL {url}: {str(e)}")
        return f"Error fetching or summarizing content from {url}"
def update_data_with_summaries(data):
    """
    Return a copy of *data* in which every string value starting with
    http:// or https:// has been replaced by a summary of that page.

    :param data: The original data dictionary.
    :return: A new dictionary with URL values summarized; non-URL values
             are carried over unchanged.
    """
    summarized = dict(data)
    for field, value in data.items():
        # A value counts as a URL when it begins with an http(s) scheme.
        if isinstance(value, str) and re.match(r'http[s]?://', value):
            st.write(f"Fetching and summarizing content for URL in '{field}'...")
            summarized[field] = extract_and_summarize_article(value)
    return summarized
def strip_md(text):
    """
    Remove common Markdown markers (**, *, #) from *text*, then
    backslash-escape the remaining formatting characters ! * _ = ~ -.

    (After the removal step no '*' can survive, so in practice only
    ! _ = ~ - are ever escaped.)
    """
    for marker in ("**", "*", "#"):
        text = text.replace(marker, "")
    return re.sub(r'([!*_=~-])', r'\\\1', text)
def create_document():
    """Create a new Word document seeded with the proposal title heading."""
    document = Document()
    document.add_heading("PhD Research Proposal", 0)
    return document
def add_section_to_doc(doc, section_name, section_content):
    """
    Append one proposal section (heading + body paragraph) to *doc*.

    The body is first run through strip_md, then the backslashes strip_md
    inserted are removed again so only plain text lands in the document.

    :param doc: the Document to extend.
    :param section_name: heading text (level-1).
    :param section_content: raw (possibly Markdown-flavored) body text.
    :return: the same Document, for chaining.
    """
    cleaned = strip_md(section_content).replace("\\", "")  # Remove backslashes
    doc.add_heading(section_name, level=1)
    doc.add_paragraph(cleaned)
    return doc
def get_docx_bytes(doc):
    """
    Serialize *doc* into an in-memory buffer.

    :param doc: any object exposing a ``save(file_like)`` method
                (e.g. a python-docx Document).
    :return: a BytesIO rewound to position 0, ready for reading.
    """
    buffer = io.BytesIO()
    doc.save(buffer)
    buffer.seek(0)
    return buffer
def send_email_with_attachment(to_email, subject, body, filename, section_content):
    """
    Email the generated proposal as a DOCX attachment via Gmail SMTP.

    Credentials are read from the EMAIL_USER / EMAIL_PASSWORD environment
    variables.

    :param to_email: recipient address.
    :param subject: email subject line.
    :param body: lead-in text for the email body.
    :param filename: path of the DOCX file to attach.
    :param section_content: section text appended to the body for convenience.
    :return: a human-readable success or failure message (errors are not raised).
    """
    from_email = os.getenv("EMAIL_USER")
    email_password = os.getenv("EMAIL_PASSWORD")
    msg = MIMEMultipart()
    msg['From'] = from_email
    msg['To'] = to_email
    msg['Subject'] = subject
    # Attach the body of the email
    msg.attach(MIMEText(body + f"\n\nContent of the section:\n\n{section_content}", 'plain'))
    # Attach the DOCX file
    try:
        with open(filename, 'rb') as attachment:
            part = MIMEBase('application', 'octet-stream')
            part.set_payload(attachment.read())
        encoders.encode_base64(part)
        # Bug fix: the Content-Disposition header previously carried a
        # hard-coded placeholder instead of the real attachment name.
        part.add_header('Content-Disposition',
                        f'attachment; filename="{os.path.basename(filename)}"')
        msg.attach(part)
        # Send the email
        with smtplib.SMTP('smtp.gmail.com', 587) as server:
            server.starttls()
            server.login(from_email, email_password)
            server.send_message(msg)
        # Return success message
        return f"Email sent successfully to {to_email} for section '{subject}'."
    except Exception as e:
        return f"Failed to send email to {to_email}: {str(e)}"
def sanitize_filename_old(filename, max_length=100):
    """
    Older variant of sanitize_filename: strip the characters <>:"/\\|?*
    and cap the result at *max_length* characters (default 100).
    """
    return re.sub(r'[<>:"/\\|?*]', '', filename)[:max_length]
def collect_basic_info():
    """
    Render the Streamlit intake form; on submit, generate every proposal
    section via the LLM, assemble a DOCX, save it locally, and upload it
    to transfer.sh, showing download links in the UI.
    """
    st.title("PhD Proposal Generator")
    # Basic Research Information
    # Checkbox to allow URL summarization
    summarize_urls = st.checkbox("Summarize URLs in data", value=False)
    research_topic = st.text_input("Research Topic")
    research_question = st.text_area("Research Question")
    objectives = st.text_area("Research Objectives (SMART)")
    methodology = st.text_area("Research Methodology")
    data_collection = st.text_area("Data Collection Methods")
    data_analysis = st.text_area("Data Analysis Methods")
    justification = st.text_area("Justification for Methodology")
    key_authors = st.text_area("Key Authors in the Field")
    recent_developments = st.text_area("Recent Developments in the Field")
    contribution = st.text_area("Contribution to the Field")
    literature_gap = st.text_area("Literature Gaps")
    timeline = st.text_area("Research Timeline (Phases and Deadlines)")
    total_timeframe = st.text_area("Total Timeframe (e.g., 3 years)")
    # Contact information
    st.write("## Contact Information")
    email = st.text_input("Email")
    whatsapp_number = st.text_input("WhatsApp Number")
    if st.button('Submit'):
        # Collect data
        data = {
            "research_topic": research_topic,
            "research_question": research_question,
            "objectives": objectives,
            "methodology": methodology,
            "data_collection": data_collection,
            "data_analysis": data_analysis,
            "justification": justification,
            "key_authors": key_authors,
            "recent_developments": recent_developments,
            "contribution": contribution,
            "literature_gap": literature_gap,
            "timeline": timeline,
            "total_timeframe": total_timeframe,
            "email": email,
            "whatsapp_number": whatsapp_number
        }
        # History of prompts and responses, fed back into later sections.
        history = []
        # Summarize URLs if the user selected the option
        if summarize_urls:
            st.write("Summarizing URLs in the data...")
            data_updated = update_data_with_summaries(data)
        else:
            data_updated = data.copy()
        # Define the sections to process for an academic proposal
        sections_to_process = [
            ("Executive Summary", generate_executive_summary),
            ("Research Objectives", generate_research_objectives),
            ("Research Methodology", generate_methodology_section),
            ("Literature Review Outline", generate_literature_review_outline),
            ("Hypotheses", generate_hypotheses),
            ("Contribution Statement", generate_contribution_statement),
            ("Research Timeline", generate_research_timeline),
            ("Limitations", generate_limitations_section),
            ("Future Work", generate_future_work_section)
        ]
        # Sanitize the research topic for file names
        sanitized_topic = sanitize_filename(research_topic, max_length=50)
        # Create a new document
        doc = create_document()
        for section_name, generate_prompt_func in sections_to_process:
            # Generate prompt for each section
            prompt = generate_prompt_func(data_updated)
            # Bug fix: generation previously used the raw `data` while the
            # prompt used `data_updated`, and had no retry protection; use
            # the summarized data and the retry wrapper consistently.
            section_content = call_llm_with_retries(prompt, data_updated, history, section_name)
            # Add the current prompt and response to the history
            history.append(f"{section_name}: {section_content}")
            # Display the generated content for this section
            st.subheader(section_name)
            st.write(section_content)
            # Update document and create download link
            doc = add_section_to_doc(doc, section_name, section_content)
            doc_bytes = get_docx_bytes(doc)
            st.download_button(
                label=f"Download {section_name} as DOCX",
                data=doc_bytes,
                file_name=f"{section_name.replace(' ', '_').lower()}.docx",
                mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
            )
        # Save the final cumulative document to file
        filename = f"PhD_Proposal_for_{sanitized_topic}.docx"
        with open(filename, 'wb') as f:
            f.write(doc_bytes.getbuffer())
        # Prepare files for upload (including the generated proposal)
        file_paths = [filename]
        # Upload the files to transfer.sh
        urls, html_content = upload_files_to_transfer_sh(file_paths)
        # Bug fix: previously referenced undefined names `filepath` and
        # `upload_urls`, raising NameError after the upload.
        print(f"Proposal saved as {filename}. Uploaded to transfer.sh: {', '.join(urls)}")
        # Display each link in Streamlit using st.markdown
        st.subheader("Uploaded File Links:")
        for url in urls:
            st.markdown(f"[Click to download your file]({url})")

collect_basic_info()
|