# Source: Hugging Face Space app.py by SoDa12321 (commit 5a16870, verified).
import streamlit as st
from docx import Document
import re
import io
import os
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.base import MIMEBase
from email import encoders
from email.mime.text import MIMEText
from fpdf import FPDF
from dotenv import load_dotenv
from retrying import retry
from funtions import *
import logging
import random
import time
import newspaper
from newspaper import Article
max_prompt_lenth=6000
# Load environment variables from .env file
load_dotenv()
# Declare the exa search API
exa = Exa(api_key=os.getenv("EXA_API_KEY"))
# Define your API Model and key
client = Groq(api_key=os.getenv("GROQ_API_KEY"))
utilized_model = "llama3-70b-8192"
# Set up logging
logging.basicConfig(filename="llm_errors.log", level=logging.ERROR)
# Functions for the Exa Search content & Parameters for Highlights search
highlights_options = {
"num_sentences": 7, # Length of highlights
"highlights_per_url": 1, # Get the best highlight for each URL
}
# Add title and author contact
st.title("Academic PhD Proposal Generator")
# Display the image using st.image
st.image("https://i.sstatic.net/jUkkO0Fd.jpg", caption="PhD Proposal Generator", use_column_width=True)
#st.markdown("""
#**Website:** [Academic Resource](https://youruniversity.edu)
#""")
st.write("For collaboration, please contact the author πŸ‘‡")
st.write("Email: chatgpt4compas@gmail.com")
st.markdown("[WhatsApp contact πŸ“ž](https://web.whatsapp.com/send?phone=12085033653)")
def sanitize_filename(filename, max_length=10):
    """Strip characters that are illegal in file names and truncate.

    Drops every occurrence of <>:"/\\|?* and returns at most *max_length*
    characters of what remains.
    """
    illegal = '<>:"/\\|?*'
    cleaned = "".join(ch for ch in filename if ch not in illegal)
    return cleaned[:max_length]
@retry(wait_exponential_multiplier=1000, wait_exponential_max=10000, stop_max_attempt_number=5)
def call_llm_old(prompt):
    """Legacy one-shot generator: Exa search for context, then a single chat call."""
    # Pull the top highlight from each of three Exa search results.
    results = exa.search_and_contents(
        query=prompt,
        highlights=highlights_options,
        num_results=3,
        use_autoprompt=True,
    ).results
    contexts = [result.highlights[0] for result in results]
    messages = [
        {"role": "system", "content": "You are an academic PhD proposal generator. Read the provided contexts and use them to generate the proposal."},
        {"role": "user", "content": f"Sources: {contexts}\nQuestion: {prompt}"},
    ]
    response = client.chat.completions.create(model=utilized_model, messages=messages)
    return response.choices[0].message.content
@retry(wait_exponential_multiplier=1000, wait_exponential_max=10000, stop_max_attempt_number=5)
def call_llm(prompt, data, history, section_name):
    """
    Calls the LLM model to generate content, handling missing data fields by searching for context.

    :param prompt: The current prompt to generate content.
    :param data: The dictionary of input fields collected from the user.
    :param history: A list of previous prompts and responses to enhance the model's understanding.
    :param section_name: Name of the proposal section; selects a specialised system prompt.
    :return: Generated content based on the prompt and available data.
    """
    # Identify any user-supplied fields that were left blank.
    missing_fields = [key for key, value in data.items() if not value]
    if missing_fields:
        # One search query per missing field, anchored to the research topic.
        search_queries = [
            f"Provide context for {field} in relation to {data.get('research_topic', 'this research topic')}."
            for field in missing_fields
        ]
        # Fold the queries, conversation history and original prompt into a
        # single prompt, truncated to the model's character budget.
        search_prompt = f"Missing fields: {', '.join(missing_fields)}\n" \
                        f"History: {history}\n" \
                        f"Search Queries: {search_queries}\n" \
                        f"Original Prompt: {prompt}"
        prompt = search_prompt[:max_prompt_lenth-1]
    # Section-specific system prompts; a dispatch table replaces the original
    # if/elif chain.  Unknown sections fall back to the generic prompt.
    section_prompts = {
        "Executive Summary": "You are an expert in PhD proposals. Generate a concise, high-level summary of the research, focusing on the overall research problem, methodology, and expected contribution.",
        "Research Objectives": "You are an expert in PhD proposals. Write detailed research objectives, ensuring they follow SMART criteria (Specific, Measurable, Achievable, Relevant, Time-bound).",
        "Research Methodology": "You are an expert in research methodology. Generate a detailed description of the research design, including data collection and analysis methods, and justify their suitability.",
        "Literature Review Outline": "You are an academic expert in literature reviews. Provide a comprehensive literature review outline that covers the key authors, recent developments, and gaps in the research field.",
        "Hypotheses": "Generate clear and concise hypotheses for the research. These should be based on the research questions and provide a basis for further exploration.",
        "Contribution Statement": "Write a statement explaining the unique contributions this research will make to the field, focusing on how it fills gaps or advances current understanding.",
        "Research Timeline": "Create a detailed research timeline, outlining the different phases and milestones over the total timeframe.",
        "Limitations": "Provide an analysis of the limitations of the research, including potential weaknesses in methodology, data collection, or external factors.",
        "Future Work": "Write a section discussing potential areas of future work that could build on the current research findings.",
    }
    system_prompt = section_prompts.get(
        section_name,
        "You are an academic PhD proposal generator. Use the context and history to answer the user's question and fill in any missing fields.",
    )
    completion = client.chat.completions.create(
        model=utilized_model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ]
    )
    return completion.choices[0].message.content
def delay_with_backoff(attempt):
    """
    Sleep for a jittered, exponentially growing interval.

    The base delay is drawn uniformly from 7-9 seconds, doubled for every
    prior attempt, and capped at 10 seconds total.
    """
    jitter = random.uniform(7, 9)
    wait = jitter * (2 ** (attempt - 1))
    time.sleep(min(wait, 10))  # never sleep longer than 10 s
def call_llm_with_retries(prompt, data, history, section_name, max_retries=3):
    """
    Calls the LLM model to generate content, retrying up to max_retries times in case of errors.
    Implements randomized delay between retries with exponential backoff.

    :param prompt: The current prompt to generate content.
    :param data: The dictionary of input fields collected from the user.
    :param history: A list of previous prompts and responses to enhance the model's understanding.
    :param section_name: The name of the current section being generated.
    :param max_retries: Maximum number of retry attempts (default: 3).
    :return: Generated content, or an error message once retries are exhausted.
    """
    attempt = 0
    while attempt < max_retries:
        attempt += 1
        try:
            return call_llm(prompt, data, history, section_name)
        except Exception as exc:
            # Record the failure, surface it in the UI, then back off and retry.
            logging.error(f"Attempt {attempt}: Error calling LLM model for section '{section_name}': {str(exc)}")
            st.write(f"Attempt {attempt}: There was a problem generating '{section_name}'. Retrying...")
            if attempt == max_retries:
                return f"Failed to generate the section '{section_name}' after {max_retries} attempts. Please try again later."
            delay_with_backoff(attempt)
            st.write(f"Retrying {section_name} after delay...")
    # Defensive fallback; the loop above always returns before reaching here.
    return f"Error: Maximum retry attempts exceeded for {section_name}."
def extract_and_summarize_article(url):
    """
    Download the article at *url* and return newspaper3k's auto-summary.

    Any failure (network, parse, NLP) is logged and reported as an
    explanatory string instead of raising.

    :param url: The URL to be scraped.
    :return: The article summary, or an error message string.
    """
    try:
        page = Article(url)
        page.download()
        page.parse()
        page.nlp()  # populates .summary; must run after parse()
        return page.summary
    except Exception as exc:
        logging.error(f"Error summarizing article from URL {url}: {str(exc)}")
        return f"Error fetching or summarizing content from {url}"
def update_data_with_summaries(data):
    """
    Return a copy of *data* with every URL value replaced by an article summary.

    :param data: The original data dictionary.
    :return: A new dictionary with URL content summarized; non-URL values untouched.
    """
    result = dict(data)
    for field, content in data.items():
        # Skip anything that is not a string starting with http:// or https://.
        if not (isinstance(content, str) and re.match(r'http[s]?://', content)):
            continue
        st.write(f"Fetching and summarizing content for URL in '{field}'...")
        result[field] = extract_and_summarize_article(content)
    return result
def strip_md(text):
    """Drop bold/italic/heading markers, then backslash-escape leftover markdown punctuation."""
    for marker in ("**", "*", "#"):
        text = text.replace(marker, "")
    return re.sub(r'([!*_=~-])', r'\\\1', text)
def create_document():
    """Create a Word document pre-seeded with the proposal's top-level heading."""
    proposal = Document()
    proposal.add_heading("PhD Research Proposal", 0)
    return proposal
def add_section_to_doc(doc, section_name, section_content):
    """
    Append *section_content* to *doc* under a level-1 heading *section_name*.

    Markdown markers are stripped first; all backslashes (including the
    escapes strip_md introduces) are then removed before writing.
    """
    cleaned = strip_md(section_content).replace("\\", "")
    doc.add_heading(section_name, level=1)
    doc.add_paragraph(cleaned)
    return doc
def get_docx_bytes(doc):
    """Serialize *doc* into an in-memory buffer, rewound and ready to read."""
    buffer = io.BytesIO()
    doc.save(buffer)
    buffer.seek(0)
    return buffer
def send_email_with_attachment(to_email, subject, body, filename, section_content):
    """
    Email *section_content* inline plus the DOCX at *filename* as an attachment.

    Credentials come from the EMAIL_USER / EMAIL_PASSWORD environment
    variables; mail goes out through Gmail SMTP with STARTTLS.

    :param to_email: Recipient address.
    :param subject: Email subject line.
    :param body: Leading body text (section content is appended to it).
    :param filename: Path of the DOCX file to attach.
    :param section_content: Generated section text, echoed in the body.
    :return: A human-readable success or failure message (never raises).
    """
    from_email = os.getenv("EMAIL_USER")
    email_password = os.getenv("EMAIL_PASSWORD")
    msg = MIMEMultipart()
    msg['From'] = from_email
    msg['To'] = to_email
    msg['Subject'] = subject
    # Attach the body of the email
    msg.attach(MIMEText(body + f"\n\nContent of the section:\n\n{section_content}", 'plain'))
    # Attach the DOCX file
    try:
        with open(filename, 'rb') as attachment:
            part = MIMEBase('application', 'octet-stream')
            part.set_payload(attachment.read())
        encoders.encode_base64(part)
        # BUGFIX: the header previously carried a literal placeholder instead of
        # the real file name, so recipients received a nameless attachment.
        part.add_header('Content-Disposition', f'attachment; filename="{os.path.basename(filename)}"')
        msg.attach(part)
        # Send the email
        with smtplib.SMTP('smtp.gmail.com', 587) as server:
            server.starttls()
            server.login(from_email, email_password)
            server.send_message(msg)
        # Return success message
        return f"Email sent successfully to {to_email} for section '{subject}'."
    except Exception as e:
        return f"Failed to send email to {to_email}: {str(e)}"
def sanitize_filename_old(filename, max_length=100):
    """Drop filesystem-illegal characters and clip the result to *max_length* chars."""
    kept = (c for c in filename if c not in '<>:"/\\|?*')
    return "".join(kept)[:max_length]
def collect_basic_info():
    """
    Render the proposal input form and, on submit, drive the full pipeline:
    generate each section with the LLM, build a DOCX, save it, upload it,
    and show download links.

    Side effects: draws Streamlit widgets, writes a DOCX into the working
    directory, and uploads files via upload_files_to_transfer_sh (from the
    star-imported helpers module).
    """
    st.title("PhD Proposal Generator")
    # Basic Research Information
    # Checkbox to allow URL summarization
    summarize_urls = st.checkbox("Summarize URLs in data", value=False)
    research_topic = st.text_input("Research Topic")
    research_question = st.text_area("Research Question")
    objectives = st.text_area("Research Objectives (SMART)")
    methodology = st.text_area("Research Methodology")
    data_collection = st.text_area("Data Collection Methods")
    data_analysis = st.text_area("Data Analysis Methods")
    justification = st.text_area("Justification for Methodology")
    key_authors = st.text_area("Key Authors in the Field")
    recent_developments = st.text_area("Recent Developments in the Field")
    contribution = st.text_area("Contribution to the Field")
    literature_gap = st.text_area("Literature Gaps")
    timeline = st.text_area("Research Timeline (Phases and Deadlines)")
    total_timeframe = st.text_area("Total Timeframe (e.g., 3 years)")
    # Contact information
    st.write("## Contact Information")
    email = st.text_input("Email")
    whatsapp_number = st.text_input("WhatsApp Number")
    if not st.button('Submit'):
        return  # nothing to do until the user submits the form
    # Collect data
    data = {
        "research_topic": research_topic,
        "research_question": research_question,
        "objectives": objectives,
        "methodology": methodology,
        "data_collection": data_collection,
        "data_analysis": data_analysis,
        "justification": justification,
        "key_authors": key_authors,
        "recent_developments": recent_developments,
        "contribution": contribution,
        "literature_gap": literature_gap,
        "timeline": timeline,
        "total_timeframe": total_timeframe,
        "email": email,
        "whatsapp_number": whatsapp_number
    }
    # History of prompts/responses, fed back to the model for continuity.
    history = []
    # Summarize URLs if the user selected the option
    if summarize_urls:
        st.write("Summarizing URLs in the data...")
        data_updated = update_data_with_summaries(data)
    else:
        data_updated = data.copy()
    # Define the sections to process for an academic proposal
    sections_to_process = [
        ("Executive Summary", generate_executive_summary),
        ("Research Objectives", generate_research_objectives),
        ("Research Methodology", generate_methodology_section),
        ("Literature Review Outline", generate_literature_review_outline),
        ("Hypotheses", generate_hypotheses),
        ("Contribution Statement", generate_contribution_statement),
        ("Research Timeline", generate_research_timeline),
        ("Limitations", generate_limitations_section),
        ("Future Work", generate_future_work_section)
    ]
    # Sanitize the research topic for file names
    sanitized_topic = sanitize_filename(research_topic, max_length=50)
    # Create a new document
    doc = create_document()
    for section_name, generate_prompt_func in sections_to_process:
        # Generate prompt for each section
        prompt = generate_prompt_func(data_updated)
        # BUGFIX: pass data_updated (which carries the URL summaries) rather
        # than the raw data, and use the retrying wrapper so transient API
        # failures are retried with backoff instead of aborting the run.
        section_content = call_llm_with_retries(prompt, data_updated, history, section_name)
        # Add the current prompt and response to the history
        history.append(f"{section_name}: {section_content}")
        # Display the generated content for this section
        st.subheader(section_name)
        st.write(section_content)
        # Update document and create download link
        doc = add_section_to_doc(doc, section_name, section_content)
        doc_bytes = get_docx_bytes(doc)
        st.download_button(
            label=f"Download {section_name} as DOCX",
            data=doc_bytes,
            file_name=f"{section_name.replace(' ', '_').lower()}.docx",
            mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
        )
    # Save the fully assembled document to disk.
    filename = f"PhD_Proposal_for_{sanitized_topic}.docx"
    with open(filename, 'wb') as f:
        f.write(doc_bytes.getbuffer())
    # Prepare files for upload (including the generated proposal)
    file_paths = [filename]
    # Upload the files to transfer.sh
    urls, html_content = upload_files_to_transfer_sh(file_paths)
    # BUGFIX: the original referenced undefined names `filepath` and
    # `upload_urls`, raising NameError right after a successful upload.
    print(f"Proposal saved as {filename}. Uploaded to transfer.sh: {', '.join(urls)}")
    # Display each link in Streamlit using st.markdown
    st.subheader("Uploaded File Links:")
    for url in urls:
        st.markdown(f"[Click to download your file]({url})")
# Entry point: render the form and run the generation pipeline on submit.
collect_basic_info()