|
|
import streamlit as st |
|
|
from docx import Document |
|
|
import re |
|
|
import io |
|
|
import os |
|
|
import smtplib |
|
|
from email.mime.multipart import MIMEMultipart |
|
|
from email.mime.base import MIMEBase |
|
|
from email import encoders |
|
|
from email.mime.text import MIMEText |
|
|
from fpdf import FPDF |
|
|
from dotenv import load_dotenv |
|
|
from retrying import retry |
|
|
from funtions import * |
|
|
import logging |
|
|
import random |
|
|
import time |
|
|
import newspaper |
|
|
from newspaper import Article |
|
|
|
|
|
# Hard character cap applied to prompts built in call_llm().
# NOTE(review): name contains a typo ("lenth"); call_llm references it, so a
# rename must touch both sites at once.
max_prompt_lenth=6000


# Pull API keys and mail credentials from a local .env file into os.environ.
load_dotenv()


# Exa web-search client. NOTE(review): `Exa` is not imported explicitly here —
# presumably it arrives via the star import from `funtions`; verify.
exa = Exa(api_key=os.getenv("EXA_API_KEY"))


# Groq chat-completion client and the single model used for all generations.
# NOTE(review): `Groq` also presumably comes from the `funtions` star import.
client = Groq(api_key=os.getenv("GROQ_API_KEY"))

utilized_model = "llama3-70b-8192"


# LLM call failures are appended to this local log file.
logging.basicConfig(filename="llm_errors.log", level=logging.ERROR)


# Highlight-extraction options forwarded to exa.search_and_contents().
highlights_options = {

    "num_sentences": 7,

    "highlights_per_url": 1,

}


# --- Static page header (rendered on every Streamlit rerun) ---
st.title("Academic PhD Proposal Generator")

st.image("https://i.sstatic.net/jUkkO0Fd.jpg", caption="PhD Proposal Generator", use_column_width=True)

st.write("For collaboration, please contact the author π")

st.write("Email: chatgpt4compas@gmail.com")

st.markdown("[WhatsApp contact π](https://web.whatsapp.com/send?phone=12085033653)")
|
|
|
|
|
def sanitize_filename(filename, max_length=10):
    """
    Return *filename* with characters that are invalid in file names
    (< > : " / \\ | ? *) removed, truncated to *max_length* characters.

    :param filename: Candidate file name to clean.
    :param max_length: Maximum number of characters to keep (default 10).
    :return: The cleaned, length-limited name.
    """
    cleaned = re.sub(r'[<>:"/\\|?*]', '', filename)
    return cleaned[:max_length]
|
|
|
|
|
@retry(wait_exponential_multiplier=1000, wait_exponential_max=10000, stop_max_attempt_number=5)
def call_llm_old(prompt):
    """
    Legacy generator: search Exa for context on *prompt*, then ask the
    Groq model to produce the proposal text from those highlights.

    Retried with exponential backoff (1s..10s, up to 5 attempts) on error.

    :param prompt: The user's generation request / question.
    :return: The model's reply text.
    """
    search_response = exa.search_and_contents(
        query=prompt,
        highlights=highlights_options,
        num_results=3,
        use_autoprompt=True,
    )
    # One highlight snippet per search result serves as grounding context.
    contexts = [result.highlights[0] for result in search_response.results]

    messages = [
        {"role": "system", "content": "You are an academic PhD proposal generator. Read the provided contexts and use them to generate the proposal."},
        {"role": "user", "content": f"Sources: {contexts}\nQuestion: {prompt}"},
    ]
    completion = client.chat.completions.create(model=utilized_model, messages=messages)
    return completion.choices[0].message.content
|
|
|
|
|
@retry(wait_exponential_multiplier=1000, wait_exponential_max=10000, stop_max_attempt_number=5)
def call_llm(prompt, data, history, section_name):
    """
    Call the LLM to generate one proposal section.

    If any user-supplied field is empty, the prompt is rewritten to list
    the missing fields, the conversation history, and one search query per
    missing field, so the model can fill the gaps from context. Retried
    with exponential backoff (1s..10s, up to 5 attempts) on error.

    :param prompt: The current prompt to generate content.
    :param data: The dictionary of input fields collected from the user.
    :param history: A list of previous prompts and responses to enhance the model's understanding.
    :param section_name: Title of the section being generated; selects a
        specialized system prompt when one is defined for it.
    :return: Generated content based on the prompt and available data.
    """
    missing_fields = [key for key, value in data.items() if not value]

    if missing_fields:
        search_queries = [
            f"Provide context for {field} in relation to {data.get('research_topic', 'this research topic')}."
            for field in missing_fields
        ]

        search_prompt = f"Missing fields: {', '.join(missing_fields)}\n" \
                        f"History: {history}\n" \
                        f"Search Queries: {search_queries}\n" \
                        f"Original Prompt: {prompt}"
        # Truncate so the combined prompt stays within the model's context
        # budget; anything beyond the cap is silently dropped.
        prompt = search_prompt[:max_prompt_lenth - 1]

    # Dispatch table replaces the previous if/elif chain: one specialized
    # system prompt per known section, with a generic fallback.
    section_prompts = {
        "Executive Summary": "You are an expert in PhD proposals. Generate a concise, high-level summary of the research, focusing on the overall research problem, methodology, and expected contribution.",
        "Research Objectives": "You are an expert in PhD proposals. Write detailed research objectives, ensuring they follow SMART criteria (Specific, Measurable, Achievable, Relevant, Time-bound).",
        "Research Methodology": "You are an expert in research methodology. Generate a detailed description of the research design, including data collection and analysis methods, and justify their suitability.",
        "Literature Review Outline": "You are an academic expert in literature reviews. Provide a comprehensive literature review outline that covers the key authors, recent developments, and gaps in the research field.",
        "Hypotheses": "Generate clear and concise hypotheses for the research. These should be based on the research questions and provide a basis for further exploration.",
        "Contribution Statement": "Write a statement explaining the unique contributions this research will make to the field, focusing on how it fills gaps or advances current understanding.",
        "Research Timeline": "Create a detailed research timeline, outlining the different phases and milestones over the total timeframe.",
        "Limitations": "Provide an analysis of the limitations of the research, including potential weaknesses in methodology, data collection, or external factors.",
        "Future Work": "Write a section discussing potential areas of future work that could build on the current research findings.",
    }
    system_prompt = section_prompts.get(
        section_name,
        "You are an academic PhD proposal generator. Use the context and history to answer the user's question and fill in any missing fields.",
    )

    completion = client.chat.completions.create(
        model=utilized_model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ]
    )
    return completion.choices[0].message.content
|
|
|
|
|
def delay_with_backoff(attempt):
    """
    Sleep for a jittered, exponentially growing delay.

    The base delay is drawn uniformly from 7-9 seconds, doubled for each
    prior attempt, and capped at 10 seconds.

    :param attempt: 1-based attempt counter driving the exponent.
    """
    jittered_base = random.uniform(7, 9)
    time.sleep(min(jittered_base * 2 ** (attempt - 1), 10))
|
|
|
|
|
def call_llm_with_retries(prompt, data, history, section_name, max_retries=3):
    """
    Generate a section via call_llm, retrying on failure.

    Each failure is logged and surfaced in the Streamlit UI; between
    attempts a randomized exponential backoff is applied. After the last
    failed attempt an error string (not an exception) is returned.

    :param prompt: The current prompt to generate content.
    :param data: The dictionary of input fields collected from the user.
    :param history: A list of previous prompts and responses to enhance the model's understanding.
    :param section_name: The name of the current section being generated.
    :param max_retries: Maximum number of retry attempts (default: 3).
    :return: Generated content, or an error message after all retries fail.
    """
    attempt = 0
    while attempt < max_retries:
        attempt += 1
        try:
            return call_llm(prompt, data, history, section_name)
        except Exception as exc:
            logging.error(f"Attempt {attempt}: Error calling LLM model for section '{section_name}': {str(exc)}")
            st.write(f"Attempt {attempt}: There was a problem generating '{section_name}'. Retrying...")
            if attempt == max_retries:
                return f"Failed to generate the section '{section_name}' after {max_retries} attempts. Please try again later."
            delay_with_backoff(attempt)
            st.write(f"Retrying {section_name} after delay...")
    # Defensive fallback; normally unreachable because the loop returns.
    return f"Error: Maximum retry attempts exceeded for {section_name}."
|
|
|
|
|
def extract_and_summarize_article(url):
    """
    Download the page at *url* and return newspaper3k's article summary.

    Any failure (download, parse, NLP) is logged and reported as a
    human-readable error string instead of raising.

    :param url: The URL to be scraped.
    :return: The article summary, or an error message string on failure.
    """
    try:
        piece = Article(url)
        piece.download()
        piece.parse()
        piece.nlp()  # populates .summary
        return piece.summary
    except Exception as exc:
        logging.error(f"Error summarizing article from URL {url}: {str(exc)}")
        return f"Error fetching or summarizing content from {url}"
|
|
|
|
|
def update_data_with_summaries(data):
    """
    Return a copy of *data* in which every string value starting with
    http:// or https:// is replaced by a summary of the linked article.

    Progress for each summarized field is shown in the Streamlit UI; the
    input dictionary is never mutated.

    :param data: The original data dictionary.
    :return: A new dictionary with URL values summarized.
    """
    result = dict(data)
    for field, content in data.items():
        if isinstance(content, str) and re.match(r'http[s]?://', content):
            st.write(f"Fetching and summarizing content for URL in '{field}'...")
            result[field] = extract_and_summarize_article(content)
    return result
|
|
def strip_md(text):
    """
    Strip common Markdown markers (**, *, #) from *text*, then
    backslash-escape each of the characters ! * _ = ~ - .

    Note: '*' is removed before escaping, so it never actually appears
    escaped in the output.
    """
    for marker in ("**", "*", "#"):
        text = text.replace(marker, "")
    return re.sub(r'([!*_=~-])', r'\\\1', text)
|
|
|
|
|
def create_document():
    """Create a new Word document headed "PhD Research Proposal" (title level 0)."""
    proposal_doc = Document()
    proposal_doc.add_heading("PhD Research Proposal", 0)
    return proposal_doc
|
|
|
|
|
def add_section_to_doc(doc, section_name, section_content):
    """
    Append *section_content* to *doc* under a level-1 heading *section_name*.

    The text is first passed through strip_md and then has every backslash
    removed — which also undoes the escaping strip_md adds, so the net
    effect is plain text with Markdown markers and backslashes stripped.

    :return: The same document object, for chaining.
    """
    cleaned = strip_md(section_content).replace("\\", "")
    doc.add_heading(section_name, level=1)
    doc.add_paragraph(cleaned)
    return doc
|
|
|
|
|
def get_docx_bytes(doc):
    """
    Serialize *doc* into an in-memory binary buffer.

    :param doc: Any object with a file-like ``save(stream)`` method
        (a python-docx Document in this app).
    :return: A BytesIO positioned at offset 0, ready for reading.
    """
    buffer = io.BytesIO()
    doc.save(buffer)
    buffer.seek(0)
    return buffer
|
|
|
|
|
def send_email_with_attachment(to_email, subject, body, filename, section_content):
    """
    Email *section_content* (appended to *body*) with the file at
    *filename* attached, via Gmail SMTP with STARTTLS.

    Credentials are read from the EMAIL_USER / EMAIL_PASSWORD environment
    variables. Never raises: failures are reported in the return string.

    :param to_email: Recipient address.
    :param subject: Email subject line.
    :param body: Lead-in text; the section content is appended below it.
    :param filename: Path of the file to attach.
    :param section_content: Generated section text included in the body.
    :return: A status string describing success or failure.
    """
    from_email = os.getenv("EMAIL_USER")
    email_password = os.getenv("EMAIL_PASSWORD")

    msg = MIMEMultipart()
    msg['From'] = from_email
    msg['To'] = to_email
    msg['Subject'] = subject

    msg.attach(MIMEText(body + f"\n\nContent of the section:\n\n{section_content}", 'plain'))

    try:
        with open(filename, 'rb') as attachment:
            part = MIMEBase('application', 'octet-stream')
            part.set_payload(attachment.read())
            encoders.encode_base64(part)
            # Bug fix: the header previously carried the literal text
            # "(unknown)" instead of the real attachment name.
            part.add_header('Content-Disposition', f'attachment; filename="{os.path.basename(filename)}"')
            msg.attach(part)

        with smtplib.SMTP('smtp.gmail.com', 587) as server:
            server.starttls()
            server.login(from_email, email_password)
            server.send_message(msg)

        return f"Email sent successfully to {to_email} for section '{subject}'."

    except Exception as e:
        return f"Failed to send email to {to_email}: {str(e)}"
|
|
|
|
|
def sanitize_filename_old(filename, max_length=100):
    """
    Legacy variant of sanitize_filename: strip characters that are invalid
    in file names and cap the result at *max_length* (default 100).
    """
    return re.sub(r'[<>:"/\\|?*]', '', filename)[:max_length]
|
|
|
|
|
def collect_basic_info():
    """
    Render the proposal-intake form and, on submit, generate the full
    PhD proposal section by section.

    Side effects: renders all output through Streamlit, writes the
    cumulative .docx to the working directory, and uploads it with
    upload_files_to_transfer_sh. Returns None.

    Bug fixes in this revision: the final status line referenced an
    undefined name `filepath` (now `filename`), and the link loop
    iterated an undefined name `upload_urls` (now `urls`).
    """
    st.title("PhD Proposal Generator")

    # When enabled, any field whose value is a URL is replaced by an
    # article summary before the prompts are built.
    summarize_urls = st.checkbox("Summarize URLs in data", value=False)

    research_topic = st.text_input("Research Topic")
    research_question = st.text_area("Research Question")
    objectives = st.text_area("Research Objectives (SMART)")
    methodology = st.text_area("Research Methodology")
    data_collection = st.text_area("Data Collection Methods")
    data_analysis = st.text_area("Data Analysis Methods")
    justification = st.text_area("Justification for Methodology")
    key_authors = st.text_area("Key Authors in the Field")
    recent_developments = st.text_area("Recent Developments in the Field")
    contribution = st.text_area("Contribution to the Field")
    literature_gap = st.text_area("Literature Gaps")
    timeline = st.text_area("Research Timeline (Phases and Deadlines)")
    total_timeframe = st.text_area("Total Timeframe (e.g., 3 years)")

    st.write("## Contact Information")
    email = st.text_input("Email")
    whatsapp_number = st.text_input("WhatsApp Number")

    if st.button('Submit'):
        data = {
            "research_topic": research_topic,
            "research_question": research_question,
            "objectives": objectives,
            "methodology": methodology,
            "data_collection": data_collection,
            "data_analysis": data_analysis,
            "justification": justification,
            "key_authors": key_authors,
            "recent_developments": recent_developments,
            "contribution": contribution,
            "literature_gap": literature_gap,
            "timeline": timeline,
            "total_timeframe": total_timeframe,
            "email": email,
            "whatsapp_number": whatsapp_number
        }

        # Accumulates "Section: content" strings; later sections see the
        # earlier ones via the prompt built in call_llm.
        history = []

        if summarize_urls:
            st.write("Summarizing URLs in the data...")
            data_updated = update_data_with_summaries(data)
        else:
            data_updated = data.copy()

        # (section title, prompt-builder) pairs; the builders come from the
        # `funtions` star import at the top of the file.
        sections_to_process = [
            ("Executive Summary", generate_executive_summary),
            ("Research Objectives", generate_research_objectives),
            ("Research Methodology", generate_methodology_section),
            ("Literature Review Outline", generate_literature_review_outline),
            ("Hypotheses", generate_hypotheses),
            ("Contribution Statement", generate_contribution_statement),
            ("Research Timeline", generate_research_timeline),
            ("Limitations", generate_limitations_section),
            ("Future Work", generate_future_work_section)
        ]

        sanitized_topic = sanitize_filename(research_topic, max_length=50)

        doc = create_document()
        for section_name, generate_prompt_func in sections_to_process:
            prompt = generate_prompt_func(data_updated)

            # NOTE(review): raw `data` (not data_updated) is passed here, so
            # missing-field detection ignores URL summaries — confirm intended.
            section_content = call_llm(prompt, data, history, section_name)

            history.append(f"{section_name}: {section_content}")

            st.subheader(section_name)
            st.write(section_content)

            # The document is cumulative: each download button offers every
            # section generated so far.
            doc = add_section_to_doc(doc, section_name, section_content)
            doc_bytes = get_docx_bytes(doc)

            st.download_button(
                label=f"Download {section_name} as DOCX",
                data=doc_bytes,
                file_name=f"{section_name.replace(' ', '_').lower()}.docx",
                mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
            )

        # Persist the final document (doc_bytes now holds all sections).
        filename = f"PhD_Proposal_for_{sanitized_topic}.docx"
        with open(filename, 'wb') as f:
            f.write(doc_bytes.getbuffer())

        file_paths = [filename]

        # html_content is unused here but part of the helper's return tuple.
        urls, html_content = upload_files_to_transfer_sh(file_paths)

        print(f"Proposal saved as {filename}. Uploaded to transfer.sh: {', '.join(urls)}")

        st.subheader("Uploaded File Links:")
        for url in urls:
            st.markdown(f"[Click to download your file]({url})")
|
|
|
|
|
|
|
|
collect_basic_info() |
|
|
|