# sosa123454321's picture
# Update app.py
# 5801db0 verified
import streamlit as st
from docx import Document
import re
import io
import os
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.base import MIMEBase
from email import encoders
from email.mime.text import MIMEText
from fpdf import FPDF
from dotenv import load_dotenv
from retrying import retry
from funtions import *
import logging
import random
import time
from newspaper import Article
# Load environment variables from .env file
load_dotenv()
# Exa web-search client; `Exa` is presumably re-exported by the star import
# from `funtions` above — TODO confirm.
exa = Exa(api_key=os.getenv("EXA_API_KEY"))
# Define your API Model and key (`Groq` presumably also comes from `funtions`).
client = Groq(api_key=os.getenv("GROQ_API_KEY"))
# Groq-hosted Llama 3 70B model id used for every completion call below.
utilized_model = "llama3-70b-8192"
# Set up logging: only errors, written to a local log file.
logging.basicConfig(filename="llm_errors.log", level=logging.ERROR)
# Parameters for the Exa "highlights" search used to backfill missing fields.
highlights_options = {
    # Length of each highlight snippet, in sentences.
    "num_sentences": 7,
    # Keep only the single best highlight per result URL.
    "highlights_per_url": 1,
}
# Title and header for the Streamlit page.
st.title("Academic PhD Proposal Generator")
st.image(
    "https://i.sstatic.net/jUkkO0Fd.jpg",
    caption="PhD Proposal Generator",
    use_column_width=True
)
# Author contact details shown on the page.
st.write("For collaboration, please contact the author 👇")
st.write("Email: chatgpt4compas@gmail.com")
st.markdown("[WhatsApp contact 📞](https://web.whatsapp.com/send?phone=12085033653)")
def sanitize_filename(filename, max_length=10):
    """Return *filename* with Windows-illegal characters removed, truncated.

    Strips the characters <>:"/\\|?* and keeps at most *max_length*
    characters of what remains, so the result is safe to use as a file name.
    """
    cleaned = re.sub(r'[<>:"/\\|?*]', '', filename)
    return cleaned[:max_length]
@retry(wait_exponential_multiplier=1000, wait_exponential_max=10000, stop_max_attempt_number=5)
def call_llm(prompt, data, history, section_name):
    """Generate the text of one proposal section via the Groq LLM.

    If any value in *data* is empty, an Exa web search is run per missing
    field and the retrieved highlights are prepended to the prompt as extra
    context before calling the model.

    :param prompt: base user prompt describing the section to generate.
    :param data: dict of proposal fields; empty values trigger a web search.
    :param history: previously generated sections (currently unused here;
        kept for interface compatibility with callers).
    :param section_name: selects a specialised system prompt when recognised.
    :return: the model's generated text for the section.
    """
    missing_fields = [key for key, value in data.items() if not value]
    if missing_fields:
        search_queries = [
            f"Provide context for {field} in relation to {data.get('research_topic', 'this research topic')}."
            for field in missing_fields
        ]
        info = []
        for query in search_queries:
            search_response = exa.search_and_contents(
                query=query, highlights=highlights_options, num_results=3, use_autoprompt=True
            )
            # FIX: guard against an empty result set or a result with no
            # highlights — previously an IndexError here crashed the whole
            # generation run for a single failed search.
            try:
                info.append(search_response.results[0].highlights[0])
            except (IndexError, AttributeError) as e:
                logging.error(f"No search highlights for query {query!r}: {e}")
        prompt = f"Missing fields: {', '.join(missing_fields)}\nSource: {info}\nOriginal Prompt: {prompt}"
    # Generic fallback system prompt, overridden per section below.
    system_prompt = "You are an academic PhD proposal generator. Use the context and history to answer the user's question."
    section_prompts = {
        "Executive Summary": "Generate a concise, high-level summary of the research problem, methodology, and expected contribution.",
        "Research Objectives": "Write SMART research objectives (Specific, Measurable, Achievable, Relevant, Time-bound).",
        "Research Methodology": "Describe the research design, including data collection and analysis methods, and justify their suitability.",
        "Literature Review Outline": "Provide a literature review outline covering key authors, developments, and gaps.",
        "Hypotheses": "Generate hypotheses based on research questions, providing a basis for exploration.",
        "Contribution Statement": "Explain the unique contributions this research makes to the field.",
        "Research Timeline": "Create a detailed research timeline outlining phases and milestones.",
        "Limitations": "Analyze potential research limitations, including weaknesses in methodology or data collection.",
        "Future Work": "Discuss potential areas of future research building on the findings."
    }
    if section_name in section_prompts:
        system_prompt = section_prompts[section_name]
    completion = client.chat.completions.create(
        model=utilized_model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ]
    )
    return completion.choices[0].message.content
def extract_and_summarize_article(url):
    """Download an article with newspaper3k and return its NLP summary.

    Never raises: on any failure the error is logged to the module log file
    and a human-readable error string is returned instead.
    """
    try:
        piece = Article(url)
        piece.download()
        piece.parse()
        piece.nlp()
        return piece.summary
    except Exception as exc:
        logging.error(f"Error summarizing article from URL {url}: {str(exc)}")
        return f"Error fetching or summarizing content from {url}"
def update_data_with_summaries(data):
    """Return a copy of *data* where every URL value is replaced by a summary.

    A value counts as a URL when it is a string starting with http:// or
    https://; such values are fetched and summarised, with progress shown
    in the Streamlit page. Non-URL values are passed through unchanged.
    """
    result = dict(data)
    for field, content in data.items():
        if not (isinstance(content, str) and re.match(r'http[s]?://', content)):
            continue
        st.write(f"Fetching and summarizing content for URL in '{field}'...")
        result[field] = extract_and_summarize_article(content)
    return result
def create_document():
    """Build and return a fresh python-docx Document with the title heading."""
    proposal_doc = Document()
    proposal_doc.add_heading("PhD Research Proposal", 0)
    return proposal_doc
def add_section_to_doc(doc, section_name, section_content):
    """Append one proposal section to *doc*.

    Writes *section_name* as a level-1 heading followed by the content as a
    paragraph, mutating the document in place and returning it for chaining.
    """
    doc.add_heading(section_name, level=1)
    doc.add_paragraph(section_content)
    return doc
def add_suggested_title(section_name, section_content):
    """Prefix *section_content* with a 'Suggested Title' banner line."""
    banner = f"Suggested Title: {section_name}"
    return "\n\n".join((banner, section_content))
def suggest_title(data):
    """
    Suggests a suitable title for the PhD proposal based on the provided data.

    :param data: Dictionary containing the research topic, question, and other details.
    :return: Suggested title as a string, or an error message on failure.
    """
    try:
        # Assemble the user prompt from the fields most relevant to titling.
        user_prompt = "".join([
            "Based on the following information, suggest a concise and compelling title for a PhD research proposal:\n\n",
            f"Research Topic: {data.get('research_topic', '')}\n",
            f"Research Question: {data.get('research_question', '')}\n",
            f"Objectives: {data.get('objectives', '')}\n",
            f"Contribution: {data.get('contribution', '')}\n\n",
            "The title should be less than 15 words, clear, and indicative of the research focus.",
        ])
        title_system_prompt = "You are a title generator for academic PhD proposals. Suggest a concise, impactful title."
        # Call the LLM to generate the title
        response = client.chat.completions.create(
            model=utilized_model,
            messages=[
                {"role": "system", "content": title_system_prompt},
                {"role": "user", "content": user_prompt},
            ],
        )
        return response.choices[0].message.content.strip()
    except Exception as err:
        logging.error(f"Error generating title: {str(err)}")
        return "Error: Unable to generate a title at this time."
def get_docx_bytes(doc):
    """Serialize *doc* into an in-memory binary buffer, rewound to the start."""
    buffer = io.BytesIO()
    doc.save(buffer)
    buffer.seek(0)
    return buffer
def collect_basic_info():
    """Render the proposal form, then generate and offer the DOCX download.

    Collects the core research fields from the user. On Submit it
    (optionally) replaces URL values with article summaries, asks the LLM
    for a title and each section in turn, previews every section in the
    page, and exposes the assembled document via a download button.
    """
    st.write("## Basic Research Information")
    # When checked, any field value that is a URL gets replaced by an
    # auto-generated article summary before prompting the LLM.
    summarize_urls = st.checkbox("Summarize URLs in data", value=False)
    research_topic = st.text_input("Research Topic")
    research_question = st.text_area("Research Question")
    objectives = st.text_area("Research Objectives (SMART)")
    methodology = st.text_area("Research Methodology")
    data_collection = st.text_area("Data Collection Methods")
    data_analysis = st.text_area("Data Analysis Methods")
    justification = st.text_area("Justification for Methodology")
    key_authors = st.text_area("Key Authors in the Field")
    recent_developments = st.text_area("Recent Developments in the Field")
    contribution = st.text_area("Contribution to the Field")
    literature_gap = st.text_area("Literature Gaps")
    timeline = st.text_area("Research Timeline (Phases and Deadlines)")
    email = st.text_input("Email")
    if st.button('Submit'):
        # Empty fields here trigger Exa web searches inside call_llm.
        data = {
            "research_topic": research_topic,
            "research_question": research_question,
            "objectives": objectives,
            "methodology": methodology,
            "data_collection": data_collection,
            "data_analysis": data_analysis,
            "justification": justification,
            "key_authors": key_authors,
            "recent_developments": recent_developments,
            "contribution": contribution,
            "literature_gap": literature_gap,
            "timeline": timeline,
            "email": email
        }
        history = []
        if summarize_urls:
            st.write("Summarizing URLs in the data...")
            data = update_data_with_summaries(data)
        # Sections generated in order; each one is previewed and appended
        # to the document.
        sections = [
            "Executive Summary", "Research Objectives", "Research Methodology",
            "Literature Review Outline", "Hypotheses", "Contribution Statement",
            "Research Timeline", "Limitations", "Future Work"
        ]
        sanitized_topic = sanitize_filename(research_topic, max_length=50)
        doc = create_document()
        # The LLM-suggested title goes in first, as an untitled section.
        suggested_title = suggest_title(data)
        add_section_to_doc(doc,'',suggested_title)
        for section_name in sections:
            prompt = f"Generate content for {section_name}: {data}"
            section_content = call_llm(prompt, data, history, section_name)
            section_content = add_suggested_title(section_name, section_content)
            # Accumulated history is passed to subsequent call_llm calls.
            history.append(f"{section_name}: {section_content}")
            st.subheader(section_name)
            st.write(section_content)
            doc = add_section_to_doc(doc, section_name, section_content)
        doc_bytes = get_docx_bytes(doc)
        filename = f"PhD_Proposal_for_{sanitized_topic}.docx"
        # NOTE(review): also persisted to the server's working directory —
        # presumably intentional; the download button alone would suffice.
        with open(filename, 'wb') as f:
            f.write(doc_bytes.getbuffer())
        st.download_button(
            label=f"Download final report as DOCX",
            data=doc_bytes,
            file_name= filename,
            mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
        )
# Entry point: Streamlit re-runs the whole script on each interaction.
collect_basic_info()