# Source: Hugging Face Space app.py by SoDa12321 (commit 5a16870, verified).
import streamlit as st
from docx import Document
import re
import io
import os
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.base import MIMEBase
from email import encoders
from email.mime.text import MIMEText
from fpdf import FPDF
from dotenv import load_dotenv
from retrying import retry
from funtions import *
import logging
import random
import time
import newspaper
from newspaper import Article
max_prompt_lenth=6000
# Load environment variables from .env file
load_dotenv()
# Declare the exa search API
exa = Exa(api_key=os.getenv("EXA_API_KEY"))
# Define your API Model and key
client = Groq(api_key=os.getenv("GROQ_API_KEY"))
utilized_model = "llama3-70b-8192"
# Set up logging
logging.basicConfig(filename="llm_errors.log", level=logging.ERROR)
# Functions for the Exa Search content & Parameters for Highlights search
highlights_options = {
"num_sentences": 7, # Length of highlights
"highlights_per_url": 1, # Get the best highlight for each URL
}
# Add title and author contact
st.title("Academic PhD Proposal Generator")
# Display the image using st.image
st.image("https://i.sstatic.net/jUkkO0Fd.jpg", caption="PhD Proposal Generator", use_column_width=True)
#st.markdown("""
#**Website:** [Academic Resource](https://youruniversity.edu)
#""")
st.write("For collaboration, please contact the author πŸ‘‡")
st.write("Email: chatgpt4compas@gmail.com")
st.markdown("[WhatsApp contact πŸ“ž](https://web.whatsapp.com/send?phone=12085033653)")
def sanitize_filename(filename, max_length=10):
    """Strip characters that are illegal in file names and truncate.

    Drops every occurrence of <>:"/\\|?* and returns at most *max_length*
    characters of what remains.
    """
    illegal = '<>:"/\\|?*'
    cleaned = "".join(ch for ch in filename if ch not in illegal)
    return cleaned[:max_length]
@retry(wait_exponential_multiplier=1000, wait_exponential_max=10000, stop_max_attempt_number=5)
def call_llm_old(prompt):
    """Legacy one-shot generator: Exa search for context, then a single chat call."""
    # Pull the top highlight from each of three Exa search results.
    results = exa.search_and_contents(
        query=prompt,
        highlights=highlights_options,
        num_results=3,
        use_autoprompt=True,
    ).results
    contexts = [result.highlights[0] for result in results]
    messages = [
        {"role": "system", "content": "You are an academic PhD proposal generator. Read the provided contexts and use them to generate the proposal."},
        {"role": "user", "content": f"Sources: {contexts}\nQuestion: {prompt}"},
    ]
    response = client.chat.completions.create(model=utilized_model, messages=messages)
    return response.choices[0].message.content
@retry(wait_exponential_multiplier=1000, wait_exponential_max=10000, stop_max_attempt_number=5)
def call_llm(prompt, data, history, section_name):
    """
    Calls the LLM model to generate content, handling missing data fields by searching for context.

    :param prompt: The current prompt to generate content.
    :param data: The dictionary of input fields collected from the user.
    :param history: A list of previous prompts and responses to enhance the model's understanding.
    :param section_name: Name of the proposal section; selects a specialised system prompt.
    :return: Generated content based on the prompt and available data.
    """
    # Identify any user-supplied fields that were left blank.
    missing_fields = [key for key, value in data.items() if not value]
    if missing_fields:
        # One search query per missing field, anchored to the research topic.
        search_queries = [
            f"Provide context for {field} in relation to {data.get('research_topic', 'this research topic')}."
            for field in missing_fields
        ]
        # Fold the queries, conversation history and original prompt into a
        # single prompt, truncated to the model's character budget.
        search_prompt = f"Missing fields: {', '.join(missing_fields)}\n" \
                        f"History: {history}\n" \
                        f"Search Queries: {search_queries}\n" \
                        f"Original Prompt: {prompt}"
        prompt = search_prompt[:max_prompt_lenth-1]
    # Section-specific system prompts; a dispatch table replaces the original
    # if/elif chain.  Unknown sections fall back to the generic prompt.
    section_prompts = {
        "Executive Summary": "You are an expert in PhD proposals. Generate a concise, high-level summary of the research, focusing on the overall research problem, methodology, and expected contribution.",
        "Research Objectives": "You are an expert in PhD proposals. Write detailed research objectives, ensuring they follow SMART criteria (Specific, Measurable, Achievable, Relevant, Time-bound).",
        "Research Methodology": "You are an expert in research methodology. Generate a detailed description of the research design, including data collection and analysis methods, and justify their suitability.",
        "Literature Review Outline": "You are an academic expert in literature reviews. Provide a comprehensive literature review outline that covers the key authors, recent developments, and gaps in the research field.",
        "Hypotheses": "Generate clear and concise hypotheses for the research. These should be based on the research questions and provide a basis for further exploration.",
        "Contribution Statement": "Write a statement explaining the unique contributions this research will make to the field, focusing on how it fills gaps or advances current understanding.",
        "Research Timeline": "Create a detailed research timeline, outlining the different phases and milestones over the total timeframe.",
        "Limitations": "Provide an analysis of the limitations of the research, including potential weaknesses in methodology, data collection, or external factors.",
        "Future Work": "Write a section discussing potential areas of future work that could build on the current research findings.",
    }
    system_prompt = section_prompts.get(
        section_name,
        "You are an academic PhD proposal generator. Use the context and history to answer the user's question and fill in any missing fields.",
    )
    completion = client.chat.completions.create(
        model=utilized_model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ]
    )
    return completion.choices[0].message.content
def delay_with_backoff(attempt):
    """
    Sleep for a jittered, exponentially growing interval.

    The base delay is drawn uniformly from 7-9 seconds, doubled for every
    prior attempt, and capped at 10 seconds total.
    """
    jitter = random.uniform(7, 9)
    wait = jitter * (2 ** (attempt - 1))
    time.sleep(min(wait, 10))  # never sleep longer than 10 s
def call_llm_with_retries(prompt, data, history, section_name, max_retries=3):
    """
    Calls the LLM model to generate content, retrying up to max_retries times in case of errors.
    Implements randomized delay between retries with exponential backoff.

    :param prompt: The current prompt to generate content.
    :param data: The dictionary of input fields collected from the user.
    :param history: A list of previous prompts and responses to enhance the model's understanding.
    :param section_name: The name of the current section being generated.
    :param max_retries: Maximum number of retry attempts (default: 3).
    :return: Generated content, or an error message once retries are exhausted.
    """
    attempt = 0
    while attempt < max_retries:
        attempt += 1
        try:
            return call_llm(prompt, data, history, section_name)
        except Exception as exc:
            # Record the failure, surface it in the UI, then back off and retry.
            logging.error(f"Attempt {attempt}: Error calling LLM model for section '{section_name}': {str(exc)}")
            st.write(f"Attempt {attempt}: There was a problem generating '{section_name}'. Retrying...")
            if attempt == max_retries:
                return f"Failed to generate the section '{section_name}' after {max_retries} attempts. Please try again later."
            delay_with_backoff(attempt)
            st.write(f"Retrying {section_name} after delay...")
    # Defensive fallback; the loop above always returns before reaching here.
    return f"Error: Maximum retry attempts exceeded for {section_name}."
def extract_and_summarize_article(url):
    """
    Download the article at *url* and return newspaper3k's auto-summary.

    Any failure (network, parse, NLP) is logged and reported as an
    explanatory string instead of raising.

    :param url: The URL to be scraped.
    :return: The article summary, or an error message string.
    """
    try:
        page = Article(url)
        page.download()
        page.parse()
        page.nlp()  # populates .summary; must run after parse()
        return page.summary
    except Exception as exc:
        logging.error(f"Error summarizing article from URL {url}: {str(exc)}")
        return f"Error fetching or summarizing content from {url}"
def update_data_with_summaries(data):
    """
    Return a copy of *data* with every URL value replaced by an article summary.

    :param data: The original data dictionary.
    :return: A new dictionary with URL content summarized; non-URL values untouched.
    """
    result = dict(data)
    for field, content in data.items():
        # Skip anything that is not a string starting with http:// or https://.
        if not (isinstance(content, str) and re.match(r'http[s]?://', content)):
            continue
        st.write(f"Fetching and summarizing content for URL in '{field}'...")
        result[field] = extract_and_summarize_article(content)
    return result
def strip_md(text):
    """Drop bold/italic/heading markers, then backslash-escape leftover markdown punctuation."""
    for marker in ("**", "*", "#"):
        text = text.replace(marker, "")
    return re.sub(r'([!*_=~-])', r'\\\1', text)
def create_document():
    """Create a Word document pre-seeded with the proposal's top-level heading."""
    proposal = Document()
    proposal.add_heading("PhD Research Proposal", 0)
    return proposal
def add_section_to_doc(doc, section_name, section_content):
    """
    Append *section_content* to *doc* under a level-1 heading *section_name*.

    Markdown markers are stripped first; all backslashes (including the
    escapes strip_md introduces) are then removed before writing.
    """
    cleaned = strip_md(section_content).replace("\\", "")
    doc.add_heading(section_name, level=1)
    doc.add_paragraph(cleaned)
    return doc
def get_docx_bytes(doc):
    """Serialize *doc* into an in-memory buffer, rewound and ready to read."""
    buffer = io.BytesIO()
    doc.save(buffer)
    buffer.seek(0)
    return buffer
def send_email_with_attachment(to_email, subject, body, filename, section_content):
    """
    Email *section_content* inline plus the DOCX at *filename* as an attachment.

    Credentials come from the EMAIL_USER / EMAIL_PASSWORD environment
    variables; mail goes out through Gmail SMTP with STARTTLS.

    :param to_email: Recipient address.
    :param subject: Email subject line.
    :param body: Leading body text (section content is appended to it).
    :param filename: Path of the DOCX file to attach.
    :param section_content: Generated section text, echoed in the body.
    :return: A human-readable success or failure message (never raises).
    """
    from_email = os.getenv("EMAIL_USER")
    email_password = os.getenv("EMAIL_PASSWORD")
    msg = MIMEMultipart()
    msg['From'] = from_email
    msg['To'] = to_email
    msg['Subject'] = subject
    # Attach the body of the email
    msg.attach(MIMEText(body + f"\n\nContent of the section:\n\n{section_content}", 'plain'))
    # Attach the DOCX file
    try:
        with open(filename, 'rb') as attachment:
            part = MIMEBase('application', 'octet-stream')
            part.set_payload(attachment.read())
        encoders.encode_base64(part)
        # BUGFIX: the header previously carried a literal placeholder instead of
        # the real file name, so recipients received a nameless attachment.
        part.add_header('Content-Disposition', f'attachment; filename="{os.path.basename(filename)}"')
        msg.attach(part)
        # Send the email
        with smtplib.SMTP('smtp.gmail.com', 587) as server:
            server.starttls()
            server.login(from_email, email_password)
            server.send_message(msg)
        # Return success message
        return f"Email sent successfully to {to_email} for section '{subject}'."
    except Exception as e:
        return f"Failed to send email to {to_email}: {str(e)}"
def sanitize_filename_old(filename, max_length=100):
    """Drop filesystem-illegal characters and clip the result to *max_length* chars."""
    kept = (c for c in filename if c not in '<>:"/\\|?*')
    return "".join(kept)[:max_length]
def collect_basic_info():
    """
    Render the proposal input form and, on submit, drive the full pipeline:
    generate each section with the LLM, build a DOCX, save it, upload it,
    and show download links.

    Side effects: draws Streamlit widgets, writes a DOCX into the working
    directory, and uploads files via upload_files_to_transfer_sh (from the
    star-imported helpers module).
    """
    st.title("PhD Proposal Generator")
    # Basic Research Information
    # Checkbox to allow URL summarization
    summarize_urls = st.checkbox("Summarize URLs in data", value=False)
    research_topic = st.text_input("Research Topic")
    research_question = st.text_area("Research Question")
    objectives = st.text_area("Research Objectives (SMART)")
    methodology = st.text_area("Research Methodology")
    data_collection = st.text_area("Data Collection Methods")
    data_analysis = st.text_area("Data Analysis Methods")
    justification = st.text_area("Justification for Methodology")
    key_authors = st.text_area("Key Authors in the Field")
    recent_developments = st.text_area("Recent Developments in the Field")
    contribution = st.text_area("Contribution to the Field")
    literature_gap = st.text_area("Literature Gaps")
    timeline = st.text_area("Research Timeline (Phases and Deadlines)")
    total_timeframe = st.text_area("Total Timeframe (e.g., 3 years)")
    # Contact information
    st.write("## Contact Information")
    email = st.text_input("Email")
    whatsapp_number = st.text_input("WhatsApp Number")
    if not st.button('Submit'):
        return  # nothing to do until the user submits the form
    # Collect data
    data = {
        "research_topic": research_topic,
        "research_question": research_question,
        "objectives": objectives,
        "methodology": methodology,
        "data_collection": data_collection,
        "data_analysis": data_analysis,
        "justification": justification,
        "key_authors": key_authors,
        "recent_developments": recent_developments,
        "contribution": contribution,
        "literature_gap": literature_gap,
        "timeline": timeline,
        "total_timeframe": total_timeframe,
        "email": email,
        "whatsapp_number": whatsapp_number
    }
    # History of prompts/responses, fed back to the model for continuity.
    history = []
    # Summarize URLs if the user selected the option
    if summarize_urls:
        st.write("Summarizing URLs in the data...")
        data_updated = update_data_with_summaries(data)
    else:
        data_updated = data.copy()
    # Define the sections to process for an academic proposal
    sections_to_process = [
        ("Executive Summary", generate_executive_summary),
        ("Research Objectives", generate_research_objectives),
        ("Research Methodology", generate_methodology_section),
        ("Literature Review Outline", generate_literature_review_outline),
        ("Hypotheses", generate_hypotheses),
        ("Contribution Statement", generate_contribution_statement),
        ("Research Timeline", generate_research_timeline),
        ("Limitations", generate_limitations_section),
        ("Future Work", generate_future_work_section)
    ]
    # Sanitize the research topic for file names
    sanitized_topic = sanitize_filename(research_topic, max_length=50)
    # Create a new document
    doc = create_document()
    for section_name, generate_prompt_func in sections_to_process:
        # Generate prompt for each section
        prompt = generate_prompt_func(data_updated)
        # BUGFIX: pass data_updated (which carries the URL summaries) rather
        # than the raw data, and use the retrying wrapper so transient API
        # failures are retried with backoff instead of aborting the run.
        section_content = call_llm_with_retries(prompt, data_updated, history, section_name)
        # Add the current prompt and response to the history
        history.append(f"{section_name}: {section_content}")
        # Display the generated content for this section
        st.subheader(section_name)
        st.write(section_content)
        # Update document and create download link
        doc = add_section_to_doc(doc, section_name, section_content)
        doc_bytes = get_docx_bytes(doc)
        st.download_button(
            label=f"Download {section_name} as DOCX",
            data=doc_bytes,
            file_name=f"{section_name.replace(' ', '_').lower()}.docx",
            mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
        )
    # Save the fully assembled document to disk.
    filename = f"PhD_Proposal_for_{sanitized_topic}.docx"
    with open(filename, 'wb') as f:
        f.write(doc_bytes.getbuffer())
    # Prepare files for upload (including the generated proposal)
    file_paths = [filename]
    # Upload the files to transfer.sh
    urls, html_content = upload_files_to_transfer_sh(file_paths)
    # BUGFIX: the original referenced undefined names `filepath` and
    # `upload_urls`, raising NameError right after a successful upload.
    print(f"Proposal saved as {filename}. Uploaded to transfer.sh: {', '.join(urls)}")
    # Display each link in Streamlit using st.markdown
    st.subheader("Uploaded File Links:")
    for url in urls:
        st.markdown(f"[Click to download your file]({url})")
# Entry point: render the form and run the generation pipeline on submit.
collect_basic_info()