import streamlit as st
from docx import Document
import re
import io
import os
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.base import MIMEBase
from email import encoders
from email.mime.text import MIMEText
from fpdf import FPDF
from dotenv import load_dotenv
from retrying import retry
# NOTE(review): "funtions" looks like a typo for "functions", but the import
# must match the module file on disk -- confirm before renaming. This star
# import supplies Exa, Groq, the generate_*_prompt helpers and
# upload_files_to_transfer_sh used below.
from funtions import *
import logging
import random
import time
import newspaper
from newspaper import Article

# Hard cap (in characters) on the prompt sent to the LLM.
# Name keeps the original (misspelled) identifier for backward compatibility.
max_prompt_lenth = 6000

# Load environment variables (API keys, email credentials) from a .env file.
load_dotenv()

# Exa search client and Groq LLM client (classes come from the `funtions` module).
exa = Exa(api_key=os.getenv("EXA_API_KEY"))
client = Groq(api_key=os.getenv("GROQ_API_KEY"))
utilized_model = "llama3-70b-8192"

# Persist LLM errors to a log file for post-mortem debugging.
logging.basicConfig(filename="llm_errors.log", level=logging.ERROR)

# Parameters for Exa highlight extraction.
highlights_options = {
    "num_sentences": 7,       # Length of each highlight
    "highlights_per_url": 1,  # Get the best highlight for each URL
}

# ---------------------------------------------------------------------------
# Page header and author contact details
# ---------------------------------------------------------------------------
st.title("Academic PhD Proposal Generator")
st.image(
    "https://i.sstatic.net/jUkkO0Fd.jpg",
    caption="PhD Proposal Generator",
    use_column_width=True,
)
st.write("For collaboration, please contact the author 👇")
st.write("Email: chatgpt4compas@gmail.com")
st.markdown("[WhatsApp contact 📞](https://web.whatsapp.com/send?phone=12085033653)")


def sanitize_filename(filename, max_length=10):
    """
    Sanitize a filename by removing characters that are invalid in file
    names and truncating the result to ``max_length`` characters.

    :param filename: Raw filename candidate (e.g. a research topic).
    :param max_length: Maximum number of characters to keep.
    :return: Sanitized, length-limited filename string.
    """
    # Remove characters that are invalid in file names (<>:"/\|?*).
    sanitized = re.sub(r'[<>:"/\\|?*]', '', filename)
    return sanitized[:max_length]


@retry(wait_exponential_multiplier=1000, wait_exponential_max=10000, stop_max_attempt_number=5)
def call_llm_old(prompt):
    """
    Legacy LLM call: ground the prompt with the best Exa search highlights
    before asking the model to generate proposal content.

    :param prompt: Question / instruction for the proposal generator.
    :return: Generated content from the LLM.
    """
    search_response = exa.search_and_contents(
        query=prompt,
        highlights=highlights_options,
        num_results=3,
        use_autoprompt=True,
    )
    # One best highlight per result, used as grounding context.
    info = [sr.highlights[0] for sr in search_response.results]
    system_prompt = "You are an academic PhD proposal generator. Read the provided contexts and use them to generate the proposal."
    user_prompt = f"Sources: {info}\nQuestion: {prompt}"
    completion = client.chat.completions.create(
        model=utilized_model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ]
    )
    return completion.choices[0].message.content


# Section-specific system prompts; dispatching from a dict replaces the
# original if/elif chain. Sections absent from this table fall back to the
# generic proposal-generator prompt.
_SECTION_SYSTEM_PROMPTS = {
    "Executive Summary": "You are an expert in PhD proposals. Generate a concise, high-level summary of the research, focusing on the overall research problem, methodology, and expected contribution.",
    "Research Objectives": "You are an expert in PhD proposals. Write detailed research objectives, ensuring they follow SMART criteria (Specific, Measurable, Achievable, Relevant, Time-bound).",
    "Research Methodology": "You are an expert in research methodology. Generate a detailed description of the research design, including data collection and analysis methods, and justify their suitability.",
    "Literature Review Outline": "You are an academic expert in literature reviews. Provide a comprehensive literature review outline that covers the key authors, recent developments, and gaps in the research field.",
    "Hypotheses": "Generate clear and concise hypotheses for the research. These should be based on the research questions and provide a basis for further exploration.",
    "Contribution Statement": "Write a statement explaining the unique contributions this research will make to the field, focusing on how it fills gaps or advances current understanding.",
    "Research Timeline": "Create a detailed research timeline, outlining the different phases and milestones over the total timeframe.",
    "Limitations": "Provide an analysis of the limitations of the research, including potential weaknesses in methodology, data collection, or external factors.",
    "Future Work": "Write a section discussing potential areas of future work that could build on the current research findings.",
}


@retry(wait_exponential_multiplier=1000, wait_exponential_max=10000, stop_max_attempt_number=5)
def call_llm(prompt, data, history, section_name):
    """
    Call the LLM to generate content for one proposal section, handling
    missing user-input fields by folding search queries for them into
    the prompt.

    :param prompt: The current prompt to generate content.
    :param data: The dictionary of input fields collected from the user.
    :param history: A list of previous prompts and responses to enhance
        the model's understanding.
    :param section_name: Name of the section being generated; selects a
        specialised system prompt when one exists.
    :return: Generated content based on the prompt and available data.
    """
    # Identify any fields the user left blank.
    missing_fields = [key for key, value in data.items() if not value]
    if missing_fields:
        # Build search queries for the missing fields, anchored on the topic.
        search_queries = [
            f"Provide context for {field} in relation to {data.get('research_topic', 'this research topic')}."
            for field in missing_fields
        ]
        # Combine the queries with the history and current prompt, then
        # clamp to the maximum prompt length.
        search_prompt = f"Missing fields: {', '.join(missing_fields)}\n" \
                        f"History: {history}\n" \
                        f"Search Queries: {search_queries}\n" \
                        f"Original Prompt: {prompt}"
        prompt = search_prompt[:max_prompt_lenth - 1]

    # Choose the section-specific system prompt, or the generic fallback.
    system_prompt = _SECTION_SYSTEM_PROMPTS.get(
        section_name,
        "You are an academic PhD proposal generator. Use the context and history to answer the user's question and fill in any missing fields.",
    )
    completion = client.chat.completions.create(
        model=utilized_model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ]
    )
    return completion.choices[0].message.content


def delay_with_backoff(attempt):
    """
    Sleep with an increasing backoff: a random 7-9 second base delay doubled
    per attempt, capped at 10 seconds.

    :param attempt: 1-based retry attempt number.
    """
    delay = random.uniform(7, 9) * (2 ** (attempt - 1))
    delay = min(delay, 10)  # Cap the delay at 10 seconds
    time.sleep(delay)


def call_llm_with_retries(prompt, data, history, section_name, max_retries=3):
    """
    Call the LLM with up to ``max_retries`` attempts, backing off between
    failures and surfacing progress in the Streamlit UI.

    :param prompt: The current prompt to generate content.
    :param data: The dictionary of input fields collected from the user.
    :param history: A list of previous prompts and responses.
    :param section_name: The name of the current section being generated.
    :param max_retries: Maximum number of retry attempts (default: 3).
    :return: Generated content, or an error message after all retries fail.
    """
    for attempt in range(1, max_retries + 1):
        try:
            return call_llm(prompt, data, history, section_name)
        except Exception as e:
            # Log the failure, inform the user, and retry after a delay.
            logging.error(f"Attempt {attempt}: Error calling LLM model for section '{section_name}': {str(e)}")
            st.write(f"Attempt {attempt}: There was a problem generating '{section_name}'. Retrying...")
            if attempt == max_retries:
                return f"Failed to generate the section '{section_name}' after {max_retries} attempts. Please try again later."
            delay_with_backoff(attempt)
            st.write(f"Retrying {section_name} after delay...")
    return f"Error: Maximum retry attempts exceeded for {section_name}."


def extract_and_summarize_article(url):
    """
    Fetch and summarize content from a URL using the newspaper3k module.

    :param url: The URL to be scraped.
    :return: A summarized version of the article content, or an error string.
    """
    try:
        article = Article(url)
        article.download()
        article.parse()
        article.nlp()  # Enables article.summary
        return article.summary
    except Exception as e:
        logging.error(f"Error summarizing article from URL {url}: {str(e)}")
        return f"Error fetching or summarizing content from {url}"


def update_data_with_summaries(data):
    """
    Return a copy of ``data`` with every URL value replaced by a summary of
    the page it points to.

    :param data: The original data dictionary.
    :return: A new dictionary with URL content summarized.
    """
    data_updated = data.copy()
    for key, value in data.items():
        # Treat any http(s)-prefixed string value as a URL.
        if isinstance(value, str) and re.match(r'http[s]?://', value):
            st.write(f"Fetching and summarizing content for URL in '{key}'...")
            data_updated[key] = extract_and_summarize_article(value)
    return data_updated


def strip_md(text):
    """Strip common Markdown markers, then backslash-escape residual
    formatting characters (the escapes are removed again in
    add_section_to_doc before writing to the document)."""
    text = text.replace("**", "").replace("*", "").replace("#", "")
    return re.sub(r'([!*_=~-])', r'\\\1', text)


def create_document():
    """Create a new DOCX document with the proposal title heading."""
    doc = Document()
    doc.add_heading("PhD Research Proposal", 0)
    return doc


def add_section_to_doc(doc, section_name, section_content):
    """Append one proposal section (heading + cleaned body text) to ``doc``."""
    section_content = strip_md(section_content)
    section_content = section_content.replace("\\", "")  # Remove backslashes
    doc.add_heading(section_name, level=1)
    doc.add_paragraph(section_content)
    return doc


def get_docx_bytes(doc):
    """Serialize ``doc`` into an in-memory BytesIO, rewound to offset 0."""
    doc_io = io.BytesIO()
    doc.save(doc_io)
    doc_io.seek(0)
    return doc_io


def send_email_with_attachment(to_email, subject, body, filename, section_content):
    """
    Email a proposal section with the generated DOCX attached, via Gmail SMTP.
    Credentials come from the EMAIL_USER / EMAIL_PASSWORD environment variables.

    :param to_email: Recipient address.
    :param subject: Email subject (the section name).
    :param body: Email body preamble.
    :param filename: Path of the DOCX file to attach.
    :param section_content: Section text appended to the body.
    :return: Human-readable success or failure message.
    """
    from_email = os.getenv("EMAIL_USER")
    email_password = os.getenv("EMAIL_PASSWORD")
    msg = MIMEMultipart()
    msg['From'] = from_email
    msg['To'] = to_email
    msg['Subject'] = subject
    # Attach the body of the email.
    msg.attach(MIMEText(body + f"\n\nContent of the section:\n\n{section_content}", 'plain'))
    try:
        # Attach the DOCX file. BUGFIX: the header previously contained the
        # literal placeholder "(unknown)" instead of the real filename.
        with open(filename, 'rb') as attachment:
            part = MIMEBase('application', 'octet-stream')
            part.set_payload(attachment.read())
        encoders.encode_base64(part)
        part.add_header('Content-Disposition', f'attachment; filename="{os.path.basename(filename)}"')
        msg.attach(part)
        # Send the email over STARTTLS.
        with smtplib.SMTP('smtp.gmail.com', 587) as server:
            server.starttls()
            server.login(from_email, email_password)
            server.send_message(msg)
        return f"Email sent successfully to {to_email} for section '{subject}'."
    except Exception as e:
        return f"Failed to send email to {to_email}: {str(e)}"


def sanitize_filename_old(filename, max_length=100):
    """Legacy variant of sanitize_filename with a 100-character default."""
    sanitized = re.sub(r'[<>:"/\\|?*]', '', filename)
    return sanitized[:max_length]


def collect_basic_info():
    """
    Render the main Streamlit form, and on submit: generate every proposal
    section via the LLM, stream each into a DOCX with per-section download
    buttons, save the full proposal to disk, and upload it to transfer.sh.
    """
    st.title("PhD Proposal Generator")

    # Basic research information inputs.
    summarize_urls = st.checkbox("Summarize URLs in data", value=False)
    research_topic = st.text_input("Research Topic")
    research_question = st.text_area("Research Question")
    objectives = st.text_area("Research Objectives (SMART)")
    methodology = st.text_area("Research Methodology")
    data_collection = st.text_area("Data Collection Methods")
    data_analysis = st.text_area("Data Analysis Methods")
    justification = st.text_area("Justification for Methodology")
    key_authors = st.text_area("Key Authors in the Field")
    recent_developments = st.text_area("Recent Developments in the Field")
    contribution = st.text_area("Contribution to the Field")
    literature_gap = st.text_area("Literature Gaps")
    timeline = st.text_area("Research Timeline (Phases and Deadlines)")
    total_timeframe = st.text_area("Total Timeframe (e.g., 3 years)")

    # Contact information.
    st.write("## Contact Information")
    email = st.text_input("Email")
    whatsapp_number = st.text_input("WhatsApp Number")

    if st.button('Submit'):
        data = {
            "research_topic": research_topic,
            "research_question": research_question,
            "objectives": objectives,
            "methodology": methodology,
            "data_collection": data_collection,
            "data_analysis": data_analysis,
            "justification": justification,
            "key_authors": key_authors,
            "recent_developments": recent_developments,
            "contribution": contribution,
            "literature_gap": literature_gap,
            "timeline": timeline,
            "total_timeframe": total_timeframe,
            "email": email,
            "whatsapp_number": whatsapp_number,
        }

        # Prompt/response history accumulated across sections.
        history = []

        # Summarize URLs if the user selected the option.
        if summarize_urls:
            st.write("Summarizing URLs in the data...")
            data_updated = update_data_with_summaries(data)
        else:
            data_updated = data.copy()

        # Sections of an academic proposal; prompt builders come from `funtions`.
        sections_to_process = [
            ("Executive Summary", generate_executive_summary),
            ("Research Objectives", generate_research_objectives),
            ("Research Methodology", generate_methodology_section),
            ("Literature Review Outline", generate_literature_review_outline),
            ("Hypotheses", generate_hypotheses),
            ("Contribution Statement", generate_contribution_statement),
            ("Research Timeline", generate_research_timeline),
            ("Limitations", generate_limitations_section),
            ("Future Work", generate_future_work_section),
        ]

        sanitized_topic = sanitize_filename(research_topic, max_length=50)
        doc = create_document()

        for section_name, generate_prompt_func in sections_to_process:
            prompt = generate_prompt_func(data_updated)
            # BUGFIX: previously called call_llm with the raw `data`, so the
            # URL summaries in `data_updated` were never used; now route
            # through the retrying wrapper with the summarized data.
            section_content = call_llm_with_retries(prompt, data_updated, history, section_name)

            # Record this exchange so later sections stay consistent.
            history.append(f"{section_name}: {section_content}")

            # Show the generated content for this section.
            st.subheader(section_name)
            st.write(section_content)

            # Update the document and offer a per-section download.
            doc = add_section_to_doc(doc, section_name, section_content)
            doc_bytes = get_docx_bytes(doc)
            st.download_button(
                label=f"Download {section_name} as DOCX",
                data=doc_bytes,
                file_name=f"{section_name.replace(' ', '_').lower()}.docx",
                mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            )

        # Save the completed proposal to disk.
        filename = f"PhD_Proposal_for_{sanitized_topic}.docx"
        with open(filename, 'wb') as f:
            f.write(doc_bytes.getbuffer())

        # Upload the generated proposal to transfer.sh.
        file_paths = [filename]
        urls, html_content = upload_files_to_transfer_sh(file_paths)
        # BUGFIX: previously referenced undefined names `filepath` and
        # `upload_urls`, raising NameError right after generation finished.
        print(f"Proposal saved as {filename}. Uploaded to transfer.sh: {', '.join(urls)}")

        # Display each uploaded-file link.
        st.subheader("Uploaded File Links:")
        for url in urls:
            st.markdown(f"[Click to download your file]({url})")


collect_basic_info()