| import streamlit as st |
| from docx import Document |
| import re |
| import io |
| import os |
| import smtplib |
| from email.mime.multipart import MIMEMultipart |
| from email.mime.base import MIMEBase |
| from email import encoders |
| from email.mime.text import MIMEText |
| from fpdf import FPDF |
| from dotenv import load_dotenv |
| from retrying import retry |
| from funtions import * |
| import logging |
| import random |
| import time |
| import newspaper |
| from newspaper import Article |
|
|
| |
# Load API keys and mail credentials from a local .env file into the environment.
load_dotenv()


# Exa web-search client used to pull reference highlights for prompts.
# NOTE(review): `Exa` is not imported by name in this file — presumably
# re-exported by the star import from `funtions`; confirm.
exa = Exa(api_key=os.getenv("EXA_API_KEY"))


# Groq chat-completion client plus the model name used for every generation.
# NOTE(review): `Groq` likewise appears to come from the `funtions` star import.
client = Groq(api_key=os.getenv("GROQ_API_KEY"))
utilized_model = "llama3-70b-8192"


# LLM failures are appended here (see call_llm_with_retries / extract_and_summarize_article).
logging.basicConfig(filename="llm_errors.log", level=logging.ERROR)


# Options for Exa's search_and_contents: one 7-sentence highlight per result URL.
highlights_options = {
    "num_sentences": 7,
    "highlights_per_url": 1,
}


# --- Streamlit page header and author contact details ---
st.title("Academic PhD Proposal Generator")

st.image("https://i.sstatic.net/jUkkO0Fd.jpg", caption="PhD Proposal Generator", use_column_width=True)

st.write("For collaboration, please contact the author 👇")
st.write("Email: chatgpt4compas@gmail.com")
st.markdown("[WhatsApp contact 📞](https://web.whatsapp.com/send?phone=12085033653)")
|
|
def sanitize_filename(filename, max_length=10):
    """Make a string safe to use as a file name.

    Strips the characters that are invalid in Windows file names
    (``< > : " / \\ | ? *``) and truncates the result to *max_length*
    characters.

    :param filename: The raw name to clean.
    :param max_length: Maximum number of characters to keep (default 10).
    :return: The sanitized, length-limited name.
    """
    cleaned = re.sub(r'[<>:"/\\|?*]', '', filename)
    return cleaned[:max_length]
|
|
@retry(wait_exponential_multiplier=1000, wait_exponential_max=10000, stop_max_attempt_number=5)
def call_llm_old(prompt):
    """Answer *prompt* with web-grounded context (legacy path).

    Runs an Exa search for the prompt, takes the first highlight of each of
    the top 3 results as context, and asks the Groq model to generate the
    proposal text from those sources. Retries with exponential backoff on
    failure (via the ``@retry`` decorator).
    """
    search_results = exa.search_and_contents(
        query=prompt,
        highlights=highlights_options,
        num_results=3,
        use_autoprompt=True,
    ).results
    contexts = [result.highlights[0] for result in search_results]

    messages = [
        {
            "role": "system",
            "content": "You are an academic PhD proposal generator. Read the provided contexts and use them to generate the proposal.",
        },
        {
            "role": "user",
            "content": f"Sources: {contexts}\nQuestion: {prompt}",
        },
    ]
    response = client.chat.completions.create(model=utilized_model, messages=messages)
    return response.choices[0].message.content
|
|
@retry(wait_exponential_multiplier=1000, wait_exponential_max=10000, stop_max_attempt_number=5)
def call_llm(prompt, data, history, section_name):
    """
    Calls the LLM model to generate content, handling missing data fields by searching for context.
    :param prompt: The current prompt to generate content.
    :param data: The dictionary of input fields collected from the user.
    :param history: A list of previous prompts and responses to enhance the model's understanding.
    :param section_name: Name of the proposal section; selects a specialized system prompt.
    :return: Generated content based on the prompt and available data.
    """
    # Fields the user left blank: ask the model to fill them from context.
    missing_fields = [field for field, value in data.items() if not value]

    if missing_fields:
        topic = data.get('research_topic', 'this research topic')
        search_queries = [
            f"Provide context for {field} in relation to {topic}."
            for field in missing_fields
        ]
        # Fold the missing-field hints and history into the prompt itself.
        prompt = (
            f"Missing fields: {', '.join(missing_fields)}\n"
            f"History: {history}\n"
            f"Search Queries: {search_queries}\n"
            f"Original Prompt: {prompt}"
        )

    # Per-section specialized system prompts; anything unlisted falls back to
    # the generic proposal-generator instruction.
    section_system_prompts = {
        "Executive Summary": "You are an expert in PhD proposals. Generate a concise, high-level summary of the research, focusing on the overall research problem, methodology, and expected contribution.",
        "Research Objectives": "You are an expert in PhD proposals. Write detailed research objectives, ensuring they follow SMART criteria (Specific, Measurable, Achievable, Relevant, Time-bound).",
        "Research Methodology": "You are an expert in research methodology. Generate a detailed description of the research design, including data collection and analysis methods, and justify their suitability.",
        "Literature Review Outline": "You are an academic expert in literature reviews. Provide a comprehensive literature review outline that covers the key authors, recent developments, and gaps in the research field.",
        "Hypotheses": "Generate clear and concise hypotheses for the research. These should be based on the research questions and provide a basis for further exploration.",
        "Contribution Statement": "Write a statement explaining the unique contributions this research will make to the field, focusing on how it fills gaps or advances current understanding.",
        "Research Timeline": "Create a detailed research timeline, outlining the different phases and milestones over the total timeframe.",
        "Limitations": "Provide an analysis of the limitations of the research, including potential weaknesses in methodology, data collection, or external factors.",
        "Future Work": "Write a section discussing potential areas of future work that could build on the current research findings.",
    }
    system_prompt = section_system_prompts.get(
        section_name,
        "You are an academic PhD proposal generator. Use the context and history to answer the user's question and fill in any missing fields.",
    )

    response = client.chat.completions.create(
        model=utilized_model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ],
    )
    return response.choices[0].message.content
|
|
def delay_with_backoff(attempt):
    """Sleep before the next retry attempt.

    The base wait is a random 7-9 seconds, doubled once per prior attempt,
    but the total sleep is capped at 10 seconds (so the cap is already
    reached on the second attempt).
    """
    base_wait = random.uniform(7, 9)
    time.sleep(min(base_wait * 2 ** (attempt - 1), 10))
|
|
def call_llm_with_retries(prompt, data, history, section_name, max_retries=3):
    """
    Calls the LLM model to generate content, retrying up to max_retries times in case of errors.
    Implements randomized delay between retries with exponential backoff.
    :param prompt: The current prompt to generate content.
    :param data: The dictionary of input fields collected from the user.
    :param history: A list of previous prompts and responses to enhance the model's understanding.
    :param section_name: The name of the current section being generated.
    :param max_retries: Maximum number of retry attempts (default: 3).
    :return: Generated content based on the prompt and available data, or error message after retries.
    """
    attempt = 0
    while attempt < max_retries:
        attempt += 1
        try:
            return call_llm(prompt, data, history, section_name)
        except Exception as e:
            # Record the failure and surface it in the UI before retrying.
            logging.error(f"Attempt {attempt}: Error calling LLM model for section '{section_name}': {str(e)}")
            st.write(f"Attempt {attempt}: There was a problem generating '{section_name}'. Retrying...")

            # Out of attempts: give the caller an error string instead of raising.
            if attempt == max_retries:
                return f"Failed to generate the section '{section_name}' after {max_retries} attempts. Please try again later."

            delay_with_backoff(attempt)
            st.write(f"Retrying {section_name} after delay...")

    # Only reachable when max_retries < 1 (the loop body never ran).
    return f"Error: Maximum retry attempts exceeded for {section_name}."
| |
def extract_and_summarize_article(url):
    """Fetch an article with newspaper3k and return its NLP summary.

    :param url: The URL to be scraped.
    :return: The article summary, or an error string (the failure is also
             logged) if download/parsing fails — this function never raises.
    """
    try:
        piece = Article(url)
        # download -> parse -> nlp is the required newspaper3k pipeline order.
        for step in (piece.download, piece.parse, piece.nlp):
            step()
        return piece.summary
    except Exception as e:
        logging.error(f"Error summarizing article from URL {url}: {str(e)}")
        return f"Error fetching or summarizing content from {url}"
|
|
def update_data_with_summaries(data):
    """Return a copy of *data* with URL values replaced by article summaries.

    Any string value beginning with ``http://`` or ``https://`` is fetched
    and summarized; every other value is carried over unchanged.

    :param data: The original data dictionary.
    :return: A new dictionary (the input is never mutated).
    """
    url_prefix = re.compile(r'http[s]?://')
    summarized = dict(data)
    for field, value in data.items():
        if isinstance(value, str) and url_prefix.match(value):
            st.write(f"Fetching and summarizing content for URL in '{field}'...")
            summarized[field] = extract_and_summarize_article(value)
    return summarized
def strip_md(text):
    """Strip common Markdown markers (**, *, #) and backslash-escape the
    remaining special characters (! * _ = ~ -) so they render literally."""
    for marker in ("**", "*", "#"):
        text = text.replace(marker, "")
    return re.sub(r'([!*_=~-])', r'\\\1', text)
|
|
def create_document():
    """Create a new Word document with the proposal title as its heading."""
    proposal_doc = Document()
    proposal_doc.add_heading("PhD Research Proposal", 0)
    return proposal_doc
|
|
def add_section_to_doc(doc, section_name, section_content):
    """Append a level-1 heading and the cleaned section body to *doc*.

    Markdown markers are stripped first, then any remaining backslashes
    (including those strip_md itself inserted) are removed.
    """
    cleaned = strip_md(section_content).replace("\\", "")
    doc.add_heading(section_name, level=1)
    doc.add_paragraph(cleaned)
    return doc
|
|
def get_docx_bytes(doc):
    """Serialize *doc* into an in-memory buffer, rewound and ready to read."""
    buffer = io.BytesIO()
    doc.save(buffer)
    buffer.seek(0)
    return buffer
|
|
def send_email_with_attachment(to_email, subject, body, filename, section_content):
    """Email the generated section text plus the DOCX file as an attachment.

    Credentials are read from the EMAIL_USER / EMAIL_PASSWORD environment
    variables and the message is sent through Gmail's SMTP server (STARTTLS
    on port 587).

    :param to_email: Recipient address.
    :param subject: Email subject line.
    :param body: Leading message text; the section content is appended to it.
    :param filename: Path of the file to attach.
    :param section_content: Generated section text included inline in the body.
    :return: A human-readable success or failure message (never raises).
    """
    from_email = os.getenv("EMAIL_USER")
    email_password = os.getenv("EMAIL_PASSWORD")

    msg = MIMEMultipart()
    msg['From'] = from_email
    msg['To'] = to_email
    msg['Subject'] = subject

    # Inline copy of the section so the recipient can read it without
    # opening the attachment.
    msg.attach(MIMEText(body + f"\n\nContent of the section:\n\n{section_content}", 'plain'))

    try:
        with open(filename, 'rb') as attachment:
            part = MIMEBase('application', 'octet-stream')
            part.set_payload(attachment.read())
        encoders.encode_base64(part)
        # BUG FIX: the header previously advertised the literal placeholder
        # "(unknown)" as the attachment name; use the file's real base name.
        part.add_header('Content-Disposition', f'attachment; filename={os.path.basename(filename)}')
        msg.attach(part)

        with smtplib.SMTP('smtp.gmail.com', 587) as server:
            server.starttls()
            server.login(from_email, email_password)
            server.send_message(msg)

        return f"Email sent successfully to {to_email} for section '{subject}'."

    except Exception as e:
        return f"Failed to send email to {to_email}: {str(e)}"
|
|
def sanitize_filename_old(filename, max_length=100):
    """Legacy sanitizer: drop Windows-invalid characters, cap at *max_length*."""
    without_invalid = re.sub(r'[<>:"/\\|?*]', '', filename)
    return without_invalid[:max_length]
|
|
def collect_basic_info():
    """Render the proposal input form and drive the whole generation flow.

    On submit: optionally summarize URL inputs, generate each proposal
    section with the LLM (with retries), preview every section in the UI,
    offer DOCX downloads, save the complete proposal to disk, and upload it
    to transfer.sh.

    Returns None; everything happens through the Streamlit UI and side
    effects (file write, upload).
    """
    st.title("PhD Proposal Generator")

    # Optional pre-processing: replace URL field values with article summaries.
    summarize_urls = st.checkbox("Summarize URLs in data", value=False)

    research_topic = st.text_input("Research Topic")
    research_question = st.text_area("Research Question")
    objectives = st.text_area("Research Objectives (SMART)")
    methodology = st.text_area("Research Methodology")
    data_collection = st.text_area("Data Collection Methods")
    data_analysis = st.text_area("Data Analysis Methods")
    justification = st.text_area("Justification for Methodology")
    key_authors = st.text_area("Key Authors in the Field")
    recent_developments = st.text_area("Recent Developments in the Field")
    contribution = st.text_area("Contribution to the Field")
    literature_gap = st.text_area("Literature Gaps")
    timeline = st.text_area("Research Timeline (Phases and Deadlines)")
    total_timeframe = st.text_area("Total Timeframe (e.g., 3 years)")

    st.write("## Contact Information")
    email = st.text_input("Email")
    whatsapp_number = st.text_input("WhatsApp Number")

    if st.button('Submit'):
        data = {
            "research_topic": research_topic,
            "research_question": research_question,
            "objectives": objectives,
            "methodology": methodology,
            "data_collection": data_collection,
            "data_analysis": data_analysis,
            "justification": justification,
            "key_authors": key_authors,
            "recent_developments": recent_developments,
            "contribution": contribution,
            "literature_gap": literature_gap,
            "timeline": timeline,
            "total_timeframe": total_timeframe,
            "email": email,
            "whatsapp_number": whatsapp_number
        }

        # Running transcript of generated sections, fed into later prompts.
        history = []

        if summarize_urls:
            st.write("Summarizing URLs in the data...")
            data_updated = update_data_with_summaries(data)
        else:
            data_updated = data.copy()

        # (section name, prompt-builder) pairs; the builders come from the
        # `funtions` star import.
        sections_to_process = [
            ("Executive Summary", generate_executive_summary),
            ("Research Objectives", generate_research_objectives),
            ("Research Methodology", generate_methodology_section),
            ("Literature Review Outline", generate_literature_review_outline),
            ("Hypotheses", generate_hypotheses),
            ("Contribution Statement", generate_contribution_statement),
            ("Research Timeline", generate_research_timeline),
            ("Limitations", generate_limitations_section),
            ("Future Work", generate_future_work_section)
        ]

        sanitized_topic = sanitize_filename(research_topic, max_length=50)

        doc = create_document()
        for section_name, generate_prompt_func in sections_to_process:
            prompt = generate_prompt_func(data_updated)
            section_content = call_llm_with_retries(prompt, data_updated, history, section_name)
            history.append(f"{section_name}: {section_content}")

            st.subheader(section_name)
            st.write(section_content)

            doc = add_section_to_doc(doc, section_name, section_content)
            doc_bytes = get_docx_bytes(doc)

            # NOTE(review): despite the per-section label, this download
            # contains every section generated so far, since `doc` is
            # cumulative — confirm whether that is intended.
            st.download_button(
                label=f"Download {section_name} as DOCX",
                data=doc_bytes,
                file_name=f"{section_name.replace(' ', '_').lower()}.docx",
                mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
            )

        # Persist the complete proposal to disk.
        filename = f"PhD_Proposal_for_{sanitized_topic}.docx"
        with open(filename, 'wb') as f:
            f.write(doc_bytes.getbuffer())

        file_paths = [filename]
        urls, html_content = upload_files_to_transfer_sh(file_paths)

        # BUG FIX: this line previously referenced the undefined name
        # `filepath`, raising NameError right after the upload.
        print(f"Proposal saved as {filename}. Uploaded to transfer.sh: {', '.join(urls)}")
| |
# Entry point: build the entire app UI when Streamlit executes this script.
collect_basic_info()
|
|