SoDa12321 committed on
Commit
022961b
·
verified ·
1 Parent(s): 7e00249

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +375 -0
app.py ADDED
@@ -0,0 +1,375 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from docx import Document
3
+ import re
4
+ import io
5
+ import os
6
+ import smtplib
7
+ from email.mime.multipart import MIMEMultipart
8
+ from email.mime.base import MIMEBase
9
+ from email import encoders
10
+ from email.mime.text import MIMEText
11
+ from fpdf import FPDF
12
+ from dotenv import load_dotenv
13
+ from retrying import retry
14
+ from funtions import *
15
+ import logging
16
+ import random
17
+ import time
18
+ import newspaper
19
+ from newspaper import Article
20
+
21
# ---------------------------------------------------------------------------
# Module-level setup: environment, API clients, logging, and the Streamlit
# page header. Order matters — Streamlit renders elements in call order.
# NOTE(review): `Exa` and `Groq` are not imported explicitly here; presumably
# they come from the star import of `funtions` — verify against that module.
# ---------------------------------------------------------------------------

# Load environment variables from .env file
load_dotenv()

# Declare the exa search API (key read from the EXA_API_KEY env variable)
exa = Exa(api_key=os.getenv("EXA_API_KEY"))

# Define your API Model and key (Groq key read from GROQ_API_KEY)
client = Groq(api_key=os.getenv("GROQ_API_KEY"))
utilized_model = "llama3-70b-8192"

# Set up logging: LLM/network errors are appended to llm_errors.log
logging.basicConfig(filename="llm_errors.log", level=logging.ERROR)

# Parameters for the Exa highlights search used when building context
highlights_options = {
    "num_sentences": 7,  # Length of highlights
    "highlights_per_url": 1,  # Get the best highlight for each URL
}

# Add title and author contact
st.title("Academic PhD Proposal Generator")

# Display the image using st.image
st.image("https://i.sstatic.net/jUkkO0Fd.jpg", caption="PhD Proposal Generator", use_column_width=True)
#st.markdown("""
#**Website:** [Academic Resource](https://youruniversity.edu)
#""")
st.write("For collaboration, please contact the author 👇")
st.write("Email: chatgpt4compas@gmail.com")
st.markdown("[WhatsApp contact 📞](https://web.whatsapp.com/send?phone=12085033653)")
51
+
52
def sanitize_filename(filename, max_length=10):
    """
    Make *filename* safe for use as a file name.

    Characters that are invalid in Windows/POSIX file names
    (< > : " / \\ | ? *) are dropped, and the result is truncated
    to the first *max_length* characters.
    """
    forbidden = '<>:"/\\|?*'
    cleaned = "".join(ch for ch in filename if ch not in forbidden)
    # Keep only the leading max_length characters.
    return cleaned[:max_length]
62
+
63
@retry(wait_exponential_multiplier=1000, wait_exponential_max=10000, stop_max_attempt_number=5)
def call_llm_old(prompt):
    """
    Earlier single-shot generator: fetch context highlights from Exa for
    *prompt*, then make one Groq chat-completion call and return its text.

    Retries with exponential backoff (up to 5 attempts) on any exception.
    """
    # Gather one highlight per search result as source material.
    hits = exa.search_and_contents(
        query=prompt, highlights=highlights_options, num_results=3, use_autoprompt=True
    )
    snippets = []
    for hit in hits.results:
        snippets.append(hit.highlights[0])

    system_prompt = "You are an academic PhD proposal generator. Read the provided contexts and use them to generate the proposal."
    user_prompt = f"Sources: {snippets}\nQuestion: {prompt}"

    reply = client.chat.completions.create(
        model=utilized_model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ]
    )
    return reply.choices[0].message.content
79
+
80
@retry(wait_exponential_multiplier=1000, wait_exponential_max=10000, stop_max_attempt_number=5)
def call_llm(prompt, data, history, section_name):
    """
    Generate one proposal section with the Groq chat model.

    :param prompt: The current prompt to generate content.
    :param data: The dictionary of input fields collected from the user;
        fields with falsy values are treated as missing and folded into the
        prompt as search hints (no external search is performed here).
    :param history: A list of previous prompts and responses to enhance the model's understanding.
    :param section_name: Selects a section-specific system prompt; unknown
        names fall back to a generic proposal-generator prompt.
    :return: Generated content based on the prompt and available data.
    """
    # Identify any fields the user left empty.
    missing = [field for field, value in data.items() if not value]

    if missing:
        # Fold per-field search hints plus the running history into the
        # prompt so the model can compensate for the gaps.
        queries = [
            f"Provide context for {field} in relation to {data.get('research_topic', 'this research topic')}."
            for field in missing
        ]
        prompt = (
            f"Missing fields: {', '.join(missing)}\n"
            f"History: {history}\n"
            f"Search Queries: {queries}\n"
            f"Original Prompt: {prompt}"
        )

    # Section-specific system prompts; anything unlisted uses the default.
    section_prompts = {
        "Executive Summary": "You are an expert in PhD proposals. Generate a concise, high-level summary of the research, focusing on the overall research problem, methodology, and expected contribution.",
        "Research Objectives": "You are an expert in PhD proposals. Write detailed research objectives, ensuring they follow SMART criteria (Specific, Measurable, Achievable, Relevant, Time-bound).",
        "Research Methodology": "You are an expert in research methodology. Generate a detailed description of the research design, including data collection and analysis methods, and justify their suitability.",
        "Literature Review Outline": "You are an academic expert in literature reviews. Provide a comprehensive literature review outline that covers the key authors, recent developments, and gaps in the research field.",
        "Hypotheses": "Generate clear and concise hypotheses for the research. These should be based on the research questions and provide a basis for further exploration.",
        "Contribution Statement": "Write a statement explaining the unique contributions this research will make to the field, focusing on how it fills gaps or advances current understanding.",
        "Research Timeline": "Create a detailed research timeline, outlining the different phases and milestones over the total timeframe.",
        "Limitations": "Provide an analysis of the limitations of the research, including potential weaknesses in methodology, data collection, or external factors.",
        "Future Work": "Write a section discussing potential areas of future work that could build on the current research findings.",
    }
    system_prompt = section_prompts.get(
        section_name,
        "You are an academic PhD proposal generator. Use the context and history to answer the user's question and fill in any missing fields.",
    )

    response = client.chat.completions.create(
        model=utilized_model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ]
    )
    return response.choices[0].message.content
137
+
138
def delay_with_backoff(attempt):
    """
    Sleep before the next retry, growing the pause exponentially.

    The base pause is drawn uniformly from 7-9 seconds and doubled for
    each prior attempt, but the total sleep never exceeds 10 seconds.
    """
    base = random.uniform(7, 9)
    time.sleep(min(base * 2 ** (attempt - 1), 10))
147
+
148
def call_llm_with_retries(prompt, data, history, section_name, max_retries=3):
    """
    Generate a section via call_llm, retrying on any exception.

    Errors are logged and surfaced to the Streamlit UI; between attempts a
    randomized exponential backoff delay is applied. After *max_retries*
    failures a human-readable error string is returned instead of raising.

    :param prompt: The current prompt to generate content.
    :param data: The dictionary of input fields collected from the user.
    :param history: A list of previous prompts and responses to enhance the model's understanding.
    :param section_name: The name of the current section being generated.
    :param max_retries: Maximum number of retry attempts (default: 3).
    :return: Generated content, or an error message after exhausting retries.
    """
    attempt = 0
    while attempt < max_retries:
        attempt += 1
        try:
            return call_llm(prompt, data, history, section_name)
        except Exception as exc:
            # Record the failure and tell the user we are retrying.
            logging.error(f"Attempt {attempt}: Error calling LLM model for section '{section_name}': {str(exc)}")
            st.write(f"Attempt {attempt}: There was a problem generating '{section_name}'. Retrying...")

            if attempt == max_retries:
                return f"Failed to generate the section '{section_name}' after {max_retries} attempts. Please try again later."

            delay_with_backoff(attempt)
            st.write(f"Retrying {section_name} after delay...")

    # Defensive fallback; the loop above always returns first.
    return f"Error: Maximum retry attempts exceeded for {section_name}."
180
+
181
def extract_and_summarize_article(url):
    """
    Download an article with newspaper3k and return its auto-generated summary.

    On any failure the error is logged to llm_errors.log and a human-readable
    message is returned instead of raising.

    :param url: The URL to be scraped.
    :return: A summarized version of the article content, or an error string.
    """
    try:
        piece = Article(url)
        # download -> parse -> nlp must run in this order; nlp() enables .summary
        for step in (piece.download, piece.parse, piece.nlp):
            step()
        return piece.summary
    except Exception as exc:
        logging.error(f"Error summarizing article from URL {url}: {str(exc)}")
        return f"Error fetching or summarizing content from {url}"
196
+
197
def update_data_with_summaries(data):
    """
    Return a copy of *data* in which every string value that looks like a
    URL (starts with http:// or https://) is replaced by a fetched summary
    of that page. Non-URL values are passed through untouched.

    :param data: The original data dictionary.
    :return: A new dictionary with URL content summarized.
    """
    result = {}
    for field, content in data.items():
        if isinstance(content, str) and re.match(r'http[s]?://', content):
            st.write(f"Fetching and summarizing content for URL in '{field}'...")
            content = extract_and_summarize_article(content)
        result[field] = content
    return result
211
def strip_md(text):
    """
    Drop common Markdown markers (**, *, #) from *text*, then backslash-escape
    any remaining ! * _ = ~ - characters.

    NOTE(review): add_section_to_doc later strips all backslashes from this
    output, which undoes the escaping — confirm the escape step is still wanted.
    """
    for marker in ("**", "*", "#"):
        text = text.replace(marker, "")
    return re.sub(r'([!*_=~-])', r'\\\1', text)
214
+
215
def create_document():
    """Create a fresh python-docx Document headed "PhD Research Proposal"."""
    document = Document()
    document.add_heading("PhD Research Proposal", 0)
    return document
219
+
220
def add_section_to_doc(doc, section_name, section_content):
    """
    Append one titled section (heading level 1 + paragraph) to *doc*.

    The content is passed through strip_md and then has every backslash
    removed (which also drops the escapes strip_md just added).
    :return: the same document, for chaining.
    """
    cleaned = strip_md(section_content).replace("\\", "")
    doc.add_heading(section_name, level=1)
    doc.add_paragraph(cleaned)
    return doc
226
+
227
def get_docx_bytes(doc):
    """Serialize *doc* into an in-memory buffer, rewound to position 0."""
    buffer = io.BytesIO()
    doc.save(buffer)
    buffer.seek(0)
    return buffer
232
+
233
def send_email_with_attachment(to_email, subject, body, filename, section_content):
    """
    Email the DOCX file at *filename* to *to_email* via Gmail SMTP (STARTTLS).

    The section text is appended to the plain-text body, and the file is
    attached base64-encoded. Credentials come from the EMAIL_USER and
    EMAIL_PASSWORD environment variables.

    :param to_email: recipient address.
    :param subject: email subject line.
    :param body: leading plain-text body.
    :param filename: path of the DOCX file to attach.
    :param section_content: generated section text, inlined into the body.
    :return: a human-readable success or failure message (never raises).
    """
    from_email = os.getenv("EMAIL_USER")
    email_password = os.getenv("EMAIL_PASSWORD")

    msg = MIMEMultipart()
    msg['From'] = from_email
    msg['To'] = to_email
    msg['Subject'] = subject

    # Attach the body of the email, with the section text inlined for quick reading.
    msg.attach(MIMEText(body + f"\n\nContent of the section:\n\n{section_content}", 'plain'))

    try:
        # Attach the DOCX file (close it before opening the SMTP session).
        with open(filename, 'rb') as attachment:
            part = MIMEBase('application', 'octet-stream')
            part.set_payload(attachment.read())
        encoders.encode_base64(part)
        # BUG FIX: the header previously carried a literal placeholder instead
        # of the real file name; use the basename so the recipient's client
        # shows a proper attachment name.
        part.add_header('Content-Disposition', f'attachment; filename="{os.path.basename(filename)}"')
        msg.attach(part)

        # Send the email over STARTTLS.
        with smtplib.SMTP('smtp.gmail.com', 587) as server:
            server.starttls()
            server.login(from_email, email_password)
            server.send_message(msg)

        # Return success message
        return f"Email sent successfully to {to_email} for section '{subject}'."

    except Exception as e:
        return f"Failed to send email to {to_email}: {str(e)}"
265
+
266
def sanitize_filename_old(filename, max_length=100):
    """Legacy variant of sanitize_filename with a 100-character default limit."""
    return re.sub(r'[<>:"/\\|?*]', '', filename)[:max_length]
269
+
270
def collect_basic_info():
    """
    Render the proposal-input form and, on Submit, generate every section.

    Widgets render in call order; each generated section is shown, offered as
    a per-section DOCX download, and appended to a cumulative document that is
    finally saved as PhD_Proposal_for_<topic>.docx in the working directory.
    NOTE(review): the generate_* prompt builders presumably come from the
    `funtions` star import — verify against that module.
    """
    st.title("PhD Proposal Generator")

    # Basic Research Information
    # Checkbox to allow URL summarization
    summarize_urls = st.checkbox("Summarize URLs in data", value=False)

    research_topic = st.text_input("Research Topic")
    research_question = st.text_area("Research Question")
    objectives = st.text_area("Research Objectives (SMART)")
    methodology = st.text_area("Research Methodology")
    data_collection = st.text_area("Data Collection Methods")
    data_analysis = st.text_area("Data Analysis Methods")
    justification = st.text_area("Justification for Methodology")
    key_authors = st.text_area("Key Authors in the Field")
    recent_developments = st.text_area("Recent Developments in the Field")
    contribution = st.text_area("Contribution to the Field")
    literature_gap = st.text_area("Literature Gaps")
    timeline = st.text_area("Research Timeline (Phases and Deadlines)")
    total_timeframe = st.text_area("Total Timeframe (e.g., 3 years)")  # Add this input field

    # Contact information
    st.write("## Contact Information")
    email = st.text_input("Email")
    whatsapp_number = st.text_input("WhatsApp Number")

    if st.button('Submit'):
        # Collect data; empty fields are later treated as "missing" by call_llm.
        data = {
            "research_topic": research_topic,
            "research_question": research_question,
            "objectives": objectives,
            "methodology": methodology,
            "data_collection": data_collection,
            "data_analysis": data_analysis,
            "justification": justification,
            "key_authors": key_authors,
            "recent_developments": recent_developments,
            "contribution": contribution,
            "literature_gap": literature_gap,
            "timeline": timeline,
            "total_timeframe": total_timeframe,  # Ensure this is added to the data dictionary
            "email": email,
            "whatsapp_number": whatsapp_number
        }

        # Initialize an empty history list to store the prompts and responses
        history = []

        # Summarize URLs if the user selected the option
        if summarize_urls:
            st.write("Summarizing URLs in the data...")
            data_updated = update_data_with_summaries(data)
        else:
            data_updated = data.copy()

        # Define the sections to process for an academic proposal:
        # (section title, prompt-builder function) pairs, generated in order.
        sections_to_process = [
            ("Executive Summary", generate_executive_summary),
            ("Research Objectives", generate_research_objectives),
            ("Research Methodology", generate_methodology_section),
            ("Literature Review Outline", generate_literature_review_outline),
            ("Hypotheses", generate_hypotheses),
            ("Contribution Statement", generate_contribution_statement),
            ("Research Timeline", generate_research_timeline),
            ("Limitations", generate_limitations_section),
            ("Future Work", generate_future_work_section)
        ]

        # Sanitize the research topic for file names
        sanitized_topic = sanitize_filename(research_topic, max_length=50)

        # Create a new document and fill it one section at a time.
        doc = create_document()
        for section_name, generate_prompt_func in sections_to_process:
            # Generate prompt for each section
            prompt = generate_prompt_func(data_updated)

            # Call the LLM, passing the prompt, current data, and history
            #section_content = call_llm(prompt, data, history,section_name)
            section_content = call_llm_with_retries(prompt, data_updated, history, section_name)

            # Add the current prompt and response to the history
            history.append(f"{section_name}: {section_content}")

            # Display the generated content for this section
            st.subheader(section_name)
            st.write(section_content)

            # Update document and create download link (doc_bytes holds the
            # cumulative document serialized up to and including this section).
            doc = add_section_to_doc(doc, section_name, section_content)
            doc_bytes = get_docx_bytes(doc)

            st.download_button(
                label=f"Download {section_name} as DOCX",
                data=doc_bytes,
                file_name=f"{section_name.replace(' ', '_').lower()}.docx",
                mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
            )

        # Save the final document (last doc_bytes from the loop) to disk.
        filename = f"PhD_Proposal_for_{sanitized_topic}.docx"
        with open(filename, 'wb') as f:
            f.write(doc_bytes.getbuffer())


# Script entry point: render the app on import/run.
collect_basic_info()