SoDa12321 committed on
Commit
022961b
·
verified ·
1 Parent(s): 7e00249

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +375 -0
app.py ADDED
@@ -0,0 +1,375 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from docx import Document
3
+ import re
4
+ import io
5
+ import os
6
+ import smtplib
7
+ from email.mime.multipart import MIMEMultipart
8
+ from email.mime.base import MIMEBase
9
+ from email import encoders
10
+ from email.mime.text import MIMEText
11
+ from fpdf import FPDF
12
+ from dotenv import load_dotenv
13
+ from retrying import retry
14
+ from funtions import *
15
+ import logging
16
+ import random
17
+ import time
18
+ import newspaper
19
+ from newspaper import Article
20
+
21
# ---------------------------------------------------------------------------
# Module-level setup: environment, API clients, logging, and the Streamlit
# page header. Order matters — Streamlit renders elements in call order.
# NOTE(review): `Exa` and `Groq` are not imported explicitly here; presumably
# they come from the star import of `funtions` — verify against that module.
# ---------------------------------------------------------------------------

# Load environment variables from .env file
load_dotenv()

# Declare the exa search API (key read from the EXA_API_KEY env variable)
exa = Exa(api_key=os.getenv("EXA_API_KEY"))

# Define your API Model and key (Groq key read from GROQ_API_KEY)
client = Groq(api_key=os.getenv("GROQ_API_KEY"))
utilized_model = "llama3-70b-8192"

# Set up logging: LLM/network errors are appended to llm_errors.log
logging.basicConfig(filename="llm_errors.log", level=logging.ERROR)

# Parameters for the Exa highlights search used when building context
highlights_options = {
    "num_sentences": 7,  # Length of highlights
    "highlights_per_url": 1,  # Get the best highlight for each URL
}

# Add title and author contact
st.title("Academic PhD Proposal Generator")

# Display the image using st.image
st.image("https://i.sstatic.net/jUkkO0Fd.jpg", caption="PhD Proposal Generator", use_column_width=True)
#st.markdown("""
#**Website:** [Academic Resource](https://youruniversity.edu)
#""")
st.write("For collaboration, please contact the author 👇")
st.write("Email: chatgpt4compas@gmail.com")
st.markdown("[WhatsApp contact 📞](https://web.whatsapp.com/send?phone=12085033653)")
51
+
52
def sanitize_filename(filename, max_length=10):
    """
    Make *filename* safe for use as a file name.

    Characters that are invalid in Windows/POSIX file names
    (< > : " / \\ | ? *) are dropped, and the result is truncated
    to the first *max_length* characters.
    """
    forbidden = '<>:"/\\|?*'
    cleaned = "".join(ch for ch in filename if ch not in forbidden)
    # Keep only the leading max_length characters.
    return cleaned[:max_length]
62
+
63
@retry(wait_exponential_multiplier=1000, wait_exponential_max=10000, stop_max_attempt_number=5)
def call_llm_old(prompt):
    """
    Earlier single-shot generator: fetch context highlights from Exa for
    *prompt*, then make one Groq chat-completion call and return its text.

    Retries with exponential backoff (up to 5 attempts) on any exception.
    """
    # Gather one highlight per search result as source material.
    hits = exa.search_and_contents(
        query=prompt, highlights=highlights_options, num_results=3, use_autoprompt=True
    )
    snippets = []
    for hit in hits.results:
        snippets.append(hit.highlights[0])

    system_prompt = "You are an academic PhD proposal generator. Read the provided contexts and use them to generate the proposal."
    user_prompt = f"Sources: {snippets}\nQuestion: {prompt}"

    reply = client.chat.completions.create(
        model=utilized_model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ]
    )
    return reply.choices[0].message.content
79
+
80
@retry(wait_exponential_multiplier=1000, wait_exponential_max=10000, stop_max_attempt_number=5)
def call_llm(prompt, data, history, section_name):
    """
    Generate one proposal section with the Groq chat model.

    :param prompt: The current prompt to generate content.
    :param data: The dictionary of input fields collected from the user;
        fields with falsy values are treated as missing and folded into the
        prompt as search hints (no external search is performed here).
    :param history: A list of previous prompts and responses to enhance the model's understanding.
    :param section_name: Selects a section-specific system prompt; unknown
        names fall back to a generic proposal-generator prompt.
    :return: Generated content based on the prompt and available data.
    """
    # Identify any fields the user left empty.
    missing = [field for field, value in data.items() if not value]

    if missing:
        # Fold per-field search hints plus the running history into the
        # prompt so the model can compensate for the gaps.
        queries = [
            f"Provide context for {field} in relation to {data.get('research_topic', 'this research topic')}."
            for field in missing
        ]
        prompt = (
            f"Missing fields: {', '.join(missing)}\n"
            f"History: {history}\n"
            f"Search Queries: {queries}\n"
            f"Original Prompt: {prompt}"
        )

    # Section-specific system prompts; anything unlisted uses the default.
    section_prompts = {
        "Executive Summary": "You are an expert in PhD proposals. Generate a concise, high-level summary of the research, focusing on the overall research problem, methodology, and expected contribution.",
        "Research Objectives": "You are an expert in PhD proposals. Write detailed research objectives, ensuring they follow SMART criteria (Specific, Measurable, Achievable, Relevant, Time-bound).",
        "Research Methodology": "You are an expert in research methodology. Generate a detailed description of the research design, including data collection and analysis methods, and justify their suitability.",
        "Literature Review Outline": "You are an academic expert in literature reviews. Provide a comprehensive literature review outline that covers the key authors, recent developments, and gaps in the research field.",
        "Hypotheses": "Generate clear and concise hypotheses for the research. These should be based on the research questions and provide a basis for further exploration.",
        "Contribution Statement": "Write a statement explaining the unique contributions this research will make to the field, focusing on how it fills gaps or advances current understanding.",
        "Research Timeline": "Create a detailed research timeline, outlining the different phases and milestones over the total timeframe.",
        "Limitations": "Provide an analysis of the limitations of the research, including potential weaknesses in methodology, data collection, or external factors.",
        "Future Work": "Write a section discussing potential areas of future work that could build on the current research findings.",
    }
    system_prompt = section_prompts.get(
        section_name,
        "You are an academic PhD proposal generator. Use the context and history to answer the user's question and fill in any missing fields.",
    )

    response = client.chat.completions.create(
        model=utilized_model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ]
    )
    return response.choices[0].message.content
137
+
138
def delay_with_backoff(attempt):
    """
    Sleep before the next retry, growing the pause exponentially.

    The base pause is drawn uniformly from 7-9 seconds and doubled for
    each prior attempt, but the total sleep never exceeds 10 seconds.
    """
    base = random.uniform(7, 9)
    time.sleep(min(base * 2 ** (attempt - 1), 10))
147
+
148
def call_llm_with_retries(prompt, data, history, section_name, max_retries=3):
    """
    Generate a section via call_llm, retrying on any exception.

    Errors are logged and surfaced to the Streamlit UI; between attempts a
    randomized exponential backoff delay is applied. After *max_retries*
    failures a human-readable error string is returned instead of raising.

    :param prompt: The current prompt to generate content.
    :param data: The dictionary of input fields collected from the user.
    :param history: A list of previous prompts and responses to enhance the model's understanding.
    :param section_name: The name of the current section being generated.
    :param max_retries: Maximum number of retry attempts (default: 3).
    :return: Generated content, or an error message after exhausting retries.
    """
    attempt = 0
    while attempt < max_retries:
        attempt += 1
        try:
            return call_llm(prompt, data, history, section_name)
        except Exception as exc:
            # Record the failure and tell the user we are retrying.
            logging.error(f"Attempt {attempt}: Error calling LLM model for section '{section_name}': {str(exc)}")
            st.write(f"Attempt {attempt}: There was a problem generating '{section_name}'. Retrying...")

            if attempt == max_retries:
                return f"Failed to generate the section '{section_name}' after {max_retries} attempts. Please try again later."

            delay_with_backoff(attempt)
            st.write(f"Retrying {section_name} after delay...")

    # Defensive fallback; the loop above always returns first.
    return f"Error: Maximum retry attempts exceeded for {section_name}."
180
+
181
def extract_and_summarize_article(url):
    """
    Download an article with newspaper3k and return its auto-generated summary.

    On any failure the error is logged to llm_errors.log and a human-readable
    message is returned instead of raising.

    :param url: The URL to be scraped.
    :return: A summarized version of the article content, or an error string.
    """
    try:
        piece = Article(url)
        # download -> parse -> nlp must run in this order; nlp() enables .summary
        for step in (piece.download, piece.parse, piece.nlp):
            step()
        return piece.summary
    except Exception as exc:
        logging.error(f"Error summarizing article from URL {url}: {str(exc)}")
        return f"Error fetching or summarizing content from {url}"
196
+
197
def update_data_with_summaries(data):
    """
    Return a copy of *data* in which every string value that looks like a
    URL (starts with http:// or https://) is replaced by a fetched summary
    of that page. Non-URL values are passed through untouched.

    :param data: The original data dictionary.
    :return: A new dictionary with URL content summarized.
    """
    result = {}
    for field, content in data.items():
        if isinstance(content, str) and re.match(r'http[s]?://', content):
            st.write(f"Fetching and summarizing content for URL in '{field}'...")
            content = extract_and_summarize_article(content)
        result[field] = content
    return result
211
def strip_md(text):
    """
    Drop common Markdown markers (**, *, #) from *text*, then backslash-escape
    any remaining ! * _ = ~ - characters.

    NOTE(review): add_section_to_doc later strips all backslashes from this
    output, which undoes the escaping — confirm the escape step is still wanted.
    """
    for marker in ("**", "*", "#"):
        text = text.replace(marker, "")
    return re.sub(r'([!*_=~-])', r'\\\1', text)
214
+
215
def create_document():
    """Create a fresh python-docx Document headed "PhD Research Proposal"."""
    document = Document()
    document.add_heading("PhD Research Proposal", 0)
    return document
219
+
220
def add_section_to_doc(doc, section_name, section_content):
    """
    Append one titled section (heading level 1 + paragraph) to *doc*.

    The content is passed through strip_md and then has every backslash
    removed (which also drops the escapes strip_md just added).
    :return: the same document, for chaining.
    """
    cleaned = strip_md(section_content).replace("\\", "")
    doc.add_heading(section_name, level=1)
    doc.add_paragraph(cleaned)
    return doc
226
+
227
def get_docx_bytes(doc):
    """Serialize *doc* into an in-memory buffer, rewound to position 0."""
    buffer = io.BytesIO()
    doc.save(buffer)
    buffer.seek(0)
    return buffer
232
+
233
def send_email_with_attachment(to_email, subject, body, filename, section_content):
    """
    Email the DOCX file at *filename* to *to_email* via Gmail SMTP (STARTTLS).

    The section text is appended to the plain-text body, and the file is
    attached base64-encoded. Credentials come from the EMAIL_USER and
    EMAIL_PASSWORD environment variables.

    :param to_email: recipient address.
    :param subject: email subject line.
    :param body: leading plain-text body.
    :param filename: path of the DOCX file to attach.
    :param section_content: generated section text, inlined into the body.
    :return: a human-readable success or failure message (never raises).
    """
    from_email = os.getenv("EMAIL_USER")
    email_password = os.getenv("EMAIL_PASSWORD")

    msg = MIMEMultipart()
    msg['From'] = from_email
    msg['To'] = to_email
    msg['Subject'] = subject

    # Attach the body of the email, with the section text inlined for quick reading.
    msg.attach(MIMEText(body + f"\n\nContent of the section:\n\n{section_content}", 'plain'))

    try:
        # Attach the DOCX file (close it before opening the SMTP session).
        with open(filename, 'rb') as attachment:
            part = MIMEBase('application', 'octet-stream')
            part.set_payload(attachment.read())
        encoders.encode_base64(part)
        # BUG FIX: the header previously carried a literal placeholder instead
        # of the real file name; use the basename so the recipient's client
        # shows a proper attachment name.
        part.add_header('Content-Disposition', f'attachment; filename="{os.path.basename(filename)}"')
        msg.attach(part)

        # Send the email over STARTTLS.
        with smtplib.SMTP('smtp.gmail.com', 587) as server:
            server.starttls()
            server.login(from_email, email_password)
            server.send_message(msg)

        # Return success message
        return f"Email sent successfully to {to_email} for section '{subject}'."

    except Exception as e:
        return f"Failed to send email to {to_email}: {str(e)}"
265
+
266
def sanitize_filename_old(filename, max_length=100):
    """Legacy variant of sanitize_filename with a 100-character default limit."""
    return re.sub(r'[<>:"/\\|?*]', '', filename)[:max_length]
269
+
270
def collect_basic_info():
    """
    Render the proposal-input form and, on Submit, generate every section.

    Widgets render in call order; each generated section is shown, offered as
    a per-section DOCX download, and appended to a cumulative document that is
    finally saved as PhD_Proposal_for_<topic>.docx in the working directory.
    NOTE(review): the generate_* prompt builders presumably come from the
    `funtions` star import — verify against that module.
    """
    st.title("PhD Proposal Generator")

    # Basic Research Information
    # Checkbox to allow URL summarization
    summarize_urls = st.checkbox("Summarize URLs in data", value=False)

    research_topic = st.text_input("Research Topic")
    research_question = st.text_area("Research Question")
    objectives = st.text_area("Research Objectives (SMART)")
    methodology = st.text_area("Research Methodology")
    data_collection = st.text_area("Data Collection Methods")
    data_analysis = st.text_area("Data Analysis Methods")
    justification = st.text_area("Justification for Methodology")
    key_authors = st.text_area("Key Authors in the Field")
    recent_developments = st.text_area("Recent Developments in the Field")
    contribution = st.text_area("Contribution to the Field")
    literature_gap = st.text_area("Literature Gaps")
    timeline = st.text_area("Research Timeline (Phases and Deadlines)")
    total_timeframe = st.text_area("Total Timeframe (e.g., 3 years)")  # Add this input field

    # Contact information
    st.write("## Contact Information")
    email = st.text_input("Email")
    whatsapp_number = st.text_input("WhatsApp Number")

    if st.button('Submit'):
        # Collect data; empty fields are later treated as "missing" by call_llm.
        data = {
            "research_topic": research_topic,
            "research_question": research_question,
            "objectives": objectives,
            "methodology": methodology,
            "data_collection": data_collection,
            "data_analysis": data_analysis,
            "justification": justification,
            "key_authors": key_authors,
            "recent_developments": recent_developments,
            "contribution": contribution,
            "literature_gap": literature_gap,
            "timeline": timeline,
            "total_timeframe": total_timeframe,  # Ensure this is added to the data dictionary
            "email": email,
            "whatsapp_number": whatsapp_number
        }

        # Initialize an empty history list to store the prompts and responses
        history = []

        # Summarize URLs if the user selected the option
        if summarize_urls:
            st.write("Summarizing URLs in the data...")
            data_updated = update_data_with_summaries(data)
        else:
            data_updated = data.copy()

        # Define the sections to process for an academic proposal:
        # (section title, prompt-builder function) pairs, generated in order.
        sections_to_process = [
            ("Executive Summary", generate_executive_summary),
            ("Research Objectives", generate_research_objectives),
            ("Research Methodology", generate_methodology_section),
            ("Literature Review Outline", generate_literature_review_outline),
            ("Hypotheses", generate_hypotheses),
            ("Contribution Statement", generate_contribution_statement),
            ("Research Timeline", generate_research_timeline),
            ("Limitations", generate_limitations_section),
            ("Future Work", generate_future_work_section)
        ]

        # Sanitize the research topic for file names
        sanitized_topic = sanitize_filename(research_topic, max_length=50)

        # Create a new document and fill it one section at a time.
        doc = create_document()
        for section_name, generate_prompt_func in sections_to_process:
            # Generate prompt for each section
            prompt = generate_prompt_func(data_updated)

            # Call the LLM, passing the prompt, current data, and history
            #section_content = call_llm(prompt, data, history,section_name)
            section_content = call_llm_with_retries(prompt, data_updated, history, section_name)

            # Add the current prompt and response to the history
            history.append(f"{section_name}: {section_content}")

            # Display the generated content for this section
            st.subheader(section_name)
            st.write(section_content)

            # Update document and create download link (doc_bytes holds the
            # cumulative document serialized up to and including this section).
            doc = add_section_to_doc(doc, section_name, section_content)
            doc_bytes = get_docx_bytes(doc)

            st.download_button(
                label=f"Download {section_name} as DOCX",
                data=doc_bytes,
                file_name=f"{section_name.replace(' ', '_').lower()}.docx",
                mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
            )

        # Save the final document (last doc_bytes from the loop) to disk.
        filename = f"PhD_Proposal_for_{sanitized_topic}.docx"
        with open(filename, 'wb') as f:
            f.write(doc_bytes.getbuffer())


# Script entry point: render the app on import/run.
collect_basic_info()