smitathkr1 committed on
Commit
0ec70ea
·
verified ·
1 Parent(s): bbd5d01

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +305 -0
app.py ADDED
@@ -0,0 +1,305 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Tuple
2
+ import streamlit as st
3
+ import os
4
+ from openai import OpenAI
5
+ from groq import Groq
6
+ import openai
7
+ import os
8
+ from pytube import YouTube
9
+ from pathlib import Path
10
+ import time
11
+ import fitz # PyMuPDF
12
+ import moviepy.editor as mp
13
+ from brain import get_index_for_pdf
14
+ from langchain.chains import RetrievalQA
15
+ from langchain.chat_models import ChatOpenAI
16
+ import os
17
+ import os
18
+ import queue
19
+ import re
20
+ import tempfile
21
+ import threading
22
+
23
+ import streamlit as st
24
+
25
+ from embedchain import App
26
+ from embedchain.config import BaseLlmConfig
27
+ from embedchain.helpers.callbacks import (StreamingStdOutCallbackHandlerYield,
28
+ generate)
29
+
30
+
31
# Page configuration is issued before any other Streamlit call in this file.
st.set_page_config(page_title="Notes Maker", page_icon="📚", layout="wide")

# API keys are read from the environment rather than being committed to the
# repository. Any key that was previously pushed in source must be treated as
# leaked and rotated with the provider.
# NOTE(review): both SDKs are expected to raise at construction time when no
# key is available - confirm this is the desired start-up behaviour.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
client_openai = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
34
+ # Define a function for the login system
35
def login_user(username, password):
    """Return True when the supplied credentials match the configured account.

    The expected credentials are taken from the ``NOTES_APP_USERNAME`` and
    ``NOTES_APP_PASSWORD`` environment variables. When those are unset the
    original placeholder pair ("admin" / "password") is used, so existing
    deployments keep working; set the variables to stop relying on it.

    Args:
        username: User name typed into the login form.
        password: Password typed into the login form.

    Returns:
        bool: True if both values match, False otherwise (including when
        either value is not a string).
    """
    import hmac  # local import: hmac is not among the file's top-level imports

    if not isinstance(username, str) or not isinstance(password, str):
        return False

    expected_username = os.environ.get("NOTES_APP_USERNAME", "admin")
    expected_password = os.environ.get("NOTES_APP_PASSWORD", "password")

    # Both comparisons are evaluated before combining them, and each one is
    # constant-time, so the response time does not reveal which field differed.
    username_ok = hmac.compare_digest(
        username.encode("utf-8"), expected_username.encode("utf-8")
    )
    password_ok = hmac.compare_digest(
        password.encode("utf-8"), expected_password.encode("utf-8")
    )
    return username_ok and password_ok
41
+
42
+ # Function to show the dropdown menu and handle the selection
43
def show_dropdown(user_logged_in):
    """Show the page selector in the sidebar and render the chosen page.

    Args:
        user_logged_in: Truthy when the current session is authenticated.
            When falsy, only a "please login" notice is shown.
    """
    if not user_logged_in:
        # Message to be displayed if the user is not logged in
        st.info("Please login to see the options.")
        return

    options = ["Select Page", "🎥 Notes Maker from Video and Audio", "📚 Notes Generator from PDF", 'Search']
    choice = st.sidebar.selectbox("Choose an option:", options)

    # The options are mutually exclusive, so a single if/elif chain is used.
    if choice == "🎥 Notes Maker from Video and Audio":
        youtube_to_notes()
    elif choice == "Search":
        # No `search` function is defined in the visible part of this file;
        # look it up dynamically so selecting the option cannot raise
        # NameError if the page has not been implemented.
        search_page = globals().get("search")
        if callable(search_page):
            search_page()
        else:
            st.warning("Search is not available yet.")
    elif choice == "📚 Notes Generator from PDF":
        st.header("Notes Generator from PDF")
        notes_maker()
60
+
61
+ # Main app
62
def main():
    """Render the sidebar login form, then the page selector.

    The login result is stored in ``st.session_state['user_logged_in']`` and
    that value is what gets passed to ``show_dropdown``.
    """
    sidebar = st.sidebar
    sidebar.title("Login")

    username = sidebar.text_input("Username")
    password = sidebar.text_input("Password", type="password")

    # Session flag recording whether the user has logged in.
    if 'user_logged_in' not in st.session_state:
        st.session_state['user_logged_in'] = False

    login_clicked = sidebar.button("Login")
    if login_clicked and login_user(username, password):
        st.session_state['user_logged_in'] = True
        st.success(f"You are logged in as {username}")
    elif login_clicked:
        sidebar.error("Incorrect Username/Password")

    show_dropdown(st.session_state['user_logged_in'])
81
+
82
def youtube_to_notes():
    """Render the video/audio page: fetch media, transcribe it, and make notes.

    Flow, once the action button is pressed:
      1. Obtain an MP3 in ``downloads/audio.mp3`` (from a YouTube URL, an
         uploaded video, or an uploaded audio file).
      2. Send it to the ``whisper-1`` translations endpoint.
      3. Synthesize the first 4096 characters of the transcript with ``tts-1``.
      4. Ask ``gpt-3.5-turbo`` for notes, then ask it to review those notes.
      5. Offer the transcript and the reviewed notes as downloads.
    """
    # The key comes from the environment instead of being hard-coded in source.
    client = openai.Client(api_key=os.environ.get("OPENAI_API_KEY"))

    def download_video(url, output_directory):
        """Download the first available stream of `url` as video.mp4."""
        yt = YouTube(url)
        ys = yt.streams.filter(only_audio=False).first()
        if ys is None:
            raise ValueError("No downloadable stream found for this video.")
        video_output_path = os.path.join(output_directory, "video.mp4")
        ys.download(output_path=output_directory, filename="video.mp4")
        return video_output_path

    def video_to_mp3(video_path, mp3_path):
        """Extract the audio track of `video_path` into `mp3_path`."""
        audio_clip = mp.AudioFileClip(video_path)
        try:
            audio_clip.write_audiofile(mp3_path)
        finally:
            # Release the reader so the source file is not left open.
            audio_clip.close()

    # Streamlit UI
    st.title('Video/Audio to MP3 Converter, Transcriber, and Notes Generator')

    # Language selection
    language_options = {
        "English": "English",
        "German": "German",
        "Spanish": "Spanish",
        "Japanese": "Japanese",
        "French": "French",
        "Hindi": "Hindi",
        "Chinese": "Chinese",
        # Add other languages and their codes here
    }
    language = st.selectbox('Select target Language', options=list(language_options.keys()))

    input_type = st.radio('Choose input type:', ('YouTube URL', 'Upload Video File', 'Upload Audio File'))

    # Bind every input up front so no branch below can hit an unbound name.
    youtube_url = ""
    uploaded_file = None
    if input_type == 'YouTube URL':
        youtube_url = st.text_input('Enter YouTube Video URL')
        action = st.button('Download and Convert Video')
    else:
        uploaded_file = st.file_uploader("Choose a file", type=['mp4', 'mp3'] if input_type == 'Upload Video File' else ['mp3'])
        action = st.button('Convert Uploaded File')

    if not action:
        return

    with st.spinner('Processing...'):
        output_directory = "downloads"
        os.makedirs(output_directory, exist_ok=True)
        mp3_output_path = os.path.join(output_directory, "audio.mp3")

        if input_type == 'YouTube URL' and youtube_url:
            video_output_path = download_video(youtube_url, output_directory)
        elif input_type == 'Upload Video File' and uploaded_file:
            # Keep only the base name, and prefix it so the saved upload can
            # never collide with the "audio.mp3" conversion target.
            safe_name = "upload_" + Path(uploaded_file.name).name
            video_output_path = os.path.join(output_directory, safe_name)
            with open(video_output_path, "wb") as f:
                f.write(uploaded_file.getvalue())
        elif input_type == 'Upload Audio File' and uploaded_file:
            # An uploaded MP3 needs no conversion: store it as the MP3 target.
            video_output_path = None
            with open(mp3_output_path, "wb") as f:
                f.write(uploaded_file.getvalue())
        else:
            st.error("Please enter a YouTube URL or upload a video or audio file.")
            st.stop()

        if video_output_path is not None:
            video_to_mp3(video_output_path, mp3_output_path)
            st.success(f"Video converted to MP3: {mp3_output_path}")

        with open(mp3_output_path, "rb") as audio_file:
            transcript = client.audio.translations.create(
                model="whisper-1",
                prompt="preserve he timestamps and the speaker names if present in the video",
                file=audio_file,
            )

        transcript_text1 = transcript.text
        if not transcript_text1.strip():
            st.error("No speech could be transcribed from this file.")
            st.stop()

        # Only the first 4096 characters are sent to text-to-speech.
        transcript_text = transcript_text1[:4096]
        speech_file_path = Path(output_directory) / "speech.mp3"
        response = client.audio.speech.create(
            model="tts-1",
            voice="alloy",  # Note: You may need to select the voice based on language
            input=transcript_text,
        )
        response.stream_to_file(str(speech_file_path))

        st.audio(str(speech_file_path))
        if input_type == 'YouTube URL':
            st.video(youtube_url)

        # Download transcript button
        st.download_button(label="Download Transcript",
                           data=transcript_text1,
                           file_name="transcript.txt",
                           mime='text/plain')

        # The instruction text is sent to the model verbatim and is kept
        # exactly as originally written.
        notes_instructions = "Make very detailed notes containing all topics mentioned in the video along with important timestamps ut please check teh timestamps and see if by mistake they are longer than the vide itself and if so correct them, and make notes for me about the video also add the necessary mathematical formulas using correct symbols for ex. derivatives, integrals, summations etc. if needed also if teh contentent is about organic chemistry tehn only please give include links to teh 3d molecules along with its name for teh molecules/structures mentioned in teh videoin eth note also if the organic reactions are explained in the video then give all the reaction inthe notes along with if some extra resources are available with the link of them for molecular 3d structure dirctly use this format, the links can be generated for example like: https://embed.molview.org/v1/?mode=balls&cid=124527813 and by just changing teh cid num,ber with teh cid number of teh particular molecule also teh 2d structures can be linked via:https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/benzene/PNG again by chaniging teh name whwnvwer you give links make sure they are clicable for user so formate it coerrctlya also if the video is about the pragraming and computer science weite the codes in the code blocks while writing the notes also in elecrronics buildind a circuit if the codes are given write in block codefor chemical reaqction please format them correctly using all teh necessary correct notationa and it should look like a chemical reaction notation with arrows structural formulas, diagrams , etc, transcript is: "  # noqa: E501

        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                # Separators added: previously the transcript ran straight
                # into "write the notes in <lang>Language" with no spaces.
                {"role": "user", "content": notes_instructions + transcript_text1 + " write the notes in " + language_options[language] + " Language"},
            ]
        )
        notes = response.choices[0].message.content

        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "this is the notes you made now please check them once agin and confirm it make modification if necessary and please remove any unnecessary non useful part" + ": " + notes},
            ]
        )
        # Use the reviewed notes; the result of this second call was
        # previously thrown away. Fall back to the draft if it comes back empty.
        notes = response.choices[0].message.content or notes

        st.download_button(label="Download Notes",
                           data=notes,
                           file_name="notes.txt",
                           mime='text/plain')
        st.download_button(label="Download Notes mkd",
                           data=notes,
                           file_name="notes.md",
                           mime='text/markdown')
197
+
198
def notes_maker():
    """Render the PDF page: extract highlights (or full text) and make notes.

    The uploaded PDF is written to a temporary file, read with PyMuPDF, and
    removed again. Highlighted passages are preferred as the source text; if
    the document has none, the full page text is used instead. Notes are
    produced by two chat-completion calls on the module-level ``client``
    (draft, then expansion).
    """

    def _parse_highlight(annot: fitz.Annot, wordlist: List[Tuple[float, float, float, float, str, int, int, int]]) -> str:
        """Return the words of `wordlist` that intersect the annotation's quads."""
        # NOTE(review): `vertices` is guarded against None - confirm against
        # the PyMuPDF version in use whether that can happen for highlights.
        points = annot.vertices or []
        quad_count = len(points) // 4  # four points describe one quad
        sentences = []
        for i in range(quad_count):
            r = fitz.Quad(points[i * 4: i * 4 + 4]).rect
            words = [w for w in wordlist if fitz.Rect(w[:4]).intersects(r)]
            sentences.append(" ".join(w[4] for w in words))
        return " ".join(sentences)

    def handle_page(page):
        """Return (highlight texts, plain text) for one page."""
        wordlist = page.get_text("words")
        # Sort by the fourth then the first coordinate of each word tuple.
        wordlist.sort(key=lambda w: (w[3], w[0]))

        highlights = []
        annot = page.first_annot
        while annot:
            if annot.type[0] == 8:  # Highlight annotation type
                highlights.append(_parse_highlight(annot, wordlist))
            annot = annot.next
        return highlights, page.get_text("text")

    def extract_highlights_and_full_text(filepath: str) -> Tuple[List, str]:
        """Collect every highlight and the full text of the PDF at `filepath`."""
        highlights = []
        page_texts = []
        # The context manager closes the document so the file can be removed
        # afterwards.
        with fitz.open(filepath) as doc:
            for page in doc:
                page_highlights, page_text = handle_page(page)
                highlights += page_highlights
                page_texts.append(page_text + "\n")
        return highlights, "".join(page_texts).strip()

    def _complete(messages):
        """Run one chat completion with the shared settings; return its text."""
        completion = client.chat.completions.create(
            model="mixtral-8x7b-32768",
            messages=messages,
            temperature=0.5,
            max_tokens=1324,
            top_p=1,
            stream=False,
            stop=None,
        )
        return completion.choices[0].message.content

    def generate_notes(text_for_notes, include_problem_solving):
        """Draft notes for `text_for_notes`, expand them, and return the result."""
        base_prompt = "Make properly formatted notes for the text. If there are problem-solving parts, provide schemas and explanations."
        problem_solving_prompt = " Include detailed problem-solving schemas and explanations for any issues or examples presented."
        prompt = base_prompt + (problem_solving_prompt if include_problem_solving else "")

        # System and user messages must be separate dicts. Written as one dict
        # literal, the duplicate "role"/"content" keys made the user entry
        # overwrite the system prompt.
        draft = _complete([
            {
                "role": "system",
                "content": "You are a note makeing assistant make teh notes elaborative, do not keep them very short, also include molecular represantations, chemical equations, mathematical equations or physics notations or codeblocks with code and formatted correctly wherever needed depening on teh content",
            },
            {
                "role": "user",
                "content": prompt + " Text: " + text_for_notes,
            },
        ])

        expanded = _complete([
            {
                "role": "user",
                "content": "these are the notes from step 1 expand them in detiled appropriately and always maintain teh topic structure from teh orginal text if available: " + draft,
            },
        ])
        # Return the expanded notes (previously the draft was returned and the
        # second call was wasted); keep the draft if the expansion is empty.
        return expanded or draft

    def _render_page():
        """Draw the upload form and, on request, generate and show the notes."""
        st.title("PDF Highlight Extractor and Notes Generator with Problem-Solving Schemas")
        uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")
        include_problem_solving = st.checkbox("Include problem-solving schemas in the notes", value=False)

        # The button is only drawn once a file has been chosen.
        if uploaded_file is None or not st.button('Generate Notes'):
            return

        with st.spinner("Processing..."):
            # Write to a private temporary file instead of a user-named file
            # in the working directory.
            with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
                tmp.write(uploaded_file.getbuffer())
                tmp_path = tmp.name
            try:
                highlights, full_text = extract_highlights_and_full_text(tmp_path)
            finally:
                os.remove(tmp_path)

            # Use full text if no highlights
            text_for_notes = "\n".join(highlights) if highlights else full_text
            if not text_for_notes.strip():
                st.error("No extractable text was found in this PDF.")
                return

            notes = generate_notes(text_for_notes, include_problem_solving)

        st.download_button(label="Download Notes",
                           data=notes,
                           file_name="notes.txt",
                           mime='text/plain')

        st.success("Notes generated successfully!")
        st.text_area("Generated Notes", notes, height=250)

    # Called unconditionally so the page renders whenever notes_maker() is
    # invoked, not only when this module happens to be "__main__".
    _render_page()
301
+
302
+
303
+
304
# Script entry point: start the app when this module is executed directly.
if __name__ == "__main__":
    main()