|
|
from langchain_text_splitters import RecursiveCharacterTextSplitter |
|
|
from langchain_chroma import Chroma |
|
|
from langchain_huggingface import HuggingFaceEmbeddings |
|
|
from bytez import Bytez |
|
|
from youtube_transcript_api import YouTubeTranscriptApi |
|
|
import gradio as gr |
|
|
from dotenv import load_dotenv |
|
|
import os |
|
|
from urllib.parse import urlparse, parse_qs |
|
|
import time |
|
|
|
|
|
# Load variables from a local .env file (if present) before reading the key.
# With its default arguments load_dotenv() leaves variables that are already
# set in the process environment untouched.
load_dotenv()

api_key = os.environ.get("BYTEZ_API_KEY")
if not api_key:
    # Surface the misconfiguration in the logs instead of leaving it to show
    # up later as an opaque failure of the first model call.
    print("[WARN] BYTEZ_API_KEY is not set; Bytez requests are likely to fail")

# Shared Bytez client used by generate_summary() and execute().
sdk = Bytez(api_key)
|
|
|
|
|
|
|
|
def video_id_extractor(link):
    """Extract the video ID from a YouTube URL.

    Supported forms (with or without scheme, ``www.``/``m.`` style subdomains,
    and surrounding whitespace):

    * ``youtube.com/watch?v=<id>``
    * ``youtube.com/shorts/<id>``, ``/embed/<id>``, ``/live/<id>``, ``/v/<id>``
    * ``youtu.be/<id>``

    Args:
        link: URL text as entered by the user.

    Returns:
        The video ID string, or ``None`` when ``link`` is not a string, is
        empty, is not a YouTube URL, or carries no video ID.
    """
    if not isinstance(link, str):
        return None
    link = link.strip()
    if not link:
        return None

    try:
        parsed_url = urlparse(link)
        if not parsed_url.netloc:
            # "youtube.com/watch?v=..." has no scheme, so urlparse() puts the
            # host into the path; retry with an explicit scheme.
            parsed_url = urlparse("https://" + link.lstrip("/"))
        host = (parsed_url.hostname or "").lower()
    except ValueError:
        # urlparse() rejects some malformed hosts (e.g. a broken IPv6 literal).
        return None

    segments = [part for part in parsed_url.path.split("/") if part]

    # Match the host exactly or as a subdomain; a plain substring test would
    # also accept unrelated hosts such as "evilyoutube.com".
    if host == "youtu.be" or host.endswith(".youtu.be"):
        video_id = segments[0] if segments else None
    elif host == "youtube.com" or host.endswith(".youtube.com"):
        video_id = parse_qs(parsed_url.query).get("v", [None])[0]
        path_forms = ("shorts", "embed", "live", "v")
        if not video_id and len(segments) >= 2 and segments[0] in path_forms:
            video_id = segments[1]
    else:
        video_id = None

    video_id = video_id.strip() if video_id else None
    return video_id or None
|
|
|
|
|
def generate_transcript(video_id):
    """Fetch a video's transcript and return it as one space-joined string.

    Args:
        video_id: ID handed to ``YouTubeTranscriptApi().fetch``.

    Returns:
        The text of every fetched snippet joined with single spaces, or
        ``None`` when fetching fails (the reason is printed).
    """
    from youtube_transcript_api import YouTubeTranscriptApi, _errors
    import traceback

    print(f"[INFO] Fetching transcript for video ID: {video_id}")
    try:
        fetched = YouTubeTranscriptApi().fetch(video_id=video_id)
        text = " ".join(snippet.text for snippet in fetched.snippets)
        print(f"[INFO] Transcript fetched. Length: {len(text)} chars")
    except _errors.TranscriptsDisabled:
        print(f"[ERROR] Transcripts are disabled for video {video_id}")
    except _errors.VideoUnavailable:
        print(f"[ERROR] Video unavailable or restricted: {video_id}")
    except _errors.NoTranscriptFound:
        print(f"[ERROR] No transcript found (no captions in English) for {video_id}")
    except Exception as e:
        # Anything unexpected: report it with a traceback, then fall through.
        print(f"[ERROR] Unexpected exception fetching transcript: {e}")
        traceback.print_exc()
    else:
        return text

    # Every failure path ends here.
    return None
|
|
|
|
|
def create_and_save_vs(trans, chunk_size=100, chunk_overlap=50):
    """Split a transcript into chunks and index them in a fresh Chroma store.

    Args:
        trans: Transcript text to index.
        chunk_size: Maximum chunk length passed to
            ``RecursiveCharacterTextSplitter`` (default matches the original
            hard-coded value).
        chunk_overlap: Overlap between consecutive chunks (default matches the
            original hard-coded value).

    Returns:
        The populated ``Chroma`` vector store, or ``None`` when any step fails
        (the error is printed with a traceback).
    """
    import uuid

    try:
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size,
            chunk_overlap=chunk_overlap,
        )
        chunks = splitter.split_text(trans)
        embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2')
        # Use a unique collection per transcript. Without collection_name every
        # call writes to the same default collection, so chunks from previously
        # processed videos would be returned by later similarity searches.
        # NOTE(review): relies on the in-memory Chroma client being shared
        # within the process — confirm against the installed chromadb version.
        collection_name = f"transcript-{uuid.uuid4().hex}"
        return Chroma.from_texts(chunks, embeddings, collection_name=collection_name)
    except Exception as exc:
        # Keep the "return None on failure" contract the caller checks for,
        # but do not hide the cause.
        print(f"[ERROR] Vector store creation failed: {exc}")
        traceback.print_exc()
        return None
|
|
|
|
|
def generate_summary(trans):
    """Request a short summary of a transcript from ``openai/gpt-4o`` via Bytez.

    Transcripts with more than 90,000 space-separated words are cut down to
    their first 85,000 words before being sent. The request is attempted up to
    four times, waiting 3, 6 and 12 seconds between attempts.

    Args:
        trans: Transcript text.

    Returns:
        The summary text, or ``None`` when the model cannot be prepared or no
        attempt produced a usable reply.
    """
    try:
        model = sdk.model("openai/gpt-4o")
        words = trans.split(" ")
        if len(words) > 90000:
            trans = " ".join(words[:85000])
    except Exception as exc:
        print(f"[ERROR] Could not prepare summary request: {exc}")
        return None

    messages = [
        {"role": "system", "content": "You are a youtube transcipt sammurizer. Sammurize the transcript under 100 words"},
        {"role": "user", "content": trans},
    ]

    max_attempts = 4
    delay = 3
    for attempt in range(1, max_attempts + 1):
        try:
            res = model.run(messages)
        except Exception as exc:
            # A raised error counts as a failed attempt and is retried.
            print(f"[ERROR] Summary request failed (attempt {attempt}/{max_attempts}): {exc}")
            res = None

        # NOTE(review): the 3-element list shape is what this code has always
        # expected from model.run(); confirm against the Bytez SDK in use.
        if isinstance(res, list) and len(res) == 3:
            try:
                content = res[0]["content"]
            except (TypeError, KeyError, IndexError):
                # e.g. the first element is None because the call reported an error
                content = None
            if content:
                return content

        if attempt < max_attempts:
            # Double the wait each time. The previous code squared it
            # (3 -> 9 -> 81 -> 6561 s) and also slept after the last attempt.
            time.sleep(delay)
            delay *= 2

    return None
|
|
|
|
|
import traceback |
|
|
|
|
|
def setter(link):
    """Process a submitted link and stream page-visibility updates to the UI.

    This is a generator. Each yielded 8-tuple follows the order of the outputs
    wired to the submit button: visibility updates for the link form, loading
    page, chat page, wrong-link page, captions-unavailable page and generic
    error page, followed by the summary text and the vector store.

    The first yield shows only the loading page. The final yield shows either
    the chat page (with summary and vector store) or one error page. Error
    pages are shown together with the link form so the user can submit another
    link without reloading the app.

    Args:
        link: Text entered in the YouTube link box.
    """
    page_names = ("first", "loading", "chat", "wrong_link", "no_captions", "error")

    def show(page, summary_text="", store="", keep_form=False):
        # Build the 8-tuple with exactly `page` visible (plus the link form
        # when `keep_form` is set).
        visible = {page}
        if keep_form:
            visible.add("first")
        updates = tuple(gr.update(visible=name in visible) for name in page_names)
        return updates + (summary_text, store)

    print(f"[INFO] Received link: {link}")
    yield show("loading")

    try:
        video_id = video_id_extractor(link)
        print(f"[INFO] Extracted video ID: {video_id}")
        if not video_id:
            print("[ERROR] Invalid video link")
            yield show("wrong_link", keep_form=True)
            return

        transcript = generate_transcript(video_id)
        print(f"[INFO] Transcript length: {len(transcript) if transcript else 0}")
        if not transcript:
            print("[ERROR] Transcript generation failed")
            yield show("no_captions", keep_form=True)
            return

        vectorstore = create_and_save_vs(transcript)
        if not vectorstore:
            print("[ERROR] Vectorstore creation failed")
            yield show("error", keep_form=True)
            return
        # Logged only after the check so a failure is not reported as success.
        print("[INFO] Vectorstore created")

        summary = generate_summary(transcript)
        print(f"[INFO] Summary generated: {summary[:80] if summary else None}")
        if not summary:
            print("[ERROR] Summary generation failed")
            yield show("error", keep_form=True)
            return

        yield show("chat", summary_text=summary, store=vectorstore)

    except Exception as e:
        # Top-level boundary for the UI callback: log and show the error page.
        print("[EXCEPTION in setter]:", e)
        traceback.print_exc()
        yield show("error", keep_form=True)
|
|
|
|
|
def execute(vec, query):
    """Answer a question about the video from its most similar transcript chunks.

    The three chunks most similar to ``query`` are retrieved from ``vec`` and
    sent, together with the query, to ``openai/gpt-4o`` through Bytez.

    Args:
        vec: Vector store kept in the UI state; must provide
            ``similarity_search``.
        query: The user's question.

    Returns:
        A 3-tuple ``(answer_text, chat_page_update, error_page_update)``.
        On success the error page is hidden. On failure the answer is empty
        and the error page is shown, while the chat page stays visible so the
        question can be asked again.
    """
    try:
        matches = vec.similarity_search(query, k=3)
        context = "".join(f"\n{doc.page_content}" for doc in matches)

        model = sdk.model("openai/gpt-4o")
        messages = [
            {"role": "system", "content": "You are a helpful assistant - you will be asked a query and provided with a context. You have to answer that query based on the provided context - do not make things up. Do not reveal the whole context, answer as like you already knew the context"},
            {"role": "user", "content": f"query: {query} | context: {context}"},
        ]
        res = model.run(messages)
        return res[0]['content'], gr.update(visible=True), gr.update(visible=False)
    except Exception as exc:
        # Top-level boundary for the UI callback: record the cause instead of
        # swallowing it, then report the failure to the user.
        print("[EXCEPTION in execute]:", exc)
        traceback.print_exc()
        return "", gr.update(visible=True), gr.update(visible=True)
|
|
|
|
|
# Colour theme handed to gr.Blocks.
_APP_THEME = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="indigo",
)

# Custom stylesheet handed to gr.Blocks: branding banners, buttons, inputs,
# status/error boxes, keyframe animations and one mobile breakpoint.
_APP_CSS = """
    /* Global Styles */
    .gradio-container {
        font-family: 'Inter', 'Segoe UI', sans-serif !important;
        max-width: 1200px !important;
        margin: 0 auto !important;
    }

    /* Header Branding */
    .header-brand {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        padding: 2rem;
        border-radius: 16px;
        margin-bottom: 2rem;
        box-shadow: 0 10px 40px rgba(102, 126, 234, 0.3);
        animation: fadeInDown 0.8s ease-out;
    }

    .header-brand h1 {
        color: white;
        font-size: 2.5rem;
        font-weight: 700;
        margin: 0;
        text-shadow: 2px 2px 4px rgba(0,0,0,0.2);
    }

    .header-brand p {
        color: rgba(255,255,255,0.95);
        font-size: 1.1rem;
        margin: 0.5rem 0 0 0;
    }

    /* Footer Branding */
    .footer-brand {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        padding: 1.5rem;
        border-radius: 12px;
        margin-top: 2rem;
        text-align: center;
        box-shadow: 0 -5px 20px rgba(102, 126, 234, 0.2);
    }

    .footer-brand p {
        color: white;
        margin: 0.3rem 0;
        font-size: 0.95rem;
    }

    .footer-brand a {
        color: #ffd700;
        text-decoration: none;
        font-weight: 600;
        transition: all 0.3s ease;
    }

    .footer-brand a:hover {
        color: #fff;
        text-shadow: 0 0 10px rgba(255,255,255,0.5);
    }

    /* Main Title Animation */
    .main-title {
        background: linear-gradient(90deg, #667eea, #764ba2, #667eea);
        background-size: 200% auto;
        color: white;
        padding: 1.5rem;
        border-radius: 12px;
        text-align: center;
        font-size: 1.8rem;
        font-weight: 600;
        margin-bottom: 2rem;
        box-shadow: 0 8px 32px rgba(102, 126, 234, 0.4);
        animation: gradientShift 3s ease infinite, fadeIn 1s ease-out;
    }

    /* Button Styles */
    .gr-button {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
        border: none !important;
        color: white !important;
        font-weight: 600 !important;
        padding: 12px 32px !important;
        border-radius: 8px !important;
        transition: all 0.3s ease !important;
        box-shadow: 0 4px 15px rgba(102, 126, 234, 0.4) !important;
        text-transform: uppercase;
        letter-spacing: 0.5px;
    }

    .gr-button:hover {
        transform: translateY(-2px) !important;
        box-shadow: 0 6px 25px rgba(102, 126, 234, 0.6) !important;
    }

    .gr-button:active {
        transform: translateY(0px) !important;
    }

    /* Input Fields */
    .gr-textbox, .gr-text-input {
        border-radius: 8px !important;
        border: 2px solid #e0e7ff !important;
        transition: all 0.3s ease !important;
    }

    .gr-textbox:focus, .gr-text-input:focus {
        border-color: #667eea !important;
        box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1) !important;
    }

    /* Loading Animation */
    .loading-container {
        text-align: center;
        padding: 3rem;
    }

    .loading-text {
        font-size: 1.5rem;
        color: #667eea;
        animation: pulse 1.5s ease-in-out infinite;
    }

    /* Error Messages */
    .error-message {
        background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
        color: white;
        padding: 1.5rem;
        border-radius: 12px;
        text-align: center;
        font-size: 1.3rem;
        font-weight: 600;
        box-shadow: 0 8px 32px rgba(245, 87, 108, 0.3);
        animation: shake 0.5s ease-in-out;
    }

    /* Success/Summary Box */
    .summary-box {
        background: linear-gradient(135deg, #a8edea 0%, #fed6e3 100%);
        padding: 1.5rem;
        border-radius: 12px;
        margin-bottom: 1.5rem;
        box-shadow: 0 8px 24px rgba(168, 237, 234, 0.3);
        animation: fadeInUp 0.6s ease-out;
    }

    /* Chat Section */
    .chat-section {
        animation: fadeInUp 0.8s ease-out;
    }

    /* Animations */
    @keyframes fadeIn {
        from {
            opacity: 0;
        }
        to {
            opacity: 1;
        }
    }

    @keyframes fadeInDown {
        from {
            opacity: 0;
            transform: translateY(-30px);
        }
        to {
            opacity: 1;
            transform: translateY(0);
        }
    }

    @keyframes fadeInUp {
        from {
            opacity: 0;
            transform: translateY(30px);
        }
        to {
            opacity: 1;
            transform: translateY(0);
        }
    }

    @keyframes pulse {
        0%, 100% {
            opacity: 1;
        }
        50% {
            opacity: 0.5;
        }
    }

    @keyframes shake {
        0%, 100% { transform: translateX(0); }
        25% { transform: translateX(-10px); }
        75% { transform: translateX(10px); }
    }

    @keyframes gradientShift {
        0% {
            background-position: 0% 50%;
        }
        50% {
            background-position: 100% 50%;
        }
        100% {
            background-position: 0% 50%;
        }
    }

    /* Responsive Design */
    @media (max-width: 768px) {
        .header-brand h1 {
            font-size: 1.8rem;
        }
        .main-title {
            font-size: 1.3rem;
        }
    }
    """

# Banner rendered at the top of the page.
_HEADER_HTML = """
        <div class="header-brand">
            <h1>🎓 AI YouTube Study Assistant</h1>
            <p>Transform lengthy videos into concise knowledge</p>
        </div>
    """

# Credits rendered at the bottom of the page.
_FOOTER_HTML = """
        <div class="footer-brand">
            <p><strong>Developed by Darsh Tayal</strong></p>
            <p>📧 <a href="mailto:darshtayal8@gmail.com">darshtayal8@gmail.com</a></p>
            <p style="margin-top: 1rem; font-size: 0.85rem; opacity: 0.9;">© 2024 All Rights Reserved</p>
        </div>
    """

# Page layout. Components are created in display order; the six rows act as
# "pages" whose visibility is toggled by setter() and execute().
with gr.Blocks(theme=_APP_THEME, css=_APP_CSS) as ui:

    gr.HTML(_HEADER_HTML)

    # Per-session slot that receives the vector store produced by setter().
    vs = gr.State()
    gr.HTML('<div class="main-title">📹 Why watch long YouTube videos when you could study from AI?</div>')

    # Page 1: link entry form (the only row visible at start-up).
    with gr.Row(visible=True) as first_page:
        youtube_link = gr.Textbox(
            label="Enter the youtube link here: ",
            lines=2,
            placeholder="https://www.youtube.com/watch?v=...",
        )
        submit_button = gr.Button("SUBMIT!")

    # Page 2: summary plus question/answer area.
    with gr.Row(visible=False) as chat_page:
        with gr.Column():
            summary = gr.Markdown(elem_classes="summary-box")
            gr.Markdown("### 💬 Now ask any question about the video:")
            ques = gr.Textbox(
                label="Enter the question here: ",
                lines=2,
                placeholder="What is the main topic of this video?",
            )
            submit_answer = gr.Button("SUBMIT!")
            answer = gr.TextArea(label="ANSWER")

    # Status and error pages, all hidden until a callback reveals one.
    with gr.Row(visible=False) as wrong_link_page:
        gr.HTML('<div class="error-message">❌ Sorry, your link wasn\'t correct. Please try again!</div>')

    with gr.Row(visible=False) as cc_not_enabled:
        gr.HTML('<div class="error-message">⚠️ The link you provided was either not valid or subtitles weren\'t enabled in that video</div>')

    with gr.Row(visible=False) as loading_page:
        gr.HTML('<div class="loading-container"><div class="loading-text">⏳ Loading... Please Wait</div></div>')

    with gr.Row(visible=False) as normal_error:
        gr.HTML('<div class="error-message">😔 SORRY, SOME ERROR OCCURRED. PLEASE TRY AGAIN LATER</div>')

    gr.HTML(_FOOTER_HTML)

    # Event wiring. The order of each outputs list must match the order of the
    # values produced by the corresponding callback.
    submit_button.click(
        setter,
        inputs=[youtube_link],
        outputs=[
            first_page,
            loading_page,
            chat_page,
            wrong_link_page,
            cc_not_enabled,
            normal_error,
            summary,
            vs,
        ],
    )
    submit_answer.click(
        execute,
        inputs=[vs, ques],
        outputs=[answer, chat_page, normal_error],
    )

# Start the Gradio server.
ui.launch()