Vikrant-Honbute committed on
Commit
d6b09f2
·
0 Parent(s):

Initial commit: YouTube & URL summarizer

Browse files
Files changed (3) hide show
  1. .gitignore +4 -0
  2. app.py +73 -0
  3. requirements.txt +26 -0
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ venv/
2
+ __pycache__/
3
+ .env
4
+ *.env
app.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import validators, streamlit as st
2
+ from langchain_core.prompts import PromptTemplate
3
+ from langchain_groq import ChatGroq
4
+ from langchain_classic.chains.summarize import load_summarize_chain
5
+ from langchain_community.document_loaders import YoutubeLoader, UnstructuredURLLoader
6
+ import urllib.error
7
+
# Streamlit app: summarize a YouTube transcript or a web page with a Groq LLM.
#
# Flow: collect API key + URL -> validate -> load documents -> run a "stuff"
# summarization chain -> render the summary. Streamlit re-executes this script
# top to bottom on every interaction, so everything lives at module level.

YOUTUBE_HOSTS = ("youtube.com", "youtu.be")


def _is_youtube_url(url: str) -> bool:
    """Return True when the *hostname* of ``url`` belongs to YouTube.

    The hostname is compared (rather than searching the whole URL for a
    substring) so that a web page whose path or query string merely mentions
    "youtube.com" is not mistaken for a video link.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urlparse

    host = (urlparse(url).hostname or "").lower()
    return any(host == known or host.endswith("." + known) for known in YOUTUBE_HOSTS)


def _build_loader(url: str):
    """Return the document loader matching ``url`` (YouTube or generic web).

    May raise if the URL cannot be turned into a loader, so callers should
    invoke it inside their error-handling boundary.
    """
    if _is_youtube_url(url):
        return YoutubeLoader.from_youtube_url(url, add_video_info=False)
    return UnstructuredURLLoader(
        urls=[url],
        # Keep TLS certificate verification enabled: the URL is user-supplied,
        # and disabling verification would allow tampered content through.
        ssl_verify=True,
        # Browser-like User-Agent, as in the original request headers.
        headers={
            "User-Agent": (
                "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_5_1) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/116.0.0.0 Safari/537.36"
            )
        },
    )


# Page chrome
st.set_page_config(page_title="SnapSummaryAI — YouTube & Web Summarizer", page_icon="🦜")
st.title("🔗📝 SnapSummaryAI — YouTube & Web Summarizer")
st.subheader("Summarize URL")

# Sidebar: the Groq API key is entered per session and never stored.
with st.sidebar:
    groq_api_key = st.text_input("Groq API Key", value="", type="password")

generic_url = st.text_input("URL", label_visibility="collapsed")

prompt_template = """
Provide a summary of the following content in 300 words:
Content: {text}
"""
prompt = PromptTemplate(template=prompt_template, input_variables=["text"])

if st.button("Summarize the Content from YT or Website"):
    # Normalize once so validation and loading see the same values.
    api_key = groq_api_key.strip()
    url = generic_url.strip()

    # Validate inputs
    if not api_key or not url:
        st.error("Please provide the API key and URL to get started.")
    elif not validators.url(url):
        st.error("Please enter a valid URL. It may be a YouTube video URL or website URL.")
    else:
        with st.spinner("Waiting..."):
            # Load data. Building the loader is inside the try as well, since
            # it can fail before any network request is made.
            try:
                docs = _build_loader(url).load()
            except urllib.error.HTTPError as exc:
                st.error(
                    f"The request failed with HTTP {exc.code}. "
                    "Try a different URL (for YouTube: non-private, non-short)."
                )
                st.stop()
            except Exception as exc:  # UI boundary: report instead of crashing
                st.error(f"Could not load content from the URL: {exc}")
                st.stop()

            # An empty result would make the LLM summarize nothing, so treat
            # it as an error rather than sending an empty prompt.
            if not docs:
                st.error(
                    "No content could be extracted from this URL "
                    "(the page may be empty or the video may have no transcript)."
                )
                st.stop()

            llm = ChatGroq(
                model="openai/gpt-oss-120b",
                groq_api_key=api_key,
            )

            # Summarization chain: "stuff" puts all documents into one prompt.
            chain = load_summarize_chain(
                llm,
                chain_type="stuff",
                prompt=prompt,
            )
            try:
                # `invoke` replaces the deprecated `Chain.run`.
                result = chain.invoke({"input_documents": docs})
            except Exception as exc:  # UI boundary: bad key, rate limit, etc.
                st.error(f"Summarization failed: {exc}")
                st.stop()

            st.write(result["output_text"])
requirements.txt ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ langchain
2
+ python-dotenv
3
+ openai
4
+ ipykernel
5
+ langchain-community
6
+ PyPDF
7
+ bs4 #beautiful soup
8
+ langchain-text-splitters
9
+ langchain-openai
10
+ chromadb
11
+ sentence_transformers
12
+ langchain_huggingface
13
+ langchain_chroma
14
+ langchain-openai
15
+ streamlit
16
+ langchain_groq
17
+ langchain_core
18
+ fastapi
19
+ uvicorn
20
+ langserve
21
+ langchain-classic
22
+ arxiv
23
+ wikipedia
24
+ langchain
25
+ validators==0.28.1
26
+ youtube_transcript_api