PranavReddy18 committed on
Commit
70ab642
·
verified ·
1 Parent(s): 20c5de5

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +83 -0
  2. youtube_cookies.txt +6 -0
app.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from pathlib import Path
from urllib.parse import urlparse

import streamlit as st
import validators
import yt_dlp
from langchain.chains.summarize import load_summarize_chain
from langchain.docstore.document import Document
from langchain.prompts import PromptTemplate
from langchain_community.document_loaders import UnstructuredURLLoader
from langchain_groq import ChatGroq
9
+
10
# Streamlit App Configuration
st.set_page_config(page_title="LangChain: Summarize Text From YT or Website", page_icon="🦜")
st.title("🦜 LangChain: Summarize Text From YT or Website")
st.subheader('Summarize URL')

# The Groq API key is read from the environment so that no credential lives in
# source control. Any key that was ever committed to the repository should be
# treated as compromised and revoked.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "").strip()
if not GROQ_API_KEY:
    st.error("The GROQ_API_KEY environment variable is not set. Set it and restart the app.")
    # Halt this script run: nothing below can work without a key.
    st.stop()

# Model name; can be overridden through the environment without a code change.
GROQ_MODEL = os.environ.get("GROQ_MODEL", "mixtral-8x7b-32768")

# URL input field
generic_url = st.text_input("URL", label_visibility="collapsed")

# Chat model served through the Groq API
llm = ChatGroq(model=GROQ_MODEL, groq_api_key=GROQ_API_KEY)

# Prompt used by the summarization chain; {text} receives the loaded content.
prompt_template = """
Provide a summary of the following content in 300 words:
Content:{text}

"""
prompt = PromptTemplate(template=prompt_template, input_variables=["text"])
30
+
31
def load_youtube_data(url, cookie_file=None):
    """Fetch a YouTube video's title and description as a LangChain document.

    Only metadata is extracted; the video itself is not downloaded.

    Args:
        url: URL of the YouTube video.
        cookie_file: Optional path to a Netscape-format cookies file. When
            omitted, ``youtube_cookies.txt`` located next to this script is
            used if that file exists; otherwise no cookies are sent.

    Returns:
        A one-element list holding a ``Document`` whose content is the video
        title followed by its description, with the title in the metadata.

    Raises:
        ValueError: If yt-dlp fails or returns no information for ``url``.
    """
    ydl_opts = {'quiet': True}

    if cookie_file is None:
        # Resolve relative to the script instead of a machine-specific
        # absolute path so the app works wherever it is deployed.
        default_cookie_file = Path(__file__).resolve().parent / 'youtube_cookies.txt'
        if default_cookie_file.is_file():
            cookie_file = default_cookie_file
    if cookie_file is not None:
        # yt-dlp's option for a cookies file is 'cookiefile'.
        ydl_opts['cookiefile'] = str(cookie_file)

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=False)
    except Exception as e:
        # Keep the original cause attached for debugging.
        raise ValueError(f"Failed to extract YouTube data: {str(e)}") from e

    if not info:
        raise ValueError("Failed to extract YouTube data: no information returned")

    # A key may be present with a None value, so fall back explicitly rather
    # than relying on the dict.get default.
    title = info.get('title') or 'No Title'
    description = info.get('description') or 'No Description'
    content = f"Title: {title}\n\nDescription: {description}"
    return [Document(page_content=content, metadata={"title": title})]
46
+
47
def load_website_data(url):
    """Load the page at ``url`` and return its content as documents.

    Args:
        url: Address of the web page to load.

    Returns:
        A list containing one new ``Document`` per document produced by the
        loader, each carrying the same page content and metadata.
    """
    browser_headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_5_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36"
    }
    # NOTE(review): ssl_verify=False presumably turns off TLS certificate
    # verification for the request — confirm this is intended.
    url_loader = UnstructuredURLLoader(urls=[url], ssl_verify=False, headers=browser_headers)

    documents = []
    for loaded in url_loader.load():
        documents.append(Document(page_content=loaded.page_content, metadata=loaded.metadata))
    return documents
55
+
56
def _is_youtube_url(url):
    """Return True when the host of ``url`` is youtube.com, youtu.be or a subdomain of either."""
    host = (urlparse(url).hostname or "").lower()
    return any(
        host == domain or host.endswith("." + domain)
        for domain in ("youtube.com", "youtu.be")
    )


# Summarize the entered URL when the button is pressed.
if st.button("Summarize the Content from YT or Website"):
    # Strip once so that validation and loading see the same value.
    target_url = generic_url.strip()

    # Validate URL input
    if not target_url:
        st.error("Please provide a URL to get started")
    elif not validators.url(target_url):
        st.error("Please enter a valid URL. It can be a YouTube video URL or website URL.")
    else:
        try:
            with st.spinner("Processing..."):
                # Choose the loader from the URL's host rather than a
                # substring match, so a page that merely mentions
                # youtube.com in its path or query is loaded as a website.
                if _is_youtube_url(target_url):
                    try:
                        docs = load_youtube_data(target_url)
                    except ValueError as e:
                        st.error(f"Authentication required or unable to process the video: {e}")
                        docs = []
                else:
                    docs = load_website_data(target_url)

                # Proceed if docs are available
                if docs:
                    chain = load_summarize_chain(llm, chain_type="stuff", prompt=prompt)
                    output_summary = chain.run(docs)
                    st.success(output_summary)
                else:
                    st.error("No content could be summarized.")
        except Exception as e:
            # Pass the exception object itself so its type and traceback are shown.
            st.exception(e)
youtube_cookies.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ # Netscape HTTP Cookie File
2
+ # This is a generated file! Do not edit.
3
+
4
+ youtube.com TRUE / FALSE 1680999487 CONSENT YES+cb.20230328-07-p0.en+FX+306
5
+ youtube.com TRUE / FALSE 1680999487 YSC L1bQ2Wkm8J0
6
+ youtube.com TRUE / FALSE 1680999487 PREF f1=50000000