Spaces:
Sleeping
Sleeping
Upload 2 files
Browse files- app.py +83 -0
- youtube_cookies.txt +6 -0
app.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
from pathlib import Path
from urllib.parse import urlparse

import streamlit as st
import validators
import yt_dlp
from langchain.chains.summarize import load_summarize_chain
from langchain.docstore.document import Document
from langchain.prompts import PromptTemplate
from langchain_community.document_loaders import UnstructuredURLLoader
from langchain_groq import ChatGroq
|
| 9 |
+
|
| 10 |
+
# Streamlit App Configuration
st.set_page_config(page_title="LangChain: Summarize Text From YT or Website", page_icon="🦜")
st.title("🦜 LangChain: Summarize Text From YT or Website")
st.subheader('Summarize URL')

# The Groq API key is read from the environment (for example a Space secret).
# Never hard-code it: anything committed to this file is readable by everyone
# who can see the repository, including its history.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "").strip()
if not GROQ_API_KEY:
    st.error("GROQ_API_KEY is not set. Add it as an environment variable or Space secret.")
    # Halt this script run here; the LLM client below cannot be built without a key.
    st.stop()

# URL input field
generic_url = st.text_input("URL", label_visibility="collapsed")

# Mixtral model served through the Groq API. Set GROQ_MODEL to use a different
# model without editing the code.
llm = ChatGroq(
    model=os.environ.get("GROQ_MODEL", "mixtral-8x7b-32768"),
    groq_api_key=GROQ_API_KEY,
)

# Prompt used by the summarize chain; {text} receives the loaded content.
prompt_template = """
Provide a summary of the following content in 300 words:
Content:{text}

"""
prompt = PromptTemplate(template=prompt_template, input_variables=["text"])
|
| 30 |
+
|
| 31 |
+
def load_youtube_data(url):
    """Fetch a YouTube video's title and description as a single Document.

    Only metadata is requested (``download=False``); no media is downloaded.

    Args:
        url: URL of the YouTube video.

    Returns:
        A one-element list holding a ``Document`` whose ``page_content`` is
        the title and description, and whose metadata carries the title.

    Raises:
        ValueError: If yt-dlp fails to extract the video information. The
            original exception is attached as ``__cause__``.
    """
    # Cookies file: YOUTUBE_COOKIES_FILE if set, otherwise the
    # youtube_cookies.txt that sits next to this script. A machine-specific
    # absolute path would not exist on the deployment host.
    cookie_path = Path(
        os.environ.get("YOUTUBE_COOKIES_FILE")
        or Path(__file__).with_name("youtube_cookies.txt")
    )

    ydl_opts = {'quiet': True}
    if cookie_path.is_file():
        # yt-dlp's Python API names this option 'cookiefile'.
        ydl_opts['cookiefile'] = str(cookie_path)

    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=False)
        # `or` also covers keys that are present but None or empty.
        title = info.get('title') or 'No Title'
        description = info.get('description') or 'No Description'
        content = f"Title: {title}\n\nDescription: {description}"
        return [Document(page_content=content, metadata={"title": title})]
    except Exception as e:
        raise ValueError(f"Failed to extract YouTube data: {str(e)}") from e
|
| 46 |
+
|
| 47 |
+
def load_website_data(url):
    """Load a web page with UnstructuredURLLoader and return its documents.

    Args:
        url: Address of the page to load.

    Returns:
        A list of ``Document`` objects, one per document produced by the
        loader, each copying that document's ``page_content`` and ``metadata``.
    """
    # Browser-like User-Agent sent with the request.
    request_headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_5_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36"
    }
    # NOTE(review): ssl_verify=False presumably disables TLS certificate
    # verification for the fetch. Confirm this is intended for user-supplied URLs.
    page_loader = UnstructuredURLLoader(
        urls=[url],
        ssl_verify=False,
        headers=request_headers,
    )

    rebuilt_docs = []
    for loaded in page_loader.load():
        rebuilt_docs.append(
            Document(page_content=loaded.page_content, metadata=loaded.metadata)
        )
    return rebuilt_docs
|
| 55 |
+
|
| 56 |
+
def _is_youtube_url(url):
    """Return True when the URL's host is youtube.com, youtu.be, or a subdomain of either."""
    host = (urlparse(url).hostname or "").lower()
    return host in ("youtube.com", "youtu.be") or host.endswith((".youtube.com", ".youtu.be"))


# Button handler: validate the URL, load its content, and show the summary.
if st.button("Summarize the Content from YT or Website"):
    # Strip once so the same cleaned value is validated and loaded; a URL
    # pasted with stray whitespace would otherwise fail validation.
    target_url = generic_url.strip()

    # Validate URL input
    if not target_url:
        st.error("Please provide a URL to get started")
    elif not validators.url(target_url):
        st.error("Please enter a valid URL. It can be a YouTube video URL or website URL.")
    else:
        try:
            with st.spinner("Processing..."):
                # Load data based on URL type. The host is checked rather than
                # the whole string, so a page that merely mentions youtube.com
                # in its path or query is treated as a website.
                if _is_youtube_url(target_url):
                    try:
                        docs = load_youtube_data(target_url)
                    except ValueError as e:
                        st.error(f"Authentication required or unable to process the video: {e}")
                        docs = []
                else:
                    docs = load_website_data(target_url)

                # Proceed if docs are available
                if docs:
                    chain = load_summarize_chain(llm, chain_type="stuff", prompt=prompt)
                    output_summary = chain.run(docs)
                    st.success(output_summary)
                else:
                    st.error("No content could be summarized.")
        except Exception as e:
            # st.exception expects the exception object itself, not a string.
            st.exception(e)
|
youtube_cookies.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Netscape HTTP Cookie File
|
| 2 |
+
# This is a generated file! Do not edit.
|
| 3 |
+
|
| 4 |
+
youtube.com TRUE / FALSE 1680999487 CONSENT YES+cb.20230328-07-p0.en+FX+306
|
| 5 |
+
youtube.com TRUE / FALSE 1680999487 YSC L1bQ2Wkm8J0
|
| 6 |
+
youtube.com TRUE / FALSE 1680999487 PREF f1=50000000
|