Ashkchamp committed on
Commit
7abc8ff
·
verified ·
1 Parent(s): e0ce0a1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -32
app.py CHANGED
@@ -1,4 +1,7 @@
1
- import os, re, validators, streamlit as st
 
 
 
2
  from dotenv import load_dotenv
3
  from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, VideoUnavailable
4
  from langchain.prompts import PromptTemplate
@@ -7,67 +10,77 @@ from langchain_groq import ChatGroq
7
  from langchain.schema import Document
8
  from langchain_community.document_loaders import UnstructuredURLLoader
9
  from langchain.document_loaders import PyPDFLoader
 
 
10
  # Load variables from a local .env file (if present) into the environment.
  load_dotenv()
11
  # Groq API key taken from the environment; falsy when GROQ_API_KEY is unset.
  GROQ_KEY = os.getenv("GROQ_API_KEY")
12
 
13
  # Page-level metadata (title and icon) for the Streamlit app.
  st.set_page_config(page_title="LangChain Summarizer", page_icon="🦜")
14
  # Heading rendered at the top of the page.
  st.title("🦜 LangChain: Summarize YT / Webpage / PDF")
15
 
16
# Two alternative inputs: a URL typed by the user, or an uploaded PDF file.
- generic_url = st.text_input("Paste a YouTube / web URL here:")
17
- uploaded_file = st.file_uploader("…or upload a PDF", type=["pdf"])
18
-
19
# Prompt passed as map_prompt to the map_reduce summarize chain.
- MAP_PROMPT = PromptTemplate(template="Write a concise summary of the following:\n\n{text}", input_variables=["text"])
20
# Prompt passed as combine_prompt to the map_reduce summarize chain.
- COMBINE_PROMPT = PromptTemplate(template="Provide an overall summary (~300 words):\n\n{text}", input_variables=["text"])
21
 
22
def get_video_id(url: str):
    """Extract the 11-character YouTube video ID from *url*.

    Recognised shapes are ``...?v=<id>`` / ``...&v=<id>``, ``youtu.be/<id>``
    and the ``/embed/``, ``/shorts/``, ``/live/`` and ``/v/`` path forms.

    The previous pattern accepted *any* 11 ID-like characters following a
    slash, so host names or path segments such as ``youtube-nocookie`` or
    ``subscriptions`` were returned as bogus IDs.

    Args:
        url: The URL (or URL fragment) to inspect.

    Returns:
        The video ID string, or ``None`` when no ID can be found.
    """
    m = re.search(
        r"(?:[?&]v=|youtu\.be/|/(?:embed|shorts|live|v)/)"
        # Exactly 11 ID characters: reject longer runs instead of truncating.
        r"([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])",
        url,
    )
    return m.group(1) if m else None
25
 
 
 
 
 
 
26
  def build_llm():
27
  if "llm" not in st.session_state:
28
  if not GROQ_KEY:
29
- raise RuntimeError("Groq API key missing")
30
- st.session_state.llm = ChatGroq(model="llama-3.3-70b-versatile", groq_api_key=GROQ_KEY, timeout=60_000)
31
  return st.session_state.llm
32
 
33
  def summarize(docs):
34
  llm = build_llm()
35
- chain = load_summarize_chain(llm, chain_type="map_reduce", map_prompt=MAP_PROMPT, combine_prompt=COMBINE_PROMPT)
36
- return chain.invoke({"input_documents": docs})["output_text"]
37
-
38
def chunk_text(text, size=4000):
    """Split *text* into consecutive Documents of at most *size* characters.

    Args:
        text: The string to split.
        size: Maximum number of characters per chunk.

    Returns:
        A list of Document objects in original order; empty for empty text.
    """
    pieces = []
    for start in range(0, len(text), size):
        stop = start + size
        pieces.append(Document(page_content=text[start:stop]))
    return pieces
40
 
41
  if st.button("Summarize"):
42
  if not GROQ_KEY:
43
- st.error("Groq key missing")
44
- elif not generic_url and not uploaded_file:
45
  st.error("Provide a URL or upload a PDF")
46
  else:
47
  try:
48
- with st.spinner("Processing"):
49
- if uploaded_file:
50
- tmp_path = f"/tmp/{uploaded_file.name}"
51
- with open(tmp_path, "wb") as f:
52
- f.write(uploaded_file.read())
53
- docs = PyPDFLoader(tmp_path).load()
54
- os.remove(tmp_path)
55
- st.success(summarize(docs))
56
- elif "youtube" in generic_url or "youtu.be" in generic_url:
57
- vid = get_video_id(generic_url)
 
58
  if not vid:
59
- st.error("Invalid YouTube URL")
60
  else:
61
  transcript = YouTubeTranscriptApi.get_transcript(vid)
62
  text = " ".join(t["text"] for t in transcript)
63
- st.success(summarize(chunk_text(text)))
 
64
  else:
65
- if not validators.url(generic_url):
66
  st.error("Invalid URL")
67
  else:
68
- docs = UnstructuredURLLoader(urls=[generic_url], ssl_verify=False, headers={"User-Agent":"Mozilla/5.0"}).load()
69
- st.success(summarize(docs))
 
 
 
 
 
70
  except (TranscriptsDisabled, VideoUnavailable) as yt_err:
71
  st.error(str(yt_err))
 
 
72
  except Exception as e:
73
- st.exception(e)
 
1
+ import os
2
+ import re
3
+ import validators
4
+ import streamlit as st
5
  from dotenv import load_dotenv
6
  from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, VideoUnavailable
7
  from langchain.prompts import PromptTemplate
 
10
  from langchain.schema import Document
11
  from langchain_community.document_loaders import UnstructuredURLLoader
12
  from langchain.document_loaders import PyPDFLoader
13
+ from groq._base_client import APIConnectionError
14
+
15
  # Load variables from a local .env file (if present) into the environment.
  load_dotenv()
16
  # Groq API key taken from the environment; falsy when GROQ_API_KEY is unset.
  GROQ_KEY = os.getenv("GROQ_API_KEY")
17
 
18
  # Page-level metadata (title and icon) for the Streamlit app.
  st.set_page_config(page_title="LangChain Summarizer", page_icon="🦜")
19
  # Heading rendered at the top of the page.
  st.title("🦜 LangChain: Summarize YT / Webpage / PDF")
20
 
21
# Two alternative inputs: a URL typed by the user, or an uploaded PDF file.
+ url_input = st.text_input("Paste a YouTube / web URL here:")
22
+ file_input = st.file_uploader("…or upload a PDF", type=["pdf"])
 
 
 
23
 
24
def get_video_id(url):
    """Extract the 11-character YouTube video ID from *url*.

    Recognised shapes are ``...?v=<id>`` / ``...&v=<id>``, ``youtu.be/<id>``
    and the ``/embed/``, ``/shorts/``, ``/live/`` and ``/v/`` path forms.

    The previous pattern accepted *any* 11 ID-like characters following a
    slash, so host names or path segments such as ``youtube-nocookie`` or
    ``subscriptions`` were returned as bogus IDs.

    Args:
        url: The URL (or URL fragment) to inspect.

    Returns:
        The video ID string, or ``None`` when no ID can be found.
    """
    m = re.search(
        r"(?:[?&]v=|youtu\.be/|/(?:embed|shorts|live|v)/)"
        # Exactly 11 ID characters: reject longer runs instead of truncating.
        r"([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])",
        url,
    )
    return m.group(1) if m else None
27
 
28
# Prompt handed to the "stuff" summarize chain; {text} is its only variable.
+ SUMMARY_PROMPT = PromptTemplate(
29
+ template="Provide a concise summary (~300 words):\n\nContent:\n{text}",
30
+ input_variables=["text"],
31
+ )
32
+
33
  def build_llm():
34
  if "llm" not in st.session_state:
35
  if not GROQ_KEY:
36
+ raise RuntimeError("Missing GROQ_API_KEY")
37
+ st.session_state.llm = ChatGroq(model="deepseek-r1-distill-llama-70b", groq_api_key=GROQ_KEY)
38
  return st.session_state.llm
39
 
40
  def summarize(docs):
41
  llm = build_llm()
42
+ chain = load_summarize_chain(llm, chain_type="stuff", prompt=SUMMARY_PROMPT)
43
+ return chain({"input_documents": docs})["output_text"]
 
 
 
44
 
45
  if st.button("Summarize"):
46
  if not GROQ_KEY:
47
+ st.error("Set GROQ_API_KEY in .env")
48
+ elif not url_input and not file_input:
49
  st.error("Provide a URL or upload a PDF")
50
  else:
51
  try:
52
+ with st.spinner("Fetching and summarizing…"):
53
+ if file_input:
54
+ tmp = f"/tmp/{file_input.name}"
55
+ with open(tmp, "wb") as f:
56
+ f.write(file_input.read())
57
+ docs = PyPDFLoader(tmp).load()
58
+ summary = summarize(docs)
59
+ os.remove(tmp)
60
+ st.success(summary)
61
+ elif "youtube" in url_input or "youtu.be" in url_input:
62
+ vid = get_video_id(url_input)
63
  if not vid:
64
+ st.error("Couldn’t extract YouTube ID")
65
  else:
66
  transcript = YouTubeTranscriptApi.get_transcript(vid)
67
  text = " ".join(t["text"] for t in transcript)
68
+ summary = summarize([Document(page_content=text)])
69
+ st.success(summary)
70
  else:
71
+ if not validators.url(url_input):
72
  st.error("Invalid URL")
73
  else:
74
+ docs = UnstructuredURLLoader(
75
+ urls=[url_input],
76
+ ssl_verify=False,
77
+ headers={"User-Agent": "Mozilla/5.0"}
78
+ ).load()
79
+ summary = summarize(docs)
80
+ st.success(summary)
81
  except (TranscriptsDisabled, VideoUnavailable) as yt_err:
82
  st.error(str(yt_err))
83
+ except APIConnectionError:
84
+ st.error("Connection to Groq API failed. Check network and API key.")
85
  except Exception as e:
86
+ st.error(f"Unexpected error: {e}")