Lovish Singla committed on
Commit
f94e79b
·
unverified ·
0 Parent(s):

Add files via upload

Browse files
Files changed (2) hide show
  1. app.py +105 -0
  2. requirements.txt +21 -0
app.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit app that summarizes a YouTube video, a web page, or an uploaded PDF.

The user supplies a Groq API key and a LangSmith API key in the sidebar, then
either a URL or a PDF file.  The content is loaded into LangChain documents and
summarized with a "refine" summarize chain running on a Groq-hosted model.
When both a URL and a PDF are supplied, the URL takes precedence.
"""

import contextlib
import os
import tempfile
from urllib.parse import urlparse

import streamlit as st
import validators
from langchain.chains.summarize import load_summarize_chain
from langchain.prompts import PromptTemplate
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import YoutubeLoader, UnstructuredURLLoader
from langchain_groq import ChatGroq

# Browser-like User-Agent sent when fetching generic web pages.
_USER_AGENT = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_5_1) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36"
)


def _is_youtube_url(url):
    """Return True when *url* points at a YouTube host.

    The decision is made on the parsed hostname rather than a substring of the
    whole URL, so ``youtu.be`` short links are recognized and an unrelated site
    that merely mentions "youtube.com" in its path or query is not.
    """
    host = (urlparse(url).hostname or "").lower()
    return host in ("youtube.com", "youtu.be") or host.endswith(".youtube.com")


def _load_url_documents(url):
    """Load documents from a YouTube video or a generic web page at *url*."""
    if _is_youtube_url(url):
        loader = YoutubeLoader.from_youtube_url(url, add_video_info=True)
    else:
        loader = UnstructuredURLLoader(
            urls=[url],
            # NOTE(review): TLS certificate verification is disabled here, as in
            # the original code.  Confirm this is intentional before deploying.
            ssl_verify=False,
            headers={"User-Agent": _USER_AGENT},
        )
    return loader.load()


def _load_pdf_documents(uploaded_pdf):
    """Load and split the pages of an uploaded PDF.

    PyPDFLoader needs a filesystem path, so the upload is written to a
    temporary file, which is removed again once loading has finished.
    """
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
        # getvalue() returns the full buffer regardless of the current stream
        # position, unlike read(), which yields nothing once consumed.
        temp_file.write(uploaded_pdf.getvalue())
        temp_file_path = temp_file.name

    try:
        return PyPDFLoader(temp_file_path).load_and_split()
    finally:
        # Best-effort cleanup: a failed delete must not mask the load result.
        with contextlib.suppress(OSError):
            os.remove(temp_file_path)


# Streamlit App Configuration
st.set_page_config(page_title="LangChain: Summarize Text From YT, Website, or PDF", page_icon="🦜")
st.title("🦜 LangChain: Summarize Text From YT, Website, or PDF")
st.subheader("Summarize Content from a URL or Uploaded PDF")

# Sidebar: API Key Inputs
with st.sidebar:
    groq_api_key = st.text_input("Groq API Key", value="", type="password")
    langsmith_api_key = st.text_input("LangSmith API Key", value="", type="password")  # LangSmith API Key

# Set LangSmith environment variables
if langsmith_api_key:
    os.environ["LANGCHAIN_TRACING_V2"] = "true"
    os.environ["LANGCHAIN_API_KEY"] = langsmith_api_key

# URL Input
generic_url = st.text_input("URL (YouTube or Website)", label_visibility="collapsed")

# PDF File Uploader
uploaded_file = st.file_uploader("Upload a PDF File", type=["pdf"])

# Prompt Template for Summarization
initial_prompt_template = """
Write a concise summary of the following content:
Content: {text}
"""
initial_prompt = PromptTemplate(template=initial_prompt_template, input_variables=["text"])

# Define the refinement prompt
refinement_prompt_template = """
The following is a summary that needs refinement:
Current Summary: {existing_answer}

We have additional content that can be used to refine the summary:
Content: {text}

Please refine the current summary to include the new information while maintaining conciseness.
"""
refinement_prompt = PromptTemplate(template=refinement_prompt_template, input_variables=["existing_answer", "text"])

# Button to Summarize Content
if st.button("Summarize the Content"):
    # Strip once so validation and loading see the same value; pasted URLs
    # often carry surrounding whitespace.
    target_url = generic_url.strip()

    if not groq_api_key.strip():
        st.error("Please provide the Groq API Key to get started.")
    elif not langsmith_api_key.strip():
        st.error("Please provide the LangSmith API Key for tracking.")
    elif not (target_url or uploaded_file):
        st.error("Please provide a valid URL or upload a PDF file.")
    elif target_url and not validators.url(target_url):
        st.error("Please enter a valid URL. It can be a YouTube video or website URL.")
    else:
        try:
            with st.spinner("Processing..."):
                # Initialize the LLM only after the key has been validated:
                # building the client with an empty key on every rerun can
                # raise before the user has had a chance to enter one.
                llm = ChatGroq(model="gemma2-9b-it", groq_api_key=groq_api_key.strip())

                # The URL takes precedence when both inputs are provided.
                if target_url:
                    docs = _load_url_documents(target_url)
                else:
                    docs = _load_pdf_documents(uploaded_file)

                if not docs:
                    st.error("No content could be extracted from the provided source.")
                else:
                    # Summarize the content with LangSmith tracking enabled
                    chain = load_summarize_chain(
                        llm,
                        chain_type="refine",
                        question_prompt=initial_prompt,
                        refine_prompt=refinement_prompt,
                        verbose=True,
                    )
                    output_summary = chain.run(docs)

                    # Display the summary
                    st.success(output_summary)

        except Exception as e:
            # st.exception expects the exception instance itself so that it
            # can render the type and traceback.
            st.exception(e)
requirements.txt ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ langchain
2
+ ipykernel
3
+ langchain-community
4
+ pypdf
5
+ pymupdf
6
+ langchain-text-splitters
7
+ langchain-openai
8
+ sentence_transformers
9
+ langchain_huggingface
10
+ duckdb
11
+ pandas
12
+ openai
13
+ langchain-groq
14
+ duckduckgo_search==5.3.1b1
15
+ pymupdf
16
+ validators==0.28.1
17
+ youtube_transcript_api
18
+ unstructured
19
+ pytube
20
+ numexpr
21
+ huggingface_hub