Lovish Singla committed on
Commit
1eb24e1
·
unverified ·
1 Parent(s): 2810640

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -38
app.py CHANGED
@@ -15,9 +15,9 @@ st.subheader("Summarize Content from a URL or Uploaded PDF")
15
 
16
  # Sidebar: API Key Inputs
17
  with st.sidebar:
18
- st.write("get your groq api key from https://groq.com/ and get your langsmith api key from https://langsmith.com/")
19
  groq_api_key = st.text_input("Groq API Key", value="", type="password")
20
- langsmith_api_key = st.text_input("LangSmith API Key", value="", type="password") # LangSmith API Key
21
 
22
  # Set LangSmith environment variables
23
  if langsmith_api_key:
@@ -30,31 +30,28 @@ generic_url = st.text_input("URL (YouTube or Website)", label_visibility="collap
30
  # PDF File Uploader
31
  uploaded_file = st.file_uploader("Upload a PDF File", type=["pdf"])
32
 
33
- # Prompt Template for Summarization
34
- initial_prompt_template = """
35
- Write a concise summary of the following content:
36
- Content: {text}
37
- """
38
- initial_prompt = PromptTemplate(template=initial_prompt_template, input_variables=["text"])
39
 
40
- # Define the refinement prompt
41
- refinement_prompt_template = """
42
- The following is a summary that needs refinement:
43
- Current Summary: {existing_answer}
 
 
44
 
45
- We have additional content that can be used to refine the summary:
46
- Content: {text}
47
-
48
- Please refine the current summary to include the new information while maintaining conciseness.
49
- """
50
- refinement_prompt = PromptTemplate(template=refinement_prompt_template, input_variables=["existing_answer", "text"])
51
-
52
- # Initialize LLM with Groq API Key
53
  if groq_api_key:
54
  try:
55
  llm = ChatGroq(model="gemma2-9b-it", groq_api_key=groq_api_key)
56
  except Exception as e:
57
  st.error(f"Failed to initialize Groq client: {e}")
 
 
 
58
 
59
  # Button to Summarize Content
60
  if st.button("Summarize the Content"):
@@ -66,45 +63,50 @@ if st.button("Summarize the Content"):
66
  st.error("Please provide a valid URL or upload a PDF file.")
67
  elif generic_url and not validators.url(generic_url):
68
  st.error("Please enter a valid URL. It can be a YouTube video or website URL.")
 
 
69
  else:
70
  try:
71
  with st.spinner("Processing..."):
72
- # Load content from URL (YouTube or Website)
 
 
73
  if generic_url.strip():
74
- if "youtube.com" in generic_url:
75
  loader = YoutubeLoader.from_youtube_url(generic_url, add_video_info=True)
76
  else:
77
  loader = UnstructuredURLLoader(
78
  urls=[generic_url],
79
  ssl_verify=False,
80
  headers={
81
- "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_5_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36"
 
 
82
  },
83
  )
84
  docs = loader.load()
85
 
86
- # Load content from uploaded PDF
87
  elif uploaded_file:
88
  with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
89
  temp_file.write(uploaded_file.read())
90
  temp_file_path = temp_file.name
91
-
92
- # Load the PDF using PyPDFLoader
93
  loader = PyPDFLoader(temp_file_path)
94
  docs = loader.load_and_split()
95
 
96
- # Summarize the content with LangSmith tracking enabled
97
- chain = load_summarize_chain(
98
- llm,
99
- chain_type="refine",
100
- question_prompt=initial_prompt,
101
- refine_prompt=refinement_prompt,
102
- verbose=True
103
- )
104
- output_summary = chain.run(docs)
105
-
106
- # Display the summary
107
- st.success(output_summary)
 
108
 
109
  except Exception as e:
110
  st.exception(f"Exception: {e}")
 
15
 
16
  # Sidebar: API Key Inputs
17
  with st.sidebar:
18
+ st.write("Get your Groq API key from https://groq.com/ and your LangSmith API key from https://langsmith.com/")
19
  groq_api_key = st.text_input("Groq API Key", value="", type="password")
20
+ langsmith_api_key = st.text_input("LangSmith API Key", value="", type="password")
21
 
22
  # Set LangSmith environment variables
23
  if langsmith_api_key:
 
30
  # PDF File Uploader
31
  uploaded_file = st.file_uploader("Upload a PDF File", type=["pdf"])
32
 
33
+ # Prompt Templates
34
+ initial_prompt = PromptTemplate(
35
+ template="Write a concise summary of the following content:\nContent: {text}",
36
+ input_variables=["text"]
37
+ )
 
38
 
39
+ refinement_prompt = PromptTemplate(
40
+ template="The following is a summary that needs refinement:\nCurrent Summary: {existing_answer}\n\n"
41
+ "We have additional content that can be used to refine the summary:\nContent: {text}\n\n"
42
+ "Please refine the current summary to include the new information while maintaining conciseness.",
43
+ input_variables=["existing_answer", "text"]
44
+ )
45
 
46
+ # Initialize LLM
 
 
 
 
 
 
 
47
  if groq_api_key:
48
  try:
49
  llm = ChatGroq(model="gemma2-9b-it", groq_api_key=groq_api_key)
50
  except Exception as e:
51
  st.error(f"Failed to initialize Groq client: {e}")
52
+ llm = None
53
+ else:
54
+ llm = None
55
 
56
  # Button to Summarize Content
57
  if st.button("Summarize the Content"):
 
63
  st.error("Please provide a valid URL or upload a PDF file.")
64
  elif generic_url and not validators.url(generic_url):
65
  st.error("Please enter a valid URL. It can be a YouTube video or website URL.")
66
+ elif not llm:
67
+ st.error("LLM not initialized. Please check your API key.")
68
  else:
69
  try:
70
  with st.spinner("Processing..."):
71
+ docs = []
72
+
73
+ # Load from URL
74
  if generic_url.strip():
75
+ if "youtube.com" in generic_url or "youtu.be" in generic_url:
76
  loader = YoutubeLoader.from_youtube_url(generic_url, add_video_info=True)
77
  else:
78
  loader = UnstructuredURLLoader(
79
  urls=[generic_url],
80
  ssl_verify=False,
81
  headers={
82
+ "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_5_1) "
83
+ "AppleWebKit/537.36 (KHTML, like Gecko) "
84
+ "Chrome/116.0.0.0 Safari/537.36"
85
  },
86
  )
87
  docs = loader.load()
88
 
89
+ # Load from PDF
90
  elif uploaded_file:
91
  with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
92
  temp_file.write(uploaded_file.read())
93
  temp_file_path = temp_file.name
 
 
94
  loader = PyPDFLoader(temp_file_path)
95
  docs = loader.load_and_split()
96
 
97
+ # Safety check
98
+ if not docs:
99
+ st.error("❌ No content could be extracted from the given source. Please try another file or URL.")
100
+ else:
101
+ chain = load_summarize_chain(
102
+ llm,
103
+ chain_type="refine",
104
+ question_prompt=initial_prompt,
105
+ refine_prompt=refinement_prompt,
106
+ verbose=True
107
+ )
108
+ output_summary = chain.run(docs)
109
+ st.success(output_summary)
110
 
111
  except Exception as e:
112
  st.exception(f"Exception: {e}")