Spaces:

lovi07
/

Text_Summarization

Sleeping

App Files Community

Lovish Singla committed on Aug 9, 2025

Commit

1eb24e1

unverified ·

1 Parent(s): 2810640

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -38

app.py CHANGED Viewed

@@ -15,9 +15,9 @@ st.subheader("Summarize Content from a URL or Uploaded PDF")
 # Sidebar: API Key Inputs
 with st.sidebar:
-    st.write("get your groq api key from https://groq.com/ and get your langsmith api key from https://langsmith.com/")
     groq_api_key = st.text_input("Groq API Key", value="", type="password")
-    langsmith_api_key = st.text_input("LangSmith API Key", value="", type="password")  # LangSmith API Key
 # Set LangSmith environment variables
 if langsmith_api_key:
@@ -30,31 +30,28 @@ generic_url = st.text_input("URL (YouTube or Website)", label_visibility="collap
 # PDF File Uploader
 uploaded_file = st.file_uploader("Upload a PDF File", type=["pdf"])
-# Prompt Template for Summarization
-initial_prompt_template = """
-Write a concise summary of the following content:
-Content: {text}
-"""
-initial_prompt = PromptTemplate(template=initial_prompt_template, input_variables=["text"])
-# Define the refinement prompt
-refinement_prompt_template = """
-The following is a summary that needs refinement:
-Current Summary: {existing_answer}
-We have additional content that can be used to refine the summary:
-Content: {text}
-Please refine the current summary to include the new information while maintaining conciseness.
-"""
-refinement_prompt = PromptTemplate(template=refinement_prompt_template, input_variables=["existing_answer", "text"])
-# Initialize LLM with Groq API Key
 if groq_api_key:
     try:
         llm = ChatGroq(model="gemma2-9b-it", groq_api_key=groq_api_key)
     except Exception as e:
         st.error(f"Failed to initialize Groq client: {e}")
 # Button to Summarize Content
 if st.button("Summarize the Content"):
@@ -66,45 +63,50 @@ if st.button("Summarize the Content"):
         st.error("Please provide a valid URL or upload a PDF file.")
     elif generic_url and not validators.url(generic_url):
         st.error("Please enter a valid URL. It can be a YouTube video or website URL.")
     else:
         try:
             with st.spinner("Processing..."):
-                # Load content from URL (YouTube or Website)
                 if generic_url.strip():
-                    if "youtube.com" in generic_url:
                         loader = YoutubeLoader.from_youtube_url(generic_url, add_video_info=True)
                     else:
                         loader = UnstructuredURLLoader(
                             urls=[generic_url],
                             ssl_verify=False,
                             headers={
-                                "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_5_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36"
                             },
                         )
                     docs = loader.load()
-                # Load content from uploaded PDF
                 elif uploaded_file:
                     with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
                         temp_file.write(uploaded_file.read())
                         temp_file_path = temp_file.name
-                    # Load the PDF using PyPDFLoader
                     loader = PyPDFLoader(temp_file_path)
                     docs = loader.load_and_split()
-                # Summarize the content with LangSmith tracking enabled
-                chain = load_summarize_chain(
-                    llm,
-                    chain_type="refine",
-                    question_prompt=initial_prompt,
-                    refine_prompt=refinement_prompt,
-                    verbose=True
-                )
-                output_summary = chain.run(docs)
-                # Display the summary
-                st.success(output_summary)
         except Exception as e:
             st.exception(f"Exception: {e}")

 # Sidebar: API Key Inputs
 with st.sidebar:
+    st.write("Get your Groq API key from https://groq.com/ and your LangSmith API key from https://langsmith.com/")
     groq_api_key = st.text_input("Groq API Key", value="", type="password")
+    langsmith_api_key = st.text_input("LangSmith API Key", value="", type="password")
 # Set LangSmith environment variables
 if langsmith_api_key:
 # PDF File Uploader
 uploaded_file = st.file_uploader("Upload a PDF File", type=["pdf"])
+# Prompt Templates
+initial_prompt = PromptTemplate(
+    template="Write a concise summary of the following content:\nContent: {text}",
+    input_variables=["text"]
+)
+refinement_prompt = PromptTemplate(
+    template="The following is a summary that needs refinement:\nCurrent Summary: {existing_answer}\n\n"
+             "We have additional content that can be used to refine the summary:\nContent: {text}\n\n"
+             "Please refine the current summary to include the new information while maintaining conciseness.",
+    input_variables=["existing_answer", "text"]
+)
+# Initialize LLM
 if groq_api_key:
     try:
         llm = ChatGroq(model="gemma2-9b-it", groq_api_key=groq_api_key)
     except Exception as e:
         st.error(f"Failed to initialize Groq client: {e}")
+        llm = None
+else:
+    llm = None
 # Button to Summarize Content
 if st.button("Summarize the Content"):
         st.error("Please provide a valid URL or upload a PDF file.")
     elif generic_url and not validators.url(generic_url):
         st.error("Please enter a valid URL. It can be a YouTube video or website URL.")
+    elif not llm:
+        st.error("LLM not initialized. Please check your API key.")
     else:
         try:
             with st.spinner("Processing..."):
+                docs = []
+                # Load from URL
                 if generic_url.strip():
+                    if "youtube.com" in generic_url or "youtu.be" in generic_url:
                         loader = YoutubeLoader.from_youtube_url(generic_url, add_video_info=True)
                     else:
                         loader = UnstructuredURLLoader(
                             urls=[generic_url],
                             ssl_verify=False,
                             headers={
+                                "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_5_1) "
+                                              "AppleWebKit/537.36 (KHTML, like Gecko) "
+                                              "Chrome/116.0.0.0 Safari/537.36"
                             },
                         )
                     docs = loader.load()
+                # Load from PDF
                 elif uploaded_file:
                     with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
                         temp_file.write(uploaded_file.read())
                         temp_file_path = temp_file.name
                     loader = PyPDFLoader(temp_file_path)
                     docs = loader.load_and_split()
+                # Safety check
+                if not docs:
+                    st.error("❌ No content could be extracted from the given source. Please try another file or URL.")
+                else:
+                    chain = load_summarize_chain(
+                        llm,
+                        chain_type="refine",
+                        question_prompt=initial_prompt,
+                        refine_prompt=refinement_prompt,
+                        verbose=True
+                    )
+                    output_summary = chain.run(docs)
+                    st.success(output_summary)
         except Exception as e:
             st.exception(f"Exception: {e}")