"""Streamlit app: scrape paragraphs from a web page, then summarize them or
answer questions about them with Hugging Face pipelines."""

import streamlit as st
import requests
from bs4 import BeautifulSoup
import re
from cleantext import clean
import streamlit.components.v1 as component
from transformers import pipeline
from functions import Copy_Text
from functions import *

# Lower bound of the summary-length slider (in tokens).
MIN_SUMMARY_TOKENS = 10
# Default summary length as a fraction of the scraped word count.
SUMMARY_RATIO = 0.3


### import animation
def particle(Js_file):
    """Render the HTML/JS animation stored in *Js_file* as a 400px component."""
    with open(Js_file, encoding="utf-8") as f:
        component.html(f"{f.read()}", height=400)


### insert external css
def insert_css(css_file: str):
    """Inject the stylesheet stored in *css_file* into the page.

    The file contents are wrapped in a ``<style>`` element; without the
    wrapper (as in the previous empty f-string) nothing was injected.
    """
    with open(css_file, encoding="utf-8") as f:
        st.markdown(f"<style>{f.read()}</style>", unsafe_allow_html=True)


@st.cache_resource
def _load_pipeline(task: str, model: str):
    """Build a transformers pipeline once and reuse it across script reruns.

    Streamlit re-executes the whole script on every widget interaction, so an
    uncached ``pipeline(...)`` call would reload the model each time.
    """
    return pipeline(task, model=model)


# page settings
st.set_page_config(
    layout="wide",
    initial_sidebar_state="collapsed"
)

# sidebar
app_sidebar = st.sidebar
with app_sidebar:
    select_mode = st.selectbox(
        label="Select Mode",
        options=["Summarizer", "Que/Ans"],
        key="mode selector",
        index=0
    )
    if select_mode == "Que/Ans":
        st.write("### Que/Ans Settings")
        max_answer_length = st.slider(
            label="Max answer",
            min_value=1,
            max_value=10,
            key="max answer",
            value=4
        )
        # The slider works in steps of ten answer tokens.
        max_answer_length = max_answer_length * 10
        Best_size = st.slider(
            label="n best size",
            min_value=1,
            max_value=10,
            key="best size",
            value=5
        )

# Initialize session state
if 'scraped_paragraphs' not in st.session_state:
    st.session_state.scraped_paragraphs = []
if 'summarizer_mode' not in st.session_state:
    st.session_state.summarizer_mode = False
if 'summary' not in st.session_state:
    st.session_state.summary = []

# Three-column layout; only the wide middle column holds content.
app_col = st.columns([2, 8, 2], gap="small")
with app_col[0]:
    pass
with app_col[2]:
    pass
with app_col[1]:
    # Title
    st.write("## GenAi Scraper")
    # Input URL
    url_input = st.text_input(label="Enter Website URL", key="url input", placeholder="https://www.example.com")
    # number of paragraphs
    num_paragraphs = st.slider("Select number of paragraphs to scrape", 1, 30, 5)
    scrap_btn = st.button("Scrape Paragraphs", key="scrap button")

    if url_input.strip() == "" and not scrap_btn:
        # animation
        particle("animation/particles.html")
    else:
        if scrap_btn:
            st.session_state.scraped_paragraphs = scrape_paragraphs(url_input, num_paragraphs)
            st.session_state.summary = []  # Reset summary

        # Display scraped paragraphs
        if st.session_state.scraped_paragraphs:
            st.write("### Scraped Paragraphs")
            paragraph_scrap = "\n\n".join(st.session_state.scraped_paragraphs)
            st.write(Text_Cleaning(paragraph_scrap))
            Copy_Text(Text_Cleaning(paragraph_scrap))  ## copy text

        #################### summarizer #############
        if select_mode == "Summarizer":
            if st.session_state.scraped_paragraphs:
                # Toggle for summarization mode
                st.session_state.summarizer_mode = st.toggle("Enable Summarizer Mode", st.session_state.summarizer_mode)

                if st.session_state.summarizer_mode:
                    total_words = sum(len(p.split()) for p in st.session_state.scraped_paragraphs)
                    # Keep the slider valid for very short scrapes: the upper
                    # bound must exceed the lower bound, and the default must
                    # lie between them, otherwise st.slider raises.
                    upper_bound = max(total_words, MIN_SUMMARY_TOKENS + 1)
                    default_tokens = min(
                        max(int(total_words * SUMMARY_RATIO), MIN_SUMMARY_TOKENS),
                        upper_bound,
                    )
                    max_tokens = st.slider(
                        label="Select Max Token Length",
                        min_value=MIN_SUMMARY_TOKENS,
                        max_value=upper_bound,
                        value=default_tokens
                    )

                    if st.button("📄 Generate Summary"):
                        with st.spinner("Generating Summary..."):
                            try:
                                summarizer = _load_pipeline("summarization", "facebook/bart-large-cnn")
                                st.session_state.summary = summarizer(
                                    Text_Cleaning(" ".join(st.session_state.scraped_paragraphs)),
                                    max_length=max_tokens + 20,
                                    min_length=max_tokens,
                                    do_sample=False
                                )
                            except Exception as e:
                                # UI boundary: surface model/download errors
                                # to the user instead of crashing the page.
                                st.warning(f"Error...\n{e}", icon="⚠️")

                # Display summary
                if st.session_state.summary:
                    st.write("### Summary")
                    generated_summary = st.session_state.summary[0]['summary_text']
                    st.write(generated_summary)
                    Copy_Text(generated_summary)

        ################# question answering #####################
        elif select_mode == "Que/Ans":
            if st.session_state.scraped_paragraphs:
                if st.toggle(label="Question Answering", key="Q/A"):
                    # Inject custom CSS to place the chat input at the bottom
                    # NOTE(review): the CSS payload is empty here, so this call
                    # currently injects nothing — restore the intended rules.
                    st.markdown(
                        """ """,
                        unsafe_allow_html=True
                    )

                    qa_pipeline = _load_pipeline("question-answering", "deepset/roberta-base-squad2")

                    # Initialize session state
                    if "messages" not in st.session_state:
                        st.session_state.messages = []

                    # User inputs context
                    context = Text_Cleaning(paragraph_scrap)

                    # Display chat history
                    for message in st.session_state.messages:
                        with st.chat_message(message["role"]):
                            st.markdown(message["content"])

                    if context:
                        user_input = st.chat_input("💬 Ask a question ",)
                        if user_input:
                            with st.chat_message("user"):
                                st.markdown(user_input)
                            st.session_state.messages.append({"role": "user", "content": user_input})

                            with st.spinner("🤔 Thinking..."):
                                # NOTE(review): confirm the QA pipeline honours
                                # `n_best_size`; it may be silently ignored.
                                response = qa_pipeline(
                                    {"question": user_input, "context": context},
                                    max_answer_len=max_answer_length,
                                    n_best_size=Best_size
                                )
                                answer = response["answer"]

                            with st.chat_message("assistant"):
                                st.markdown(f"{answer}")
                            st.session_state.messages.append({"role": "assistant", "content": f"{answer}"})

                    # Clear chat history button
                    if st.button("🗑️ Clear Chat"):
                        st.session_state.messages = []
                        st.rerun()

# app settings css
insert_css("css_files/app.css")