Spaces:
Sleeping
Sleeping
| import os | |
| import openai | |
| import streamlit as st | |
| import trafilatura | |
| from langchain import PromptTemplate, OpenAI, LLMChain | |
# Custom CSS injected into the page; it only colours the subtitle paragraph
# rendered with the "title-text" class.
custom_css = """
<style>
.title-text {
color: #0077B6; /* Cambia el color del subtítulo a un azul más claro */
}
</style>
"""

st.sidebar.write("This program uses the OpenAI API to function. Please enter your API key below to use the program. ")

# The key is a secret: mask it in the UI instead of echoing it in clear text,
# and drop stray whitespace that commonly sneaks in when the key is pasted.
OPENAI_API_KEY = st.sidebar.text_input("API KEY:", type="password").strip()
class WebContentSummarizerApp:
    """Streamlit page that condenses the main text of a web page into one paragraph.

    Instantiating the class renders the title, subtitle, URL input and the
    "Summarize" button. Calling ``run`` then reacts to the button: it scrapes
    the URL with trafilatura and summarizes the text through an OpenAI model
    driven by a LangChain ``LLMChain``.
    """

    # Upper bound, in characters, for each piece of text sent to the model.
    MAX_CHUNK_LENGTH = 4096

    def __init__(self):
        """Render the static widgets and capture the current user input."""
        st.title("WebContent Summarizer")
        # Insert the custom CSS into the application
        st.markdown(custom_css, unsafe_allow_html=True)
        # Use a CSS class for the subtitle
        st.markdown('<p class="title-text">With just a simple URL, simplify your online research and content consumption. '
                    'This AI tool extracts and condenses the essential information from web pages into a concise, easy-to-read paragraph.</p>', unsafe_allow_html=True)
        self.url = st.text_input("Paste or type the URL you want to summarize:")
        self.run_button = st.button("Summarize")

    @staticmethod
    def scrape_website(url):
        """Download ``url`` and return its main text, or ``None`` on failure.

        Declared as a static method so it can be called both through the class
        and through an instance (``self.scrape_website(url)``); the original
        plain function raised ``TypeError`` when called on an instance.
        """
        downloaded = trafilatura.fetch_url(url)
        if downloaded is None:
            # Nothing was fetched, so there is nothing to extract from.
            return None
        return trafilatura.extract(downloaded)

    @staticmethod
    def _split_text(text, max_length):
        """Cut ``text`` into consecutive slices of at most ``max_length`` characters."""
        return [text[start:start + max_length] for start in range(0, len(text), max_length)]

    def run(self) -> None:
        """Handle a click on "Summarize": validate input, scrape, summarize, display."""
        if not self.run_button:
            return

        url = self.url.strip() if self.url else ""
        # The warning text promises both checks, so enforce both of them.
        if not url or not OPENAI_API_KEY:
            st.warning("Please enter a valid URL and your OpenAI API Key.")
            return

        with st.spinner("Getting information from the page..."):
            self.text = self.scrape_website(url)

        if not self.text:
            # Download or extraction produced nothing; do not call the model.
            st.error("Could not extract any readable text from that URL.")
            return

        with st.spinner("Analyzing and organizing..."):
            result = self.llm_text_insight()
        st.write("Summarized content:")
        st.write(result)

    def llm_text_insight(self) -> str:
        """Summarize ``self.text`` into a single paragraph.

        The text is split into chunks of ``MAX_CHUNK_LENGTH`` characters and
        each chunk is rewritten by the model. When there is more than one
        chunk, the partial paragraphs are joined and passed through the same
        prompt once more to obtain a single paragraph.

        Returns:
            The final paragraph, or an empty string when there is no text.
        """
        text = getattr(self, "text", None)
        if not text:
            # No text (missing, None or empty): nothing to send to the model.
            return ""

        openai.api_key = OPENAI_API_KEY

        # Split the text into smaller fragments
        chunks = self._split_text(text, self.MAX_CHUNK_LENGTH)

        template = """
        Please take the following text interprete and transform it into an organized paragraph for a study.
        Ensure that the paragraph is clear and coherent. If necessary, add or rearrange ideas to achieve a logical structure.
        The text is as follows:
        text: {input}
        Make sure the text is only one paragraph
        """
        llm = OpenAI(temperature=0, openai_api_key=OPENAI_API_KEY)
        prompt_template = PromptTemplate.from_template(template=template)
        # One chain serves both the per-chunk pass and the final merge pass.
        chain = LLMChain(llm=llm, prompt=prompt_template)

        results = [chain.predict(input=chunk) for chunk in chunks]

        if len(results) > 1:
            # Several partial paragraphs: merge them into one final paragraph.
            return chain.predict(input=" ".join(results))
        return results[0]
# Script entry point: build the page widgets, then react to the button click.
if __name__ == "__main__":
    WebContentSummarizerApp().run()