bala00712200502 committed on
Commit
5bdae9e
·
verified ·
1 Parent(s): 8062622

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -161
app.py CHANGED
@@ -1,179 +1,74 @@
 
 
1
  import os
 
2
  import requests
3
  from bs4 import BeautifulSoup
4
  from dotenv import load_dotenv
5
- from crewai import Agent, Task, Crew, Process
6
- from langchain_openai import ChatOpenAI
7
- import gradio as gr
8
- from tenacity import retry, stop_after_attempt, wait_exponential
9
- import logging
10
-
11
- # Configure logging
12
- logging.basicConfig(level=logging.INFO)
13
- logger = logging.getLogger(__name__)
14
 
15
  # Load environment variables
16
  load_dotenv()
 
 
17
 
18
- # Initialize OpenAI LLM with robust error handling
19
- class SafeChatOpenAI(ChatOpenAI):
20
- @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
21
- def _call(self, *args, **kwargs):
22
- try:
23
- return super()._call(*args, **kwargs)
24
- except Exception as e:
25
- logger.error(f"OpenAI API Error: {str(e)}")
26
- if "Incorrect API key" in str(e):
27
- raise ValueError("Invalid OpenAI API key configuration")
28
- raise ConnectionError("OpenAI service unavailable. Please try again later.")
29
-
30
- try:
31
- llm = SafeChatOpenAI(
32
- model="gpt-3.5-turbo",
33
- temperature=0.5, # More deterministic output
34
- request_timeout=60,
35
- max_retries=2
36
- )
37
- except Exception as e:
38
- logger.critical(f"LLM initialization failed: {str(e)}")
39
- raise RuntimeError("Failed to initialize AI services")
40
 
41
- class WebScraperAgent:
42
- def __init__(self):
43
- try:
44
- # Define agents
45
- self.scraper_agent = Agent(
46
- role='Senior Web Scraper',
47
- goal='Extract clean content from any webpage',
48
- backstory="""Expert in extracting information from complex websites,
49
- adept at handling various structures and formats.""",
50
- verbose=False,
51
- llm=llm
52
- )
53
-
54
- self.analyst_agent = Agent(
55
- role='Content Analyst',
56
- goal='Provide clear, concise summaries',
57
- backstory="""Specializes in analyzing and summarizing web content
58
- into key points and actionable insights.""",
59
- verbose=False,
60
- llm=llm
61
- )
62
- except Exception as e:
63
- logger.error(f"Agent creation failed: {str(e)}")
64
- raise
65
 
66
- @retry(stop=stop_after_attempt(2), wait=wait_exponential(multiplier=1, min=2, max=5))
67
- def scrape_website(self, url):
68
- """Robust web scraping function with error handling"""
69
- try:
70
- headers = {
71
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
72
- 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
73
- 'Accept-Language': 'en-US,en;q=0.5'
74
- }
75
-
76
- response = requests.get(url, headers=headers, timeout=20)
77
- response.raise_for_status()
78
-
79
- soup = BeautifulSoup(response.text, 'html.parser')
80
-
81
- # Remove unwanted elements
82
- for element in soup(['script', 'style', 'nav', 'footer', 'iframe', 'noscript']):
83
- element.decompose()
84
-
85
- # Get clean text
86
- text = soup.get_text(separator='\n', strip=True)
87
- return text[:3000] # Limit to avoid token limits
88
- except Exception as e:
89
- logger.warning(f"Failed to scrape {url}: {str(e)}")
90
- raise ConnectionError(f"Couldn't access this website. Error: {str(e)}")
91
 
92
- def analyze_content(self, content):
93
- """Process content through AI analysis pipeline"""
94
- try:
95
- # Define tasks
96
- scrape_task = Task(
97
- description="Extract and clean the main content from this webpage data.",
98
- expected_output="Well-formatted text containing the core content.",
99
- agent=self.scraper_agent
100
- )
 
 
 
 
 
 
101
 
102
- analyze_task = Task(
103
- description="Analyze this content and extract key information.",
104
- expected_output="""Concise summary with:
105
- - 3-5 key bullet points
106
- - Main topics covered
107
- - Any important statistics or facts""",
108
- agent=self.analyst_agent
109
- )
110
 
111
- # Create and run crew
112
- crew = Crew(
113
- agents=[self.scraper_agent, self.analyst_agent],
114
- tasks=[scrape_task, analyze_task],
115
- verbose=False,
116
- process=Process.sequential
117
- )
118
 
119
- return crew.kickoff(inputs={'content': content})
120
- except Exception as e:
121
- logger.error(f"Analysis failed: {str(e)}")
122
- raise RuntimeError(f"Analysis error: {str(e)}")
123
 
124
- # Gradio Interface
125
- def create_interface():
126
- scraper = WebScraperAgent()
127
-
128
- def process_url(url):
129
- try:
130
- # Step 1: Scrape
131
- content = scraper.scrape_website(url)
132
- # Step 2: Analyze
133
- return scraper.analyze_content(content)
134
- except Exception as e:
135
- return f"❌ Error: {str(e)}"
136
 
137
- with gr.Blocks(title="AI Web Scraper", theme=gr.themes.Soft()) as app:
138
- gr.Markdown("""
139
- # 🌐 AI-Powered Web Scraper
140
- *Extract and summarize content from any website*
141
- """)
142
-
143
- with gr.Row():
144
- url_input = gr.Textbox(
145
- label="Enter Website URL",
146
- placeholder="https://example.com",
147
- max_lines=1
148
- )
149
- submit_btn = gr.Button("Analyze", variant="primary")
150
-
151
- output = gr.Markdown(
152
- label="Analysis Results",
153
- elem_classes=["output-box"]
154
- )
155
-
156
- submit_btn.click(
157
- fn=process_url,
158
- inputs=url_input,
159
- outputs=output
160
- )
161
-
162
- gr.Examples(
163
- examples=[
164
- ["https://en.wikipedia.org/wiki/Artificial_intelligence"],
165
- ["https://www.nasa.gov/about/index.html"],
166
- ["https://www.w3schools.com/python/"]
167
- ],
168
- inputs=url_input,
169
- label="Try these examples"
170
- )
171
-
172
- return app
173
 
 
174
  if __name__ == "__main__":
175
- app = create_interface()
176
- app.launch(
177
- server_name="0.0.0.0",
178
- server_port=7860
179
- )
 
1
+ # web_summarizer_app.py
2
+
3
  import os
4
+ import gradio as gr
5
  import requests
6
  from bs4 import BeautifulSoup
7
  from dotenv import load_dotenv
8
+ import openai
 
 
 
 
 
 
 
 
9
 
10
  # Load environment variables
11
  load_dotenv()
12
+ openai.api_key = os.getenv("OPENAI_API_KEY")
13
+ model = os.getenv("OPENAI_MODEL", "gpt-3.5-turbo")
14
 
15
+ # 🌐 Web Scraper
16
+ def scrape_text_from_url(url):
17
+ try:
18
+ response = requests.get(url, timeout=10)
19
+ soup = BeautifulSoup(response.content, "html.parser")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
+ # Remove scripts and style
22
+ for tag in soup(["script", "style"]):
23
+ tag.decompose()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
+ # Extract visible text
26
+ text = " ".join(chunk.strip() for chunk in soup.stripped_strings)
27
+ return text[:5000] # limit to avoid token overflow
28
+ except Exception as e:
29
+ return f"❌ Error scraping the page: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
+ # 🧠 LLM Summarizer
32
+ def summarize_with_gpt(text):
33
+ try:
34
+ response = openai.ChatCompletion.create(
35
+ model=model,
36
+ messages=[
37
+ {"role": "system", "content": "You are a helpful assistant that summarizes articles."},
38
+ {"role": "user", "content": f"Please summarize the following content:\n\n{text}"}
39
+ ],
40
+ temperature=0.7,
41
+ max_tokens=500
42
+ )
43
+ return response.choices[0].message.content.strip()
44
+ except Exception as e:
45
+ return f"❌ Error from OpenAI: {str(e)}"
46
 
47
+ # 🔁 Combined Function
48
+ def scrape_and_summarize(url):
49
+ raw_text = scrape_text_from_url(url)
50
+ if "❌" in raw_text:
51
+ return raw_text, ""
52
+ summary = summarize_with_gpt(raw_text)
53
+ return raw_text, summary
 
54
 
55
+ # 🎨 Gradio UI
56
+ with gr.Blocks(title="🔎 Web Summarizer with AI") as demo:
57
+ gr.Markdown("## 🧠🌐 Web Article Summarizer")
58
+ gr.Markdown("Enter a webpage URL below. The AI will scrape and summarize the content.")
 
 
 
59
 
60
+ with gr.Row():
61
+ url_input = gr.Textbox(label="🔗 Enter URL", placeholder="https://example.com", scale=4)
62
+ btn = gr.Button("Summarize", variant="primary")
 
63
 
64
+ with gr.Row():
65
+ with gr.Column(scale=1):
66
+ raw_output = gr.Textbox(label="📝 Raw Scraped Text", lines=15, interactive=False)
67
+ with gr.Column(scale=1):
68
+ summary_output = gr.Textbox(label="📄 AI Summary", lines=15, interactive=False)
 
 
 
 
 
 
 
69
 
70
+ btn.click(scrape_and_summarize, inputs=[url_input], outputs=[raw_output, summary_output])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
+ # 🚀 Launch app
73
  if __name__ == "__main__":
74
+ demo.launch()