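# NOTE: the three items below appear to be repository-page residue (uploader
# avatar caption, commit message, commit hash) rather than program text:
#   anuragbb's picture
#   Update app.py
#   dcab7f7 verified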
import json
import os
from typing import List, Optional
from urllib.parse import urljoin, urlparse

import google.generativeai as genai
import gradio as gr
import ollama
import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
# Environment setup: call load_dotenv() first, then read GOOGLE_API_KEY from
# the process environment and pass it to the Gemini client configuration.
# google_api_key is None when the variable is not set.
load_dotenv()
google_api_key = os.environ.get("GOOGLE_API_KEY")
genai.configure(api_key=google_api_key)
class Website:
    """
    Represent one web page and scrape its title, visible text and links.

    The page is fetched and parsed as soon as the object is constructed.
    Failures are reported with ``print`` and never propagate to the caller;
    in that case the attributes keep their defaults.

    Attributes:
        url: The URL passed to the constructor.
        title: Text of the ``<title>`` tag, or ``"No title found"``.
        text: Text of ``<body>`` with script/style/img/input tags removed,
            or ``""`` when nothing could be scraped.
        links: De-duplicated absolute URLs of every ``<a href>`` on the page,
            in order of first appearance.
        relevant_links: Subset of ``links`` whose URL contains one of the
            keywords in ``_RELEVANT_KEYWORDS``.
    """

    # Case-insensitive substrings that mark a link as brochure-relevant.
    _RELEVANT_KEYWORDS = ("about", "careers", "contact", "company", "jobs")

    def __init__(self, url: str, timeout: int = 10):
        """
        Fetch and parse ``url``.

        Args:
            url: Address of the page to scrape.
            timeout: Timeout value forwarded to ``requests.get``.
        """
        self.url = url
        self.title = "No title found"
        self.text = ""
        self.links: List[str] = []
        self.relevant_links: List[str] = []
        try:
            response = self._fetch_webpage(url, timeout)
            if response is not None:
                self._parse_webpage(response)
        except Exception as e:
            # Deliberate best effort: a page that cannot be scraped must not
            # take down the caller, so report the problem and keep defaults.
            print(f"Error processing {url}: {e}")

    def _fetch_webpage(self, url: str, timeout: int) -> "Optional[requests.Response]":
        """
        Download ``url`` and return the response, or ``None`` on failure.

        A URL without both a scheme and a network location is rejected
        before any request is made. Request errors and HTTP error statuses
        are printed and turned into ``None``.
        """
        try:
            parsed_url = urlparse(url)
            if not all([parsed_url.scheme, parsed_url.netloc]):
                print(f"Invalid URL: {url}")
                return None
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
            }
            response = requests.get(url, headers=headers, timeout=timeout)
            response.raise_for_status()
            return response
        except (requests.RequestException, ValueError) as e:
            print(f"Request failed for {url}: {e}")
            return None

    def _parse_webpage(self, response: "requests.Response"):
        """Fill ``title``, ``text``, ``links`` and ``relevant_links`` from ``response``."""
        soup = BeautifulSoup(response.content, 'html.parser')

        # ``soup.title.string`` is None for an empty <title> or one with
        # several children, which would leave the title as None. Read the
        # tag's text instead and fall back when it is blank.
        title_text = soup.title.get_text(strip=True) if soup.title else ""
        self.title = title_text or "No title found"

        if soup.body:
            # Drop elements that carry no readable prose before extracting text.
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)

        links = [
            urljoin(self.url, anchor.get('href'))
            for anchor in soup.find_all('a')
            if anchor.get('href')
        ]
        # dict.fromkeys removes duplicates while keeping first-seen order, so
        # the result is deterministic (a set would shuffle it between runs).
        self.links = list(dict.fromkeys(links))
        self.relevant_links = self._filter_relevant_links(self.links)

    def _filter_relevant_links(self, links: List[str]) -> List[str]:
        """Return the links whose lower-cased URL contains a relevant keyword."""
        return [
            link
            for link in links
            if any(keyword in link.lower() for keyword in self._RELEVANT_KEYWORDS)
        ]

    def get_contents(self) -> str:
        """Return the title and page text formatted for inclusion in a prompt."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

    def __repr__(self) -> str:
        return f"Website(url='{self.url}', title='{self.title}', links={len(self.links)})"
# System prompt shared by every model backend: it asks for a short markdown
# brochure built from the scraped company pages. The sentences are joined
# with single spaces.
link_system_prompt = " ".join([
    "Now You are an assistant that analyzes the contents of several relevant pages from a company website "
    "and creates a short brochure about the company for prospective customers, investors and recruits.",
    "Respond in markdown.",
    "Include details of company culture, customers and careers/jobs if you have the information.",
    "Include hyperlinks of social media platforms.",
])
def _stream_ollama(model_name, prompt):
    """
    Stream a chat completion from the given Ollama model.

    Sends ``link_system_prompt`` as the system message and ``prompt`` as the
    user message, then yields the text accumulated so far after every chunk,
    so each yielded value is the whole response up to that point.
    """
    messages = [
        {"role": "system", "content": link_system_prompt},
        {"role": "user", "content": prompt},
    ]
    accumulated = ""
    for chunk in ollama.chat(model=model_name, messages=messages, stream=True):
        accumulated += chunk['message']['content']
        yield accumulated


def stream_llama(prompt):
    """Yield the growing response of the ``llama3.2`` model to ``prompt``."""
    yield from _stream_ollama('llama3.2', prompt)


def stream_gemma(prompt):
    """Yield the growing response of the ``gemma2`` model to ``prompt``."""
    yield from _stream_ollama("gemma2", prompt)
def stream_gemini(prompt):
    """
    Stream a response to ``prompt`` from the ``gemini-1.5-pro`` model.

    The model is created with ``link_system_prompt`` as its system
    instruction. Chunks with empty text are skipped; for every other chunk
    the text accumulated so far is yielded.
    """
    gemini = genai.GenerativeModel(
        system_instruction=link_system_prompt,
        model_name="gemini-1.5-pro",
    )
    accumulated = ""
    for piece in gemini.generate_content(prompt, stream=True):
        piece_text = piece.text
        if not piece_text:
            continue
        accumulated += piece_text
        yield accumulated
def stream_brochure(company_name, url, model):
    """
    Stream a markdown brochure for a company, generated from its landing page.

    Args:
        company_name: Name inserted into the prompt.
        url: Landing page whose scraped contents are appended to the prompt.
        model: One of ``"GEMINI-1.5-PRO"``, ``"GEMMA2"`` or ``"LLAMA3.2"``.

    Yields:
        The brochure text accumulated so far, as produced by the selected
        model's streaming function.

    Raises:
        ValueError: If ``model`` is not one of the supported names (including
            ``None``, e.g. when nothing was selected).
    """
    # Resolve the backend before touching the network, so an unknown model
    # fails immediately instead of after a pointless page download.
    if model == "GEMINI-1.5-PRO":
        streamer = stream_gemini
    elif model == "GEMMA2":
        streamer = stream_gemma
    elif model == "LLAMA3.2":
        streamer = stream_llama
    else:
        raise ValueError("Unknown model")

    prompt = f"Please generate a company brochure for {company_name}.\n"
    prompt += Website(url).get_contents()
    yield from streamer(prompt)
# Input widgets, listed in the positional order of stream_brochure's
# parameters: company name, landing page URL, model choice.
_brochure_inputs = [
    gr.Textbox(placeholder="Enter the company name here", label="Company Name:"),
    gr.Textbox(placeholder="Enter the URL including http:// or https://", label="Landing Page URL:"),
    gr.Dropdown(choices=["GEMINI-1.5-PRO", "LLAMA3.2", "GEMMA2"], label="Select Model"),
]

# Single markdown pane that receives the streamed brochure text.
_brochure_outputs = [gr.Markdown(label="Brochure:")]

# The Gradio interface wiring the widgets to the brochure generator.
view = gr.Interface(
    title="Company Brochure Generator",
    description="Generate a professional brochure for your company using AI models. Simply provide the company name, landing page URL, and select the model.",
    fn=stream_brochure,
    inputs=_brochure_inputs,
    outputs=_brochure_outputs,
    theme="default",
    flagging_mode="never",
)

# Start the web UI only when the file is run as a script, not on import.
if __name__ == "__main__":
    view.launch()