"""Gradio app that scrapes a company landing page and streams an AI-written brochure."""

import json
import os
from typing import List, Optional
from urllib.parse import urljoin, urlparse

import google.generativeai as genai
import gradio as gr
import ollama
import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv

# Load environment variables
load_dotenv()
google_api_key = os.getenv('GOOGLE_API_KEY')
genai.configure(api_key=google_api_key)


class Website:
    """
    A utility class to represent and scrape website content with robust error handling.

    On construction the page at ``url`` is fetched and parsed. Any failure is
    printed and leaves the attributes at their defaults, so construction itself
    never raises.

    Attributes:
        url: The URL passed to the constructor.
        title: Page title, or "No title found" when unavailable.
        text: Text of the page body with script/style/img/input elements removed.
        links: De-duplicated absolute URLs of every ``<a href>`` on the page.
        relevant_links: Subset of ``links`` matching the relevance keywords.
    """

    def __init__(self, url: str, timeout: int = 10):
        """Fetch and parse ``url``, waiting at most ``timeout`` seconds for the request."""
        self.url = url
        self.title = "No title found"
        self.text = ""
        self.links = []
        self.relevant_links = []

        try:
            response = self._fetch_webpage(url, timeout)
            if response:
                self._parse_webpage(response)
        except Exception as e:
            # Best-effort scraping: report the problem and keep the defaults.
            print(f"Error processing {url}: {e}")

    def _fetch_webpage(self, url: str, timeout: int) -> Optional[requests.Response]:
        """Return the HTTP response for ``url``, or None if the URL is invalid or the request fails."""
        try:
            parsed_url = urlparse(url)
            # Require both a scheme and a host before issuing a request.
            if not all([parsed_url.scheme, parsed_url.netloc]):
                print(f"Invalid URL: {url}")
                return None

            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
            }
            response = requests.get(url, headers=headers, timeout=timeout)
            response.raise_for_status()
            return response
        except (requests.RequestException, ValueError) as e:
            print(f"Request failed for {url}: {e}")
            return None

    def _parse_webpage(self, response: requests.Response):
        """Populate title, text, links and relevant_links from ``response``."""
        soup = BeautifulSoup(response.content, 'html.parser')

        # ``soup.title.string`` can be None (e.g. an empty <title>), so fall
        # back to the placeholder in that case as well.
        if soup.title and soup.title.string:
            self.title = soup.title.string
        else:
            self.title = "No title found"

        if soup.body:
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)

        links = [
            urljoin(self.url, link.get('href'))
            for link in soup.find_all('a')
            if link.get('href')
        ]
        # De-duplicate while keeping the order in which links appear on the page.
        self.links = list(dict.fromkeys(links))
        self.relevant_links = self._filter_relevant_links(self.links)

    def _filter_relevant_links(self, links: List[str]) -> List[str]:
        """Return the links whose lower-cased URL contains any relevance keyword."""
        relevant_keywords = ["about", "careers", "contact", "company", "jobs"]
        return [
            link for link in links
            if any(keyword in link.lower() for keyword in relevant_keywords)
        ]

    def get_contents(self) -> str:
        """Return the page title and text formatted for inclusion in a prompt."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

    def __repr__(self) -> str:
        return f"Website(url='{self.url}', title='{self.title}', links={len(self.links)})"


link_system_prompt = (
    "Now You are an assistant that analyzes the contents of several relevant pages from a company website "
    "and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown. "
    "Include details of company culture, customers and careers/jobs if you have the information. Include hyperlinks of social media platforms."
)


def _stream_ollama(model_name: str, prompt: str):
    """Stream a chat completion from the Ollama model ``model_name``.

    Yields the cumulative response text after each received chunk, so every
    yielded value is the full text produced so far.
    """
    messages = [
        {"role": "system", "content": link_system_prompt},
        {"role": "user", "content": prompt}
    ]
    stream = ollama.chat(
        model=model_name,
        messages=messages,
        stream=True
    )
    result = ""
    for chunk in stream:
        result += chunk['message']['content']
        yield result


def stream_llama(prompt):
    """Yield the growing brochure text generated by the 'llama3.2' Ollama model."""
    yield from _stream_ollama('llama3.2', prompt)


def stream_gemma(prompt):
    """Yield the growing brochure text generated by the 'gemma2' Ollama model."""
    yield from _stream_ollama("gemma2", prompt)


def stream_gemini(prompt):
    """Yield the growing brochure text generated by the 'gemini-1.5-pro' model."""
    model = genai.GenerativeModel(
        model_name="gemini-1.5-pro",
        system_instruction=link_system_prompt
    )
    response = model.generate_content(prompt, stream=True)
    result = ""
    for chunks in response:
        # Skip chunks that carry no text.
        if chunks.text:
            result += chunks.text
            yield result


# Maps each dropdown choice to the generator function that serves it.
_MODEL_STREAMERS = {
    "GEMINI-1.5-PRO": stream_gemini,
    "LLAMA3.2": stream_llama,
    "GEMMA2": stream_gemma,
}


def stream_brochure(company_name, url, model):
    """Scrape ``url`` and stream a brochure for ``company_name`` from ``model``.

    Args:
        company_name: Name of the company, inserted into the prompt.
        url: Landing page URL whose contents are appended to the prompt.
        model: One of "GEMINI-1.5-PRO", "LLAMA3.2" or "GEMMA2".

    Yields:
        The cumulative brochure text after each streamed chunk.

    Raises:
        ValueError: If ``model`` is not one of the supported names.
    """
    streamer = _MODEL_STREAMERS.get(model)
    if streamer is None:
        # Reject an unsupported model before spending time fetching the page.
        raise ValueError("Unknown model")

    prompt = f"Please generate a company brochure for {company_name}.\n"
    prompt += Website(url).get_contents()
    yield from streamer(prompt)


view = gr.Interface(
    fn=stream_brochure,
    inputs=[
        gr.Textbox(label="Company Name:", placeholder="Enter the company name here"),
        gr.Textbox(label="Landing Page URL:", placeholder="Enter the URL including http:// or https://"),
        gr.Dropdown(["GEMINI-1.5-PRO", "LLAMA3.2", "GEMMA2"], label="Select Model")
    ],
    outputs=[gr.Markdown(label="Brochure:")],
    title="Company Brochure Generator",
    description="Generate a professional brochure for your company using AI models. Simply provide the company name, landing page URL, and select the model.",
    theme="default",
    flagging_mode="never"
)

if __name__ == "__main__":
    view.launch()