# Spaces status: Build error
import json
import os
from typing import List, Optional
from urllib.parse import urljoin, urlparse

import google.generativeai as genai
import gradio as gr
import ollama
import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
# Populate the process environment (python-dotenv) before the key is read.
load_dotenv()

# The key is handed to the Gemini SDK exactly as read; it is None when the
# GOOGLE_API_KEY variable is not set.
google_api_key = os.environ.get("GOOGLE_API_KEY")
genai.configure(api_key=google_api_key)
class Website:
    """Fetch one web page and expose its title, visible text and links.

    The HTTP request happens in the constructor. Failures (invalid URL,
    network/HTTP errors, parsing errors) are printed, not raised, and the
    instance keeps its defaults: title ``"No title found"``, empty text and
    empty link lists.

    Attributes:
        url: The URL given to the constructor.
        title: Page ``<title>`` text, or ``"No title found"``.
        text: Text of ``<body>`` with script/style/img/input elements removed.
        links: Unique absolute link targets, in document order.
        relevant_links: Subset of ``links`` whose URL contains one of the
            keywords used by ``_filter_relevant_links``.
    """

    def __init__(self, url: str, timeout: int = 10):
        """Download and parse ``url``; ``timeout`` is the request timeout in seconds."""
        self.url = url
        self.title = "No title found"
        self.text = ""
        self.links = []
        self.relevant_links = []

        # Deliberately best-effort: a page that cannot be scraped must not
        # crash the caller, so any error is reported and the defaults stay.
        try:
            response = self._fetch_webpage(url, timeout)
            if response is not None:
                self._parse_webpage(response)
        except Exception as e:
            print(f"Error processing {url}: {e}")

    def _fetch_webpage(self, url: str, timeout: int) -> "Optional[requests.Response]":
        """Return the HTTP response for ``url``, or ``None`` on any failure.

        A URL without both scheme and host is rejected before any request is
        made. Request errors and non-2xx statuses are printed and mapped to
        ``None``.
        """
        try:
            parsed_url = urlparse(url)
            if not (parsed_url.scheme and parsed_url.netloc):
                print(f"Invalid URL: {url}")
                return None

            # Browser-like User-Agent header sent with the request.
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
            }
            response = requests.get(url, headers=headers, timeout=timeout)
            response.raise_for_status()
            return response
        except (requests.RequestException, ValueError) as e:
            print(f"Request failed for {url}: {e}")
            return None

    def _parse_webpage(self, response: "requests.Response"):
        """Fill ``title``, ``text``, ``links`` and ``relevant_links`` from ``response``."""
        soup = BeautifulSoup(response.content, 'html.parser')

        # ``.string`` is None when <title> is empty or has nested markup, so
        # fall back to the placeholder instead of storing None.
        raw_title = soup.title.string if soup.title else None
        cleaned_title = raw_title.strip() if raw_title else ""
        self.title = cleaned_title or "No title found"

        if soup.body:
            # Drop elements that carry no readable text.
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)

        # Resolve relative hrefs against the page URL.
        links = [
            urljoin(self.url, anchor.get('href'))
            for anchor in soup.find_all('a')
            if anchor.get('href')
        ]
        # dict.fromkeys de-duplicates while keeping document order, so the
        # result is deterministic (a set would not be).
        self.links = list(dict.fromkeys(links))
        self.relevant_links = self._filter_relevant_links(self.links)

    def _filter_relevant_links(self, links: List[str]) -> List[str]:
        """Return the links whose URL contains a company-info keyword (case-insensitive)."""
        relevant_keywords = ("about", "careers", "contact", "company", "jobs")
        return [
            link for link in links
            if any(keyword in link.lower() for keyword in relevant_keywords)
        ]

    def get_contents(self) -> str:
        """Return the title and page text formatted as a prompt fragment."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

    def __repr__(self) -> str:
        return f"Website(url='{self.url}', title='{self.title}', links={len(self.links)})"
# System prompt shared by every model backend: asks for a short markdown
# brochure built from the scraped pages.
link_system_prompt = (
    "Now You are an assistant that analyzes the contents of several relevant pages "
    "from a company website and creates a short brochure about the company "
    "for prospective customers, investors and recruits. "
    "Respond in markdown. "
    "Include details of company culture, customers and careers/jobs "
    "if you have the information. "
    "Include hyperlinks of social media platforms."
)
def _stream_ollama(model_name, prompt):
    """Stream a chat completion from a local Ollama model.

    Sends ``link_system_prompt`` as the system message and ``prompt`` as the
    user message, then yields the accumulated response text after each
    streamed chunk (cumulative, not per-chunk deltas).
    """
    messages = [
        {"role": "system", "content": link_system_prompt},
        {"role": "user", "content": prompt},
    ]
    accumulated = ""
    for chunk in ollama.chat(model=model_name, messages=messages, stream=True):
        accumulated += chunk['message']['content']
        yield accumulated


def stream_llama(prompt):
    """Yield the growing brochure text generated by the ``llama3.2`` model."""
    yield from _stream_ollama('llama3.2', prompt)


def stream_gemma(prompt):
    """Yield the growing brochure text generated by the ``gemma2`` model."""
    yield from _stream_ollama("gemma2", prompt)
def stream_gemini(prompt):
    """Yield the growing brochure text generated by ``gemini-1.5-pro``.

    ``link_system_prompt`` is passed as the system instruction. After each
    streamed chunk that carries text, the accumulated response so far is
    yielded (cumulative, not per-chunk deltas).
    """
    model = genai.GenerativeModel(model_name="gemini-1.5-pro", system_instruction=link_system_prompt)
    response = model.generate_content(prompt, stream=True)
    result = ""
    for chunk in response:
        try:
            piece = chunk.text
        except ValueError:
            # The ``text`` accessor raises when a chunk has no text part
            # (e.g. a safety-blocked candidate). Skip it so the text already
            # streamed is kept instead of aborting the whole response.
            continue
        if piece:
            result += piece
            yield result
def stream_brochure(company_name, url, model):
    """Stream a markdown brochure for ``company_name`` built from ``url``.

    Args:
        company_name: Name inserted into the prompt.
        url: Landing page to scrape for content.
        model: One of ``"GEMINI-1.5-PRO"``, ``"GEMMA2"`` or ``"LLAMA3.2"``.

    Yields:
        The accumulated brochure text as the selected backend streams it.

    Raises:
        ValueError: If ``model`` is not one of the supported names (including
            ``None``, e.g. when no dropdown entry was chosen).
    """
    # Pick the backend first so an unsupported model fails immediately,
    # before any time is spent downloading the page.
    if model == "GEMINI-1.5-PRO":
        streamer = stream_gemini
    elif model == "GEMMA2":
        streamer = stream_gemma
    elif model == "LLAMA3.2":
        streamer = stream_llama
    else:
        raise ValueError(f"Unknown model: {model!r}")

    prompt = f"Please generate a company brochure for {company_name}.\n"
    prompt += Website(url).get_contents()
    yield from streamer(prompt)
# Gradio front end: two text boxes and a model selector feed stream_brochure,
# whose streamed output is rendered as markdown.
view = gr.Interface(
    title="Company Brochure Generator",
    description="Generate a professional brochure for your company using AI models. Simply provide the company name, landing page URL, and select the model.",
    fn=stream_brochure,
    inputs=[
        gr.Textbox(
            label="Company Name:",
            placeholder="Enter the company name here",
        ),
        gr.Textbox(
            label="Landing Page URL:",
            placeholder="Enter the URL including http:// or https://",
        ),
        gr.Dropdown(
            choices=["GEMINI-1.5-PRO", "LLAMA3.2", "GEMMA2"],
            label="Select Model",
        ),
    ],
    outputs=[gr.Markdown(label="Brochure:")],
    flagging_mode="never",
    theme="default",
)


# Start the web UI only when the file is executed as a script.
if __name__ == "__main__":
    view.launch()