# Spaces: Build error
# File size: 5,320 Bytes
# Blame-gutter revisions: 0f9797b, f610706, 7618062, dcab7f7 (lines 1-150)
import os
import json
from typing import List, Optional
from urllib.parse import urljoin, urlparse

import gradio as gr
import google.generativeai as genai
import ollama
import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
# Load environment variables (python-dotenv) before any of them are read.
load_dotenv()

# The key is None when GOOGLE_API_KEY is not set; it is handed to the
# Gemini client as-is, so a missing key is not reported at this point.
google_api_key = os.environ.get("GOOGLE_API_KEY")
genai.configure(api_key=google_api_key)
class Website:
    """
    A utility class to represent and scrape website content with robust error handling.

    Creating an instance fetches ``url`` once and parses the response. Any
    failure is printed and leaves the defaults in place, so construction
    itself does not raise.

    Attributes:
        url: The address passed to the constructor.
        title: Stripped text of the page's ``<title>``, or "No title found".
        text: Text of ``<body>`` after removing script, style, img and input
            elements; empty when nothing could be fetched.
        links: Absolute link targets, de-duplicated in first-seen order.
        relevant_links: Entries of ``links`` that contain one of the
            keywords in ``_RELEVANT_KEYWORDS``.
    """

    # Substrings that mark a link as interesting for a company brochure.
    _RELEVANT_KEYWORDS = ("about", "careers", "contact", "company", "jobs")

    def __init__(self, url: str, timeout: int = 10):
        """
        Fetch and parse ``url``.

        Args:
            url: Page address, including scheme (``http://`` or ``https://``).
            timeout: Timeout in seconds passed to ``requests.get``.
        """
        self.url = url
        self.title = "No title found"
        self.text = ""
        self.links: List[str] = []
        self.relevant_links: List[str] = []
        try:
            response = self._fetch_webpage(url, timeout)
            if response is not None:
                self._parse_webpage(response)
        except Exception as e:
            # Deliberate best-effort boundary: a broken page must not
            # prevent the caller from getting a (possibly empty) instance.
            print(f"Error processing {url}: {e}")

    def _fetch_webpage(self, url: str, timeout: int) -> "Optional[requests.Response]":
        """
        Download ``url`` and return the response, or None on failure.

        A URL without both a scheme and a host is rejected before any
        request is made. Request errors, HTTP error statuses and URL
        parsing errors are printed and turned into a None return.
        """
        try:
            parsed_url = urlparse(url)
            if not (parsed_url.scheme and parsed_url.netloc):
                print(f"Invalid URL: {url}")
                return None
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
            }
            response = requests.get(url, headers=headers, timeout=timeout)
            response.raise_for_status()
            return response
        except (requests.RequestException, ValueError) as e:
            print(f"Request failed for {url}: {e}")
            return None

    def _parse_webpage(self, response: "requests.Response") -> None:
        """Fill ``title``, ``text``, ``links`` and ``relevant_links`` from ``response``."""
        soup = BeautifulSoup(response.content, 'html.parser')

        # ``.string`` is None for an empty <title> or one with nested markup,
        # so fall back to the placeholder instead of storing None.
        title_tag = soup.title
        raw_title = title_tag.string if title_tag is not None else None
        cleaned_title = raw_title.strip() if raw_title else ""
        self.title = cleaned_title or "No title found"

        if soup.body:
            # Remove elements that carry no readable text before extracting it.
            for irrelevant in soup.body(["script", "style", "img", "input"]):
                irrelevant.decompose()
            self.text = soup.body.get_text(separator="\n", strip=True)

        hrefs = (anchor.get('href') for anchor in soup.find_all('a'))
        absolute_links = [urljoin(self.url, href) for href in hrefs if href]
        # dict.fromkeys removes duplicates while keeping document order,
        # which makes the result reproducible between runs.
        self.links = list(dict.fromkeys(absolute_links))
        self.relevant_links = self._filter_relevant_links(self.links)

    def _filter_relevant_links(self, links: List[str]) -> List[str]:
        """Return the links whose lower-cased URL contains a relevance keyword."""
        return [
            link
            for link in links
            if any(keyword in link.lower() for keyword in self._RELEVANT_KEYWORDS)
        ]

    def get_contents(self) -> str:
        """Return the title and body text formatted for inclusion in a prompt."""
        return f"Webpage Title:\n{self.title}\nWebpage Contents:\n{self.text}\n\n"

    def __repr__(self) -> str:
        return f"Website(url='{self.url}', title='{self.title}', links={len(self.links)})"
# System prompt sent with every model request in this file; the three
# fragments are joined with single spaces into one instruction string.
link_system_prompt = " ".join((
    "Now You are an assistant that analyzes the contents of several relevant pages from a company website",
    "and creates a short brochure about the company for prospective customers, investors and recruits. Respond in markdown.",
    "Include details of company culture, customers and careers/jobs if you have the information. Include hyperlinks of social media platforms.",
))
def stream_llama(prompt):
    """
    Stream a reply to ``prompt`` from the Ollama ``llama3.2`` model.

    The request carries ``link_system_prompt`` as the system message.
    Yields the accumulated reply text after each received chunk, so every
    yielded value is the full text so far rather than a delta.
    """
    chunks = ollama.chat(
        model='llama3.2',
        messages=[
            {"role": "system", "content": link_system_prompt},
            {"role": "user", "content": prompt},
        ],
        stream=True,
    )
    so_far = ""
    for piece in chunks:
        so_far = so_far + piece['message']['content']
        yield so_far
def stream_gemma(prompt):
    """
    Stream a reply to ``prompt`` from the Ollama ``gemma2`` model.

    The request carries ``link_system_prompt`` as the system message.
    Yields the accumulated reply text after each received chunk, so every
    yielded value is the full text so far rather than a delta.
    """
    conversation = [
        {"role": "system", "content": link_system_prompt},
        {"role": "user", "content": prompt},
    ]
    accumulated = ""
    for part in ollama.chat(model="gemma2", messages=conversation, stream=True):
        accumulated = accumulated + part['message']['content']
        yield accumulated
def stream_gemini(prompt):
    """
    Stream a reply to ``prompt`` from the ``gemini-1.5-pro`` model.

    ``link_system_prompt`` is supplied as the model's system instruction.
    Yields the accumulated reply text each time a chunk with non-empty
    text arrives.
    """
    gemini = genai.GenerativeModel(
        model_name="gemini-1.5-pro",
        system_instruction=link_system_prompt,
    )
    accumulated = ""
    for part in gemini.generate_content(prompt, stream=True):
        # NOTE(review): the yield is kept inside the text check, so chunks
        # without text produce no update — confirm against the original layout.
        if part.text:
            accumulated = accumulated + part.text
            yield accumulated
def stream_brochure(company_name, url, model):
    """
    Scrape ``url`` and stream a brochure for ``company_name``.

    Args:
        company_name: Name inserted into the request sentence of the prompt.
        url: Landing page whose title and text are appended to the prompt.
        model: One of "GEMINI-1.5-PRO", "GEMMA2" or "LLAMA3.2".

    Yields:
        Whatever the selected ``stream_*`` generator yields.

    Raises:
        ValueError: If ``model`` is none of the supported labels. The page
            has already been scraped by the time this is raised.
    """
    request_line = f"Please generate a company brochure for {company_name}.\n"
    prompt = request_line + Website(url).get_contents()

    # Checked in this order, mirroring the labels offered by the UI dropdown.
    backends = (
        ("GEMINI-1.5-PRO", stream_gemini),
        ("GEMMA2", stream_gemma),
        ("LLAMA3.2", stream_llama),
    )
    for label, streamer in backends:
        if model == label:
            yield from streamer(prompt)
            return
    raise ValueError("Unknown model")
# Gradio UI: three inputs (company name, URL, model choice) feed
# stream_brochure, whose streamed output is rendered as Markdown.
view = gr.Interface(
    title="Company Brochure Generator",
    description="Generate a professional brochure for your company using AI models. Simply provide the company name, landing page URL, and select the model.",
    fn=stream_brochure,
    inputs=[
        gr.Textbox(
            label="Company Name:",
            placeholder="Enter the company name here",
        ),
        gr.Textbox(
            label="Landing Page URL:",
            placeholder="Enter the URL including http:// or https://",
        ),
        # The choices must match the labels compared in stream_brochure.
        gr.Dropdown(
            ["GEMINI-1.5-PRO", "LLAMA3.2", "GEMMA2"],
            label="Select Model",
        ),
    ],
    outputs=[gr.Markdown(label="Brochure:")],
    theme="default",
    flagging_mode="never",
)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    view.launch()
|