Spaces:
Sleeping
Sleeping
import os

import gradio as gr
import openai
import requests
from bs4 import BeautifulSoup
# Configure the OpenAI API key.
# Security fix: the original hard-coded a key placeholder directly in source,
# which leaks credentials if a real key is ever committed. Read it from the
# OPENAI_API_KEY environment variable instead; the original placeholder is
# kept as the fallback so behavior is unchanged when the variable is unset.
openai.api_key = os.environ.get("OPENAI_API_KEY", "Your API - KEY")
# Function to fetch and crawl website content
def fetch_website_content(url):
    """Download *url* and return the concatenated text of its <p> tags.

    Returns a string starting with "Error" (never raises) when the request
    fails, the server responds with a non-200 status, or parsing blows up —
    callers detect failure by that prefix.
    """
    try:
        # Bug fix: requests has no default timeout, so the original could
        # hang indefinitely on an unresponsive host. 10s covers slow sites
        # while still failing fast enough for an interactive UI.
        response = requests.get(url, timeout=10)
        if response.status_code != 200:
            return "Error: Could not fetch the webpage. Please check the URL."
        # Parse the website content with BeautifulSoup
        soup = BeautifulSoup(response.content, 'html.parser')
        # Extract text content from paragraph tags only — navigation, scripts
        # and boilerplate outside <p> elements are deliberately dropped.
        website_text = " ".join([p.text for p in soup.find_all('p')])
        return website_text
    except Exception as e:
        # Includes requests.Timeout from the new timeout above.
        return f"Error: {str(e)}"
# Function to split content into chunks that fit within the token limits
def split_content_into_chunks(content, max_chunk_size=3000):
    """Split *content* into space-joined chunks of at most *max_chunk_size*
    whitespace-delimited words each.

    Note: the limit counts *words*, not model tokens — it is a coarse proxy
    for the token budget. Empty/whitespace-only input yields an empty list.
    """
    words = content.split()
    # Walk the word list in fixed-size strides; the final slice simply comes
    # out shorter, so no special-casing of the tail is needed.
    return [
        " ".join(words[start:start + max_chunk_size])
        for start in range(0, len(words), max_chunk_size)
    ]
# Function to query GPT model with website content
def ask_question(url, question):
    """Answer *question* about the page at *url* using gpt-3.5-turbo.

    The page text is split into word-count chunks and the model is queried
    once per chunk; the per-chunk answers are concatenated. Returns an
    "Error..."-prefixed string on fetch or API failure instead of raising.
    """
    # Fetch website content
    website_text = fetch_website_content(url)
    # Bug fix: the original checked `"Error" in website_text`, which
    # misfires whenever a legitimate page merely contains the word "Error".
    # fetch_website_content's failure strings always *begin* with "Error",
    # so a prefix test detects failure without that false positive.
    if website_text.startswith("Error"):
        return website_text
    # Split content into manageable chunks based on OpenAI's token limit
    chunks = split_content_into_chunks(website_text)
    # Accumulate the per-chunk answers here.
    full_answer = ""
    # Query GPT model for each chunk
    for chunk in chunks:
        # Prepare the prompt for GPT
        messages = [
            {"role": "system", "content": "You are a helpful assistant who answers questions based on the following website content."},
            {"role": "user", "content": f"Website content: {chunk}\n\nQuestion: {question}"}
        ]
        # Use GPT-3.5-turbo model to generate an answer
        # NOTE(review): openai.ChatCompletion was removed in openai>=1.0;
        # this code requires openai<1.0 (or the new client API) — confirm
        # the pinned library version.
        try:
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",  # Use gpt-4 if you have access to it
                messages=messages,
                max_tokens=3000,
                temperature=0.5,
            )
            answer = response.choices[0].message['content'].strip()
            full_answer += answer + "\n\n"  # Append chunked responses together
        except Exception as e:
            # First API failure aborts the whole request; partial answers
            # from earlier chunks are discarded, matching original behavior.
            return f"Error: {str(e)}"
    return full_answer
# Gradio interface for chatbot
def chatbot(url, question):
    """Gradio callback: forward the URL and question to ask_question."""
    answer = ask_question(url, question)
    return answer
# Define Gradio interface using new syntax: two text inputs (URL + question)
# feeding the chatbot callback, one text output for the generated answer.
iface = gr.Interface(
    fn=chatbot,
    inputs=[
        gr.Textbox(label="Website URL", placeholder="Enter website URL here..."),
        gr.Textbox(label="Your Question", placeholder="Ask a question to understand what is in the website or generate article based on the website information...")
    ],
    outputs=gr.Textbox(label="Responses"),
    title="Contentigo - Lite",
    description="Ask questions about the content of any website. Also, generate articles based on the website content."
)

# Launch the Gradio interface — guarded so that importing this module
# (e.g. from tests or another app) does not start a web server as a
# side effect; running the file as a script behaves exactly as before.
if __name__ == "__main__":
    iface.launch()