Spaces:

Rktim
/

Blogbot

Build error

File size: 4,050 Bytes

from typing import Optional
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_groq import ChatGroq
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
import os
#from dotenv import load_dotenv
import requests
from bs4 import BeautifulSoup
import gradio as gr

# Groq API key, read from the process environment.
# NOTE: .env loading through python-dotenv is disabled (its import is
# commented out above), so the variable must already be exported.
# load_dotenv()
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# Process-wide state shared by the Gradio callbacks below:
#   messages         - chat transcript as {"role", "content"} dicts
#   qa_chain         - retrieval chain for the loaded blog (None until loaded)
#   current_blog_url - URL of the loaded blog
#   blog_content     - extracted text of the loaded blog
session_state = dict(
    messages=[],
    qa_chain=None,
    current_blog_url=None,
    blog_content=None,
)

# Function to fetch and extract blog text
def fetch_blog_text(url: str, timeout: float = 10.0) -> str:
    """Download a web page and return the text of its ``<p>`` elements.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` may block indefinitely on an unresponsive
            host.

    Returns:
        The text of every ``<p>`` element in document order, joined by
        newlines. An empty string when the page contains no paragraphs.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with a 4xx/5xx status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors so an error page is never indexed as if it
    # were the blog's content.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    return "\n".join(p.get_text() for p in soup.find_all("p"))

# Function to create vector store from blog text
def create_vector_store(text: str):
    """Split *text* into chunks, embed them, and index them in a new Chroma store.

    Args:
        text: Plain text of the blog post.

    Returns:
        A ``Chroma`` vector store built from the embedded chunks.

    Raises:
        ValueError: If *text* is empty or contains only whitespace, since
            there is nothing to index.
    """
    import uuid  # local import: only needed to name the collection

    if not text or not text.strip():
        raise ValueError("No readable text was found on the page.")

    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_text(text)
    embeddings = HuggingFaceEmbeddings()
    # Give every store its own collection. Without a name, each call uses
    # Chroma's default collection, so chunks from a previously loaded blog
    # could still be returned by the retriever.
    # NOTE(review): this relies on chromadb sharing its in-memory backend
    # within one process - confirm against the installed chromadb version.
    collection_name = f"blog-{uuid.uuid4().hex}"
    return Chroma.from_texts(chunks, embeddings, collection_name=collection_name)

# Set up the RAG chain
def setup_chain(vectordb):
    """Build a conversational retrieval chain over *vectordb*.

    Args:
        vectordb: Vector store to retrieve from; must provide
            ``as_retriever()``.

    Returns:
        A ``ConversationalRetrievalChain`` wired to a Groq chat model and a
        buffer memory stored under the ``"chat_history"`` key.
    """
    history = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
    )
    model = ChatGroq(api_key=GROQ_API_KEY, model="gemma2-9b-it")
    retriever = vectordb.as_retriever()
    return ConversationalRetrievalChain.from_llm(model, retriever, memory=history)

# Load and process blog function
def load_blog(blog_url):
    """Fetch a blog, index it, and make it the active chat session.

    On success the shared ``session_state`` is pointed at the new chain, URL,
    and text, and the chat transcript is cleared.

    Args:
        blog_url: Address of the blog post to load.

    Returns:
        A status message: a success line followed by up to the first 1000
        characters of the extracted text, or an error line describing the
        exception that was raised.
    """
    try:
        blog_text = fetch_blog_text(blog_url)
        chain = setup_chain(create_vector_store(blog_text))
        session_state.update(
            {
                "qa_chain": chain,
                "current_blog_url": blog_url,
                "blog_content": blog_text,
                "messages": [],
            }
        )
        preview = blog_text[:1000]
        # Mark the preview as truncated only when text was actually cut off.
        tail = "..." if len(blog_text) > 1000 else ""
        return f"✅ Blog loaded! You can now ask questions.\n\nPreview:\n{preview}{tail}"
    except Exception as e:
        return f"❌ Error loading blog: {e!s}"

# Chat handler
def chat_with_blog(question):
    """Answer *question* about the currently loaded blog.

    Args:
        question: The user's question text.

    Returns:
        The chain's answer, or a message starting with "❌" when no blog is
        loaded, the question is blank, or the chain raises.
    """
    qa_chain = session_state["qa_chain"]
    if not qa_chain:
        return "❌ Please load a blog first."
    if not question or not question.strip():
        return "❌ Please enter a question."

    # The transcript holds alternating user/assistant entries; pair them up
    # as (question, answer) turns rather than one half-empty tuple per
    # message.
    # NOTE(review): setup_chain attaches a memory under the same
    # "chat_history" key, which presumably takes precedence over this value -
    # confirm against LangChain's Chain input handling.
    transcript = session_state["messages"]
    chat_history = [
        (asked["content"], answered["content"])
        for asked, answered in zip(transcript[::2], transcript[1::2])
        if asked["role"] == "user" and answered["role"] == "assistant"
    ]

    try:
        result = qa_chain({"question": question, "chat_history": chat_history})
        response = result["answer"]
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing
        # the Gradio callback.
        return f"❌ Error: {str(e)}"

    # Record the turn only after it succeeded, so a failed call never leaves
    # an unanswered user message in the transcript.
    transcript.append({"role": "user", "content": question})
    transcript.append({"role": "assistant", "content": response})
    return response

# Gradio UI
def main():
    """Build the Gradio interface and launch it.

    The page has a URL box with a load button, a Markdown area for the load
    status and preview, and a question box with a send button whose answer is
    shown in a response textbox.
    """
    page_css = """
        textarea, input, button {
            font-size: 1.2em;
        }
        .chat-box {
            height: 300px;
            overflow-y: auto;
            background-color: #1f1f1f;
            color: white;
            padding: 1em;
            border-radius: 8px;
        }
    """
    base_theme = gr.themes.Base()

    with gr.Blocks(theme=base_theme, css=page_css) as demo:
        gr.Markdown("# 📝 Blog Bot\nYour AI-powered blog analysis companion")

        # Row 1: blog URL entry and the button that triggers loading.
        with gr.Row():
            url_box = gr.Textbox(
                label="Enter Blog URL",
                placeholder="Paste blog URL here...",
            )
            load_button = gr.Button("✨ Load Blog")

        # Shows the status message returned by load_blog.
        preview_area = gr.Markdown("", label="Blog Preview")

        # Row 2: question entry and the button that submits it.
        with gr.Row():
            question_box = gr.Textbox(
                label="Ask a question about the blog...",
                lines=2,
            )
            send_button = gr.Button("Send")

        answer_box = gr.Textbox(label="Response", lines=4)

        # Wire each button to its handler.
        load_button.click(fn=load_blog, inputs=url_box, outputs=preview_area)
        send_button.click(fn=chat_with_blog, inputs=question_box, outputs=answer_box)

    demo.launch()

if __name__ == "__main__":
    main()