from typing import Optional
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_groq import ChatGroq
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
import os
#from dotenv import load_dotenv
import requests
from bs4 import BeautifulSoup
import gradio as gr
# Load environment variables
#load_dotenv()
# Groq credential, read once at import time; None when the variable is unset.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# Process-wide state shared by the Gradio callbacks:
#   messages         - running log of {"role", "content"} chat entries
#   qa_chain         - retrieval chain for the loaded blog (None until loaded)
#   current_blog_url - URL of the blog that is currently loaded
#   blog_content     - raw text extracted from that blog
session_state = dict(
    messages=[],
    qa_chain=None,
    current_blog_url=None,
    blog_content=None,
)
# Function to fetch and extract blog text
def fetch_blog_text(url: str, timeout: float = 30.0) -> str:
    """Download a page and return the text of its ``<p>`` elements.

    Args:
        url: Address of the blog post to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled host.

    Returns:
        The text of every ``<p>`` tag, in document order, joined by newlines.
        An empty string when the page contains no paragraphs.

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            4xx/5xx HTTP status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx so an error page is never indexed as blog content.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    return "\n".join(p.get_text() for p in soup.find_all("p"))
# Function to create vector store from blog text
def create_vector_store(text: str):
    """Split *text* into overlapping chunks and index them in a Chroma store.

    Args:
        text: Raw blog text to index.

    Returns:
        The Chroma vector store built from the embedded chunks.

    Raises:
        ValueError: If *text* produces no chunks (empty or whitespace-only
            input), since there would be nothing to embed or retrieve.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_text(text)
    if not chunks:
        # Stop here with a clear message instead of handing an empty list to
        # the embedding / vector-store layer.
        raise ValueError("No text content found to index.")
    embeddings = HuggingFaceEmbeddings()
    return Chroma.from_texts(chunks, embeddings)
# Set up the RAG chain
def setup_chain(vectordb):
    """Build the conversational retrieval chain over *vectordb*.

    The chain pairs a Groq-hosted chat model with a retriever obtained from
    the vector store, and carries a buffer memory stored under the
    ``chat_history`` key.
    """
    chat_model = ChatGroq(api_key=GROQ_API_KEY, model="gemma2-9b-it")
    history_buffer = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
    )
    return ConversationalRetrievalChain.from_llm(
        llm=chat_model,
        retriever=vectordb.as_retriever(),
        memory=history_buffer,
    )
# Load and process blog function
def load_blog(blog_url):
    """Fetch a blog, index it, and reset the chat session around it.

    Args:
        blog_url: URL of the blog post to load.

    Returns:
        A status string for the UI: a success banner followed by a preview of
        up to 1000 characters of the extracted text, or an error message when
        any step fails. Exceptions are reported in the string, never raised.
    """
    try:
        text = fetch_blog_text(blog_url)
        vectordb = create_vector_store(text)
        session_state["qa_chain"] = setup_chain(vectordb)
        session_state["current_blog_url"] = blog_url
        session_state["blog_content"] = text
        # A new blog starts a new conversation.
        session_state["messages"] = []
        preview = text[:1000]
        suffix = "..." if len(text) > 1000 else ""
        # Status glyphs are written as escapes (U+2705 check mark, U+274C
        # cross mark) so the source stays ASCII and cannot be mis-decoded.
        return (
            "\u2705 Blog loaded! You can now ask questions.\n\n"
            f"Preview:\n{preview}{suffix}"
        )
    except Exception as e:
        # UI boundary: surface the failure as text rather than crashing the app.
        return f"\u274c Error loading blog: {e}"
# Chat handler
def chat_with_blog(question):
    """Answer *question* about the currently loaded blog.

    Args:
        question: The user's question text.

    Returns:
        The chain's answer, or a message string when no blog is loaded or the
        chain call fails. Exceptions are reported in the string, never raised.
    """
    if not session_state["qa_chain"]:
        return "\u274c Please load a blog first."
    try:
        result = session_state["qa_chain"]({
            "question": question,
            "chat_history": _pair_history(session_state["messages"]),
        })
        response = result["answer"]
    except Exception as e:
        # UI boundary: surface the failure as text rather than crashing the app.
        return f"\u274c Error: {e}"
    # Record the exchange only after it succeeded, so a failed call never
    # leaves an unanswered question in the log.
    session_state["messages"].append({"role": "user", "content": question})
    session_state["messages"].append({"role": "assistant", "content": response})
    return response


def _pair_history(messages):
    """Fold a flat role-tagged message log into (question, answer) tuples."""
    pairs = []
    pending_question = None
    for message in messages:
        if message["role"] == "user":
            pending_question = message["content"]
        elif message["role"] == "assistant" and pending_question is not None:
            pairs.append((pending_question, message["content"]))
            pending_question = None
    return pairs
# Gradio UI
def main():
    """Build the Gradio interface and launch the app.

    Layout: a title, a row with the URL box and load button, the blog preview,
    a row with the question box and send button, and the response box. The
    load button calls ``load_blog``; the send button calls ``chat_with_blog``.
    """
    custom_css = """
    textarea, input, button {
        font-size: 1.2em;
    }
    .chat-box {
        height: 300px;
        overflow-y: auto;
        background-color: #1f1f1f;
        color: white;
        padding: 1em;
        border-radius: 8px;
    }
    """
    with gr.Blocks(theme=gr.themes.Base(), css=custom_css) as demo:
        # NOTE(review): the heading emoji was mis-encoded in the reviewed copy
        # and could not be recovered; U+1F4DD (memo) is a placeholder - confirm.
        gr.Markdown(
            "# \U0001F4DD Blog Bot\nYour AI-powered blog analysis companion"
        )

        # NOTE(review): which widgets sit inside each row was inferred, since
        # the reviewed copy had lost its indentation - confirm the layout.
        with gr.Row():
            blog_url_input = gr.Textbox(
                label="Enter Blog URL",
                placeholder="Paste blog URL here...",
            )
            # U+2728 (sparkles), written as an escape to keep the source ASCII.
            load_btn = gr.Button("\u2728 Load Blog")
        blog_preview = gr.Markdown("", label="Blog Preview")

        with gr.Row():
            chat_input = gr.Textbox(
                label="Ask a question about the blog...",
                lines=2,
            )
            send_btn = gr.Button("Send")
        chat_output = gr.Textbox(label="Response", lines=4)

        # Wire the buttons to their handlers.
        load_btn.click(fn=load_blog, inputs=blog_url_input, outputs=blog_preview)
        send_btn.click(fn=chat_with_blog, inputs=chat_input, outputs=chat_output)

    demo.launch()
# Launch the UI only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()