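"""Minimal RAG question-answering demo: Gradio UI + LangChain + Chroma + Ollama.

Given a URL and a question, the page is loaded, split into chunks, embedded into a
Chroma vector store, and the most relevant chunks are passed as context to a local
`mistral` model via Ollama. Assumes a running local Ollama server with the `mistral`
model pulled.
"""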
import gradio as gr
import bs4  # available for bs_kwargs (e.g. a bs4.SoupStrainer to limit what gets parsed)
import ollama

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import Chroma

|
def load_and_retrieve_docs(url):
    """Load a web page, split it into chunks, and return a retriever over the chunks."""
    loader = WebBaseLoader(
        web_paths=(url,),
        bs_kwargs=dict(),  # BeautifulSoup parsing options (e.g. a bs4.SoupStrainer) can go here
    )
    docs = loader.load()
    # Split into overlapping chunks so each piece fits comfortably in the model's context.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    splits = text_splitter.split_documents(docs)
    # Embed the chunks locally and index them in an in-memory Chroma vector store.
    embeddings = OllamaEmbeddings(model="mistral")
    vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)
    return vectorstore.as_retriever()
|
|
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)
|
|
def rag_chain(url, question):
    """Answer a question about the page at `url` using retrieval-augmented generation."""
    retriever = load_and_retrieve_docs(url)
    retrieved_docs = retriever.invoke(question)
    formatted_context = format_docs(retrieved_docs)
    formatted_prompt = f"Question: {question}\n\nContext: {formatted_context}"
    # Ask a local Ollama model to answer using the retrieved context.
    response = ollama.chat(model='mistral', messages=[{'role': 'user', 'content': formatted_prompt}])
    return response['message']['content']
|
|
# Gradio UI: the two text inputs (URL, question) map positionally to rag_chain's arguments.
iface = gr.Interface(
    fn=rag_chain,
    inputs=["text", "text"],
    outputs="text",
    title="RAG Chain Question Answering",
    description="Enter a URL and a question to get answers from the RAG chain.",
)
|
iface.launch()