|
|
import gradio as gr |
|
|
import requests |
|
|
|
|
|
from langchain.text_splitter import RecursiveCharacterTextSplitter |
|
|
from langchain_community.document_loaders import WebBaseLoader |
|
|
from langchain_community.vectorstores import Chroma |
|
|
from langchain_huggingface import HuggingFaceEmbeddings |
|
|
from langchain.document_loaders import PyPDFLoader |
|
|
from rerankers import Reranker |
|
|
from langchain_community.chat_models import ChatOllama |
|
|
from langchain import hub |
|
|
from langchain_core.output_parsers import StrOutputParser |
|
|
|
|
|
|
|
|
# Tag of the model that the local Ollama server is asked to run.
local_llm = "llama3.2:1b"

# Chat model shared by every pipeline in this script; the sampling
# parameters are fixed here so all callers see the same configuration.
llm = ChatOllama(
    model=local_llm,
    temperature=0,
    top_k=50,
    top_p=0.95,
)

# Prompt-less pipeline: send input straight to the model and reduce the
# reply to a plain string.
chain = llm | StrOutputParser()
|
|
|
|
|
|
|
|
# Location of the PDF that the bot answers questions about.
URL = "https://rua.ua.es/dspace/bitstream/10045/118060/1/Isurus_01_02.pdf"

# The timeout keeps a stalled server from hanging start-up indefinitely, and
# raise_for_status() fails fast on an HTTP error instead of saving the error
# page to disk and handing it to the PDF parser.
response = requests.get(URL, timeout=60)
response.raise_for_status()

# The context manager guarantees the file is flushed and closed before the
# loader below reads it back.
with open("Isurus_01_02.pdf", "wb") as pdf_file:
    pdf_file.write(response.content)

# Parse the downloaded PDF into LangChain documents.
loader = PyPDFLoader("Isurus_01_02.pdf")
documents = loader.load()
|
|
|
|
|
|
|
|
# Cut the loaded documents into chunks (size 1000, overlap 20) so each piece
# can be embedded and retrieved on its own.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=20,
    chunk_size=1000,
)
all_splits = text_splitter.split_documents(documents)
|
|
|
|
|
|
|
|
# Sentence-embedding model used to vectorise both the chunks and the queries.
model_name = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"

# Extra arguments forwarded to the underlying model; inference is pinned to
# the CPU.
model_kwargs = {"device": "cpu"}

embeddings = HuggingFaceEmbeddings(
    model_kwargs=model_kwargs,
    model_name=model_name,
)
|
|
|
|
|
|
|
|
# Index the chunks in a Chroma store persisted under "chroma_db".
# Deterministic ids make indexing idempotent: because the directory survives
# between runs, auto-generated ids would store a fresh copy of every chunk on
# each start-up and searches would return duplicated passages. With stable
# ids a re-run overwrites the existing entries instead.
vectordb = Chroma.from_documents(
    documents=all_splits,
    embedding=embeddings,
    ids=[f"Isurus_01_02-{index}" for index, _ in enumerate(all_splits)],
    persist_directory="chroma_db",
)
|
|
|
|
|
|
|
|
# Lazily built "prompt | llm | parser" pipeline, shared by every query.
_rag_chain = None


def _get_rag_chain():
    """Return the RAG pipeline, building it the first time it is needed."""
    global _rag_chain
    if _rag_chain is None:
        # Pull the prompt once instead of on every question; the prompt name
        # never changes between calls.
        _rag_chain = hub.pull("rlm/rag-prompt") | llm | StrOutputParser()
    return _rag_chain


def test_rag(query, max_score=7):
    """Answer *query* using passages retrieved from the vector store.

    Args:
        query: The user's question.
        max_score: Exclusive upper bound on the retrieval score; only
            passages scoring below it are passed to the model. Defaults to
            7, the threshold this function has always used.

    Returns:
        The model's answer as a string, or a fixed Spanish fallback message
        when no retrieved passage scores below ``max_score``.
    """
    scored_docs = vectordb.similarity_search_with_score(query)

    # Keep only the text of passages under the threshold.
    # NOTE(review): this treats lower scores as better matches, which is how
    # distance-based scores behave -- confirm for the configured store.
    context = [doc.page_content for doc, score in scored_docs if score < max_score]

    if not context:
        # Nothing relevant retrieved: answer without building or calling the
        # pipeline at all.
        return "No tengo información para responder a esta pregunta."

    return _get_rag_chain().invoke(
        {"context": "\n".join(context), "question": query}
    )
|
|
|
|
|
|
|
|
def chat_function(message, history):
    """Chat callback: answer the latest message through ``test_rag``.

    ``history`` is accepted as part of the callback signature but is not
    used; each message is answered on its own, without earlier turns.
    """
    reply = test_rag(message)
    return reply
|
|
|
|
|
# Chat front end: every user message is handed to chat_function and the
# returned string is shown as the bot's reply.
iface = gr.ChatInterface(
    title="RAG Bot",
    description="Hazme preguntas sobre el contenido del PDF.",
    chatbot=gr.Chatbot(),
    fn=chat_function,
)

# Start the Gradio app. This runs at module level, so it executes whenever
# the file is run or imported.
iface.launch()
|
|
|