import gradio as gr
import requests
from huggingface_hub import InferenceClient
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.document_loaders import PyPDFLoader
| """ | |
| For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference | |
| """ | |
| client = InferenceClient("HuggingFaceH4/zephyr-7b-beta") | |
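
# `InferenceClient.chat_completion` mirrors the OpenAI-style chat API and is
# available from huggingface_hub v0.22 onward. A minimal sanity check (a
# sketch, not part of the app):
#
#   out = client.chat_completion([{"role": "user", "content": "Hola"}], max_tokens=16)
#   print(out.choices[0].message.content)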

# Download the source PDF and build the retrieval index once at startup, so the
# document is not re-downloaded and re-embedded on every chat turn.
URL = "https://www.esmo.org/content/download/6594/114963/1/ES-Cancer-de-Mama-Guia-para-Pacientes.pdf"
PDF_PATH = "ES-Cancer-de-Mama-Guia-para-Pacientes.pdf"
response = requests.get(URL)
response.raise_for_status()
with open(PDF_PATH, "wb") as f:
    f.write(response.content)

loader = PyPDFLoader(PDF_PATH)
documents = loader.load()

# Split the PDF into overlapping chunks so each embedding covers a coherent passage.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
all_splits = text_splitter.split_documents(documents)

# Multilingual sentence-transformers model, since the source document is in Spanish.
model_name = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
model_kwargs = {"device": "cuda"}
embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)
vectordb = Chroma.from_documents(documents=all_splits, embedding=embeddings, persist_directory="chroma_db")
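
# Because `persist_directory` is set, the index is written to "chroma_db" on
# disk. On later runs it could be reloaded instead of rebuilt (a sketch,
# assuming the directory was created by the code above):
#
#   vectordb = Chroma(persist_directory="chroma_db", embedding_function=embeddings)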


def respond(message, history, system_message, max_tokens, temperature, top_p):
    # Retrieve the chunks closest to the question. Chroma returns
    # (document, distance) pairs, where a lower distance means a closer match.
    docs = vectordb.similarity_search_with_score(message)
    context = [doc.page_content for doc, score in docs if score < 7]
    if not context:
        # "I have no information to answer this question."
        return "No tengo información para responder a esta pregunta"
    # Ask the model, in Spanish, to answer the question using only the
    # retrieved context.
    messages = [
        {"role": "system", "content": system_message},
        {"role": "user",
         "content": "Basándote en la siguiente información: " + "\n".join(context)
                    + "\n Responde en castellano a la pregunta: " + message},
    ]
    # chat_completion applies the model's own chat template server-side, so the
    # reply needs no manual "[/INST]" stripping.
    answer = client.chat_completion(messages, max_tokens=max_tokens, temperature=temperature, top_p=top_p)
    return answer.choices[0].message.content
| """ | |
| For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface | |
| """ | |
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)
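
# Gradio passes the textbox and sliders above to `respond` as the extra
# arguments `system_message`, `max_tokens`, `temperature`, and `top_p`, in order.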

if __name__ == "__main__":
    demo.launch()