| import gradio as gr |
| from huggingface_hub import InferenceClient |
| from torch import cuda, bfloat16 |
| import torch |
| import transformers |
| from transformers import AutoTokenizer |
| from time import time |
| import chromadb |
| from chromadb.config import Settings |
| from langchain.text_splitter import RecursiveCharacterTextSplitter |
| from langchain.embeddings import HuggingFaceEmbeddings |
| from langchain.chains import RetrievalQA |
| from langchain.vectorstores import Chroma |
| from langchain.document_loaders import PyPDFLoader |
| import requests |
|
|
| """ |
| For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference |
| """ |
| |
| |
|
|
|
|
| import os |
| from transformers.hf_api import HfApi |
|
|
|
|
# Hugging Face account that owns this app.
username = 'islasher'

# Name of the secret that stores the Hugging Face access token.
api_token_secret_name = "HF_API_TOKEN"

# Secrets configured for a Space are exposed to the running app as environment
# variables; there is no public HTTP endpoint that returns a user's secrets,
# so the token is read from the environment instead of being fetched.
api_token = os.environ.get(api_token_secret_name)

# An empty value is as useless as a missing one, so test truthiness.
if not api_token:
    raise ValueError(
        f"Failed to retrieve API token: environment variable {api_token_secret_name} is not set"
    )

# Authenticate this process with the Hub so that model downloads below can
# use the token. `login` is the supported entry point for token-based auth.
from huggingface_hub import login

login(token=api_token)
|
|
|
|
|
|
| |
| |
|
|
|
|
|
|
# Instruction-tuned Mistral checkpoint used to generate the answers.
model_id = 'mistralai/Mistral-7B-Instruct-v0.1'

# Quantization settings consumed by `from_pretrained` below. The original
# script referenced `bnb_config` without ever defining it (NameError at
# start-up). 4-bit NF4 with double quantization and bfloat16 compute is the
# usual bitsandbytes setup for a 7B model.
# NOTE(review): bitsandbytes quantization needs a CUDA GPU - confirm the
# deployment hardware provides one.
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16,
)

model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    max_new_tokens=200
)

# Load the quantized model; device_map='auto' lets accelerate place the
# weights on the available device(s).
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',
)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Text-generation pipeline shared by every chat request; the per-request
# sampling parameters are supplied at call time.
query_pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device_map="auto",
    max_new_tokens=200,
)
|
|
|
|
# Patient guide that serves as the knowledge base, and where it is stored locally.
_GUIDE_URL = "https://www.esmo.org/content/download/6594/114963/1/ES-Cancer-de-Mama-Guia-para-Pacientes.pdf"
_GUIDE_PATH = "ES-Cancer-de-Mama-Guia-para-Pacientes.pdf"

# Multilingual sentence-embedding model used to index the guide.
_EMBEDDING_MODEL = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"

# Retrieved chunks are only used as context when their score is below this value.
_MAX_SCORE = 7

# Marker that ends the user turn in the rendered prompt; the reply follows it.
_END_OF_INSTRUCTION = "[/INST]"

# Vector store built on first use and then reused for every later request.
_vectordb = None


def _get_vectordb():
    """Return the Chroma index of the guide, building it on the first call."""
    global _vectordb
    if _vectordb is None:
        # Download the PDF; fail loudly on HTTP errors instead of indexing an error page.
        reply = requests.get(_GUIDE_URL, timeout=60)
        reply.raise_for_status()
        with open(_GUIDE_PATH, "wb") as pdf_file:
            pdf_file.write(reply.content)

        pages = PyPDFLoader(_GUIDE_PATH).load()
        splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
        chunks = splitter.split_documents(pages)

        embeddings = HuggingFaceEmbeddings(model_name=_EMBEDDING_MODEL)
        _vectordb = Chroma.from_documents(
            documents=chunks,
            embedding=embeddings,
            persist_directory="chroma_db",
        )
    return _vectordb


def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Answer ``message`` in Spanish using passages retrieved from the guide.

    The guide is downloaded, split and embedded only once per process; the
    original code repeated that work (and appended duplicate chunks to the
    persisted collection) on every single message.

    Args:
        message: The user's question.
        history: Previous chat turns (currently unused).
        system_message: System prompt from the UI (currently unused).
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Returns:
        A tuple ``(answer, docs)`` where ``answer`` is the reply text and
        ``docs`` is the list of ``(document, score)`` pairs returned by the
        similarity search. Note that ``gr.ChatInterface`` expects a plain
        string reply, so a chat UI should display only the first element.
    """
    docs = _get_vectordb().similarity_search_with_score(message)
    context = [doc.page_content for doc, score in docs if score < _MAX_SCORE]

    # Nothing relevant enough was retrieved: do not let the model improvise.
    if not context:
        return "No tengo información para responder a esta pregunta", docs

    messages = [
        {
            "role": "user",
            "content": "Basándote en la siguiente información: "
            + "\n".join(context)
            + "\n Responde en castellano a la pregunta: "
            + message,
        }
    ]
    prompt = query_pipeline.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    outputs = query_pipeline(
        prompt,
        max_new_tokens=int(max_tokens),  # UI sliders may deliver a float
        do_sample=True,
        temperature=temperature,
        top_k=50,
        top_p=top_p,
    )
    generated = outputs[0]["generated_text"]

    # Keep only what follows the last end-of-instruction marker. rpartition
    # returns the whole text when the marker is absent, and strip() removes
    # the separating whitespace without clipping the first character.
    answer = generated.rpartition(_END_OF_INSTRUCTION)[2].strip()
    return answer, docs
|
|
|
|
|
|
|
|
| """ |
| For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface |
| """ |
def _chat_reply(message, history, system_message, max_tokens, temperature, top_p):
    """Adapt ``respond`` to ``gr.ChatInterface`` by returning only the reply text."""
    result = respond(message, history, system_message, max_tokens, temperature, top_p)
    # respond returns (answer, retrieved_docs); the chat UI would treat a
    # tuple as a file message, so only the answer string is passed on.
    if isinstance(result, tuple):
        return result[0]
    return result


# Chat UI: the additional inputs are passed to the chat function, in order,
# after the message and the history.
demo = gr.ChatInterface(
    _chat_reply,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)


# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()