Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer | |
| import torch | |
| import theme | |
| from huggingface_hub import from_pretrained_keras | |
| from tensorflow.keras.applications import EfficientNetB0 | |
| import tensorflow as tf | |
| from tensorflow import keras | |
| from PIL import Image | |
| theme = theme.Theme() | |
| import os | |
| import sys | |
| sys.path.append('../..') | |
| #langchain | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter | |
| from langchain.embeddings import HuggingFaceEmbeddings | |
| from langchain.prompts import PromptTemplate | |
| from langchain.chains import RetrievalQA | |
| from langchain.prompts import ChatPromptTemplate | |
| from langchain.schema import StrOutputParser | |
| from langchain.schema.runnable import Runnable | |
| from langchain.schema.runnable.config import RunnableConfig | |
| from langchain.chains import ( | |
| LLMChain, ConversationalRetrievalChain) | |
| from langchain.vectorstores import Chroma | |
| from langchain.memory import ConversationBufferMemory | |
| from langchain.chains import LLMChain | |
| from langchain.prompts.prompt import PromptTemplate | |
| from langchain.prompts.chat import ChatPromptTemplate, SystemMessagePromptTemplate | |
| from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate, ChatPromptTemplate, MessagesPlaceholder | |
| from langchain.document_loaders import PyPDFDirectoryLoader | |
| from langchain.output_parsers import PydanticOutputParser | |
| from langchain_community.llms import HuggingFaceHub | |
| from langchain_community.document_loaders import WebBaseLoader | |
| from pydantic.v1 import BaseModel, Field | |
| import shutil | |
# HTML-styled title text for the app.
custom_title = "<span style='color: rgb(243, 239, 224);'>Green Greta</span>"

# ---------------------------------------------------------------------------
# Chatbot knowledge base
# ---------------------------------------------------------------------------

# Download the single web page the chatbot retrieves its context from.
loader = WebBaseLoader(["https://www.epa.gov/recycle/frequent-questions-recycling"])
data = loader.load()

# Cut the page into overlapping chunks (sizes measured with ``len``).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=150, length_function=len)
docs = text_splitter.split_documents(data)

# Embedding model used to index the chunks.
embeddings = HuggingFaceEmbeddings(model_name="thenlper/gte-small")

# Start from a clean on-disk store: any directory left by a previous run is
# deleted (errors ignored) before the vector database is rebuilt.
persist_directory = "docs/chroma/"
shutil.rmtree(persist_directory, ignore_errors=True)
vectordb = Chroma.from_documents(
    documents=docs, embedding=embeddings, persist_directory=persist_directory
)

# Retriever over the store: "mmr" search type with k=2.
retriever = vectordb.as_retriever(search_type="mmr", search_kwargs={"k": 2})
# Schema of the structured reply requested from the LLM: the question that was
# asked and the answer to it, both required strings.
# NOTE: documented with comments rather than a class docstring on purpose, so
# the schema generated from this model stays exactly as it was.
class FinalAnswer(BaseModel):
    question: str = Field(...)
    answer: str = Field(...)
# Output parser bound to FinalAnswer; it supplies the format instructions that
# are injected into the system prompt below.
parser = PydanticOutputParser(pydantic_object=FinalAnswer)

# System prompt. ``{context}`` and ``{question}`` are filled per request;
# ``{format_instructions}`` is pre-filled from the parser when the chat prompt
# is built.
template = """
Your name is Greta and you are a recycling chatbot with the objective to anwer questions from user in English or Spanish /
Has sido diseñado y creado por el Grupo 1 del Máster en Data Science & Big Data de la promoción 2023/2024 de la Universidad Complutense de Madrid. Este grupo está fromado por Rocío, María Guillermo, Alejandra, Paloma y Álvaro /
Use the following pieces of context to answer the question /
If the question is English answer in English /
If the question is Spanish answer in Spanish /
Do not mention the word context when you answer a question /
Answer the question fully and provide as much relevant detail as possible. Do not cut your response short /
Context: {context}
User: {question}
{format_instructions}
"""

# Chat prompt = system message (template above) + the user's question as the
# human message.
sys_prompt = SystemMessagePromptTemplate.from_template(template)
qa_prompt = ChatPromptTemplate(
    messages=[sys_prompt, HumanMessagePromptTemplate.from_template("{question}")],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)
# Text-generation settings sent along with every request to the hosted model.
_generation_settings = {
    "max_new_tokens": 2000,
    "top_k": 30,
    "temperature": 0.1,
    "repetition_penalty": 1.03,
}

# Hosted Mixtral instruct model accessed through the Hugging Face Hub wrapper.
llm = HuggingFaceHub(
    repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
    task="text-generation",
    model_kwargs=_generation_settings,
)

# Conversation memory: stored under "chat_history", reading the "question"
# input and the chain's "output" key.
_chat_memory = ConversationBufferMemory(
    llm=llm,
    memory_key="chat_history",
    input_key="question",
    output_key="output",
)

# Retrieval-augmented conversational chain: retrieved chunks are combined with
# ``qa_prompt``; the chat history is passed through unchanged and the incoming
# question is not rephrased. The reply is exposed under the "output" key.
qa_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=_chat_memory,
    combine_docs_chain_kwargs={"prompt": qa_prompt},
    get_chat_history=lambda h: h,
    rephrase_question=False,
    output_key="output",
    verbose=True,
)
| import numpy as np | |
| import soundfile as sf | |
# Speech-to-text pipeline (Whisper small checkpoint), created once at import
# time and reused for every voice request.
transcriber = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-small",
)
def _transcribe_audio(audio):
    """Turn a recorded clip into text with the module-level ASR pipeline.

    Args:
        audio: Value produced by ``gr.Audio(type='numpy')``, i.e. a
            ``(sample_rate, samples)`` tuple. A bare sample array is also
            accepted and then assumed to be 16 kHz (the previous behaviour).

    Returns:
        The recognised text, or ``""`` when the clip contains no samples.
    """
    import os
    import tempfile

    if isinstance(audio, (tuple, list)):
        sample_rate, samples = audio
    else:
        sample_rate, samples = 16000, audio

    # Work on a float32 copy: in-place division fails on the integer PCM
    # arrays Gradio delivers and would also mutate the caller's buffer.
    samples = np.asarray(samples).astype(np.float32)
    if samples.ndim > 1:
        # (n_samples, n_channels) -> mono
        samples = samples.mean(axis=1)
    if samples.size == 0:
        return ""

    # Peak-normalise, guarding against an all-zero (silent) recording.
    peak = float(np.max(np.abs(samples)))
    if peak > 0:
        samples = samples / peak

    # Use a private temporary directory so concurrent requests never share
    # (and overwrite) the same file, and write the real sample rate.
    with tempfile.TemporaryDirectory() as tmp_dir:
        wav_path = os.path.join(tmp_dir, "audio.wav")
        sf.write(wav_path, samples, int(sample_rate))
        result = transcriber(wav_path)

    # The ASR pipeline returns a dict with the transcription under "text".
    return result["text"].strip()


def _extract_answer(output_string):
    """Pull the value of the last ``"answer":`` field out of raw LLM output.

    The model is asked for a JSON object, but its raw text may contain extra
    material around it, so the last ``"answer":`` marker is located and the
    JSON string that follows it is decoded (which handles escaped quotes).
    If the text is not valid JSON the value is cut at the next double quote;
    if there is no marker at all the stripped raw text is returned.
    """
    import json

    marker = '"answer":'
    marker_pos = output_string.rfind(marker)
    if marker_pos == -1:
        # Model ignored the format instructions: show what it said.
        return output_string.strip()

    remainder = output_string[marker_pos + len(marker):].strip()
    opening = remainder.find('"')
    if opening == -1:
        return remainder

    try:
        # strict=False tolerates literal newlines inside the string value.
        value, _ = json.JSONDecoder(strict=False).raw_decode(remainder, opening)
        if isinstance(value, str):
            return value
    except json.JSONDecodeError:
        pass

    # Fallback: plain scan for the closing quote; if the reply was cut off
    # before the closing quote, keep everything that was generated.
    closing = remainder.find('"', opening + 1)
    if closing == -1:
        return remainder[opening + 1:].strip()
    return remainder[opening + 1:closing]


def chat_interface(question, audio_input=None, history=None):
    """Answer a typed or spoken question with the retrieval QA chain.

    Args:
        question: Text typed by the user. Ignored when ``audio_input`` is
            given, in which case the transcription is used instead.
        audio_input: Optional recording from ``gr.Audio(type='numpy')``.
        history: Unused; kept for interface compatibility.

    Returns:
        The chatbot's answer text, or ``""`` when there is no question to
        answer (empty text box / empty or untranscribable recording).
    """
    if audio_input is not None:
        # A spoken question replaces the typed one and is then answered like
        # any other question (previously the transcription itself was
        # returned as the bot's response).
        question = _transcribe_audio(audio_input)

    if not question or not question.strip():
        return ""

    result = qa_chain.invoke({'question': question})
    return _extract_answer(result['output'])
# UI wiring: a text box for typed questions and an audio widget whose recording
# is handed to the callback in NumPy form; the reply is shown in a text box.
_chat_inputs = [
    gr.Textbox(lines=3, label="Type your message here"),
    gr.Audio(label="Record your voice", type="numpy"),
]
_chat_output = gr.Textbox(label="Bot's Response")

chatbot_gradio_app = gr.Interface(
    fn=chat_interface,
    inputs=_chat_inputs,
    outputs=_chat_output,
)

# Start the web app.
chatbot_gradio_app.launch()