Spaces:

OuroborosM
/

STLA-BABY-S

Build error

App Files Files Community

OuroborosM committed on Sep 20, 2023

Commit

daf02fd

1 Parent(s): 200f9e3

add full tool

Browse files

Files changed (1) hide show

app.py +201 -2

app.py CHANGED Viewed

@@ -19,11 +19,144 @@ from langchain.memory import ConversationBufferWindowMemory
 from langchain.prompts import MessagesPlaceholder
 from langchain.agents import ConversationalChatAgent, AgentExecutor
 from langchain.callbacks import StreamlitCallbackHandler
 global CurrentAgent
 CurrentAgent = 'Structured Zero Short Agent'
 class GPTRemote(LLM):
     n: int
@@ -88,6 +221,72 @@ class GPTRemote(LLM):
 GPTfake = GPTRemote(n=0)
 async def start_playwright(question: str):
@@ -156,7 +355,7 @@ memory3 = ConversationBufferWindowMemory(memory_key="chat_history", return_messa
 input_variables=["input", "chat_history", "agent_scratchpad"]
-tools_remote = []
 agent_STRUCTURED_ZEROSHOT_REACT = initialize_agent(tools_remote, GPTfake,
                         #  agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,

 from langchain.prompts import MessagesPlaceholder
 from langchain.agents import ConversationalChatAgent, AgentExecutor
 from langchain.callbacks import StreamlitCallbackHandler
+from langchain.chains import RetrievalQA
+import pinecone
+from langchain.vectorstores import Pinecone
+from langchain.embeddings.huggingface import HuggingFaceEmbeddings
+from langchain.tools import DuckDuckGoSearchRun
+from langchain.utilities import WikipediaAPIWrapper
+import soundfile as sf
+from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
+from datasets import load_dataset
+import torch
+from langchain.chains import LLMMathChain
+from interpreter.code_interpreter import CodeInterpreter
 global CurrentAgent
 CurrentAgent = 'Structured Zero Short Agent'
+class DB_Search2(BaseTool):
+    """Agent tool that answers a query by delegating to ``QAQuery_p``.
+
+    Only the synchronous path is implemented; the async hook raises.
+    """
+    name = "Vector Database Search"
+    description = "This is the internal vector database to search information firstly (i.e. engineering data, acronym.)"
+    def _run(self, query: str) -> str:
+        """Return the answer part of ``QAQuery_p(query)``; the sources are discarded."""
+        answer, _sources = QAQuery_p(query)
+        return answer
+    def _arun(self, query: str):
+        """Async execution is not supported and always raises NotImplementedError."""
+        raise NotImplementedError("N/A")
+# --- Pinecone connection ---
+# The API key comes from the `pinecone_api_key` environment variable; a missing
+# variable raises KeyError when this module is loaded.
+pinecone.init(
+    api_key=os.environ["pinecone_api_key"],
+    environment='us-west4-gcp-free',  # alternative: 'asia-southeast1-gcp-free'
+)
+index_name = 'stla-back'  # alternative index: 'stla-baby'
+index = pinecone.Index(index_name)
+# Startup diagnostics: print the whoami() result and the index statistics.
+print(pinecone.whoami())
+print(index.describe_index_stats())
+# --- Embedding model ---
+embed_model_id = 'sentence-transformers/all-MiniLM-L6-v2'
+device = 'cpu'
+embeddings_miniLM = HuggingFaceEmbeddings(
+    model_name=embed_model_id,
+    model_kwargs={'device': device},
+)
+# The MiniLM embeddings are the active choice (an `embeddings_openai`
+# alternative is not used here).
+embeddings = embeddings_miniLM
+# Vector store over the existing index; QAQuery_p reads it as a module global.
+vectordb_p = Pinecone.from_existing_index(index_name, embeddings)
+def QAQuery_p(question: str):
+    """Answer ``question`` with a "stuff" RetrievalQA chain over ``vectordb_p``.
+
+    The number of documents to retrieve is read from the ``search_kwargs_k``
+    environment variable on every call (KeyError if it is unset, ValueError
+    if it is not an integer).
+
+    Args:
+        question: The query text passed to the chain under the "query" key.
+
+    Returns:
+        A ``(response, source)`` tuple holding the chain's ``result`` value
+        and its ``source_documents`` value.
+    """
+    retriever = vectordb_p.as_retriever()
+    retriever.search_kwargs['k'] = int(os.environ["search_kwargs_k"])
+    print("--------------- QA with Remote --------------")
+    # The chain is always built on GPTfake; there is no per-agent LLM choice,
+    # so no conditional is needed and `qa` can never be left unbound.
+    qa = RetrievalQA.from_chain_type(
+        llm=GPTfake,
+        chain_type="stuff",
+        retriever=retriever,
+        return_source_documents=True,
+        verbose=True,
+    )
+    res = qa({"query": question})
+    # Two separator lines, matching the existing console output.
+    print("-" * 20)
+    print("-" * 20)
+    return res['result'], res['source_documents']
+# Internet search tool: wraps the `run` method of a DuckDuckGoSearchRun instance.
+Netsearch = DuckDuckGoSearchRun()
+duckduckgo_tool2 = Tool(
+    name="Duckduckgo Internet Search",
+    description="Useful to search in internet for real-time information and additional information which is not available in other tools",
+    func=Netsearch.run,
+)
+# Wikipedia tool: wraps the `run` method of a WikipediaAPIWrapper instance.
+Wikipedia = WikipediaAPIWrapper()
+wikipedia_tool2 = Tool(
+    name="Wikipedia Search",
+    description="Useful to search a topic, country or person when there is no availble information in vector database",
+    func=Wikipedia.run,
+)
+def text_to_speech_loc2(Text_input):
+    """Synthesize ``Text_input`` with SpeechT5 and save it as a WAV file.
+
+    The audio is written with a sample rate of 16000 to
+    ``sample-<YYYYmmdd-HHMMSS>.wav`` (a relative path), and that file name is
+    appended to the global ``Audio_output`` list.
+
+    Returns:
+        The file object the audio was written through.
+        NOTE(review): the handle is already closed when it is returned, so
+        only metadata such as ``.name`` is usable -- confirm what callers
+        expect before changing this.
+    """
+    global Audio_output
+    # The processor, model and vocoder are loaded again on every call.
+    tts_processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
+    tts_model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
+    hifigan = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
+    encoded = tts_processor(text=Text_input, return_tensors="pt")
+    # x-vector with the speaker's voice characteristics, from a fixed dataset row
+    xvector_rows = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
+    speaker = torch.tensor(xvector_rows[7306]["xvector"]).unsqueeze(0)
+    speech = tts_model.generate_speech(encoded["input_ids"], speaker, vocoder=hifigan)
+    print("Type of speech: ", type(speech))
+    wav_name = 'sample-' + time.strftime("%Y%m%d-%H%M%S") + '.wav'
+    with open(wav_name, 'wb') as audio:
+        sf.write(audio, speech.numpy(), samplerate=16000)
+    print("audio: ", audio)
+    Audio_output.append(audio.name)
+    return audio
+# Agent tool wrapping text_to_speech_loc2.
+Text2Sound_tool_loc = Tool(
+    name="Text To Sound API 2",
+    description="Useful when you need to convert text into sound file.",
+    func=text_to_speech_loc2,
+)
 class GPTRemote(LLM):
     n: int
 GPTfake = GPTRemote(n=0)
+# Calculator tool: an LLMMathChain built on GPTfake, exposed through its `run` method.
+llm_math_2 = LLMMathChain.from_llm(GPTfake)
+math_tool_2 = Tool(
+    name='Calculator',
+    description='Useful for when you need to answer questions about math.',
+    func=llm_math_2.run,
+)
+class CodeBlock:
+    '''
+    Container with `code`, `output` and `active_line` attributes.
+    Code_Runner assigns an instance to the interpreter's `active_block`.
+    '''
+    def __init__(self, code):
+        # Output starts empty and no line is active yet.
+        self.code, self.output, self.active_line = code, "", None
+    def refresh(self):
+        '''Print the current active line and the current output.'''
+        for label, value in (("Active line", self.active_line), ("Output", self.output)):
+            print(f"{label}: {value}")
+def Code_Runner(code_raw: str):
+    """Run ``code_raw`` through ``CodeInterpreter`` and return its output.
+
+    Code containing ``!pip`` or ``pip install`` is run with the shell
+    interpreter, after rewriting ``!pip`` to ``pip``; everything else is run
+    as Python. When ``CurrentAgent`` is "Zero Short React 2" the code is first
+    passed through ``RemoveIndent``.
+
+    SECURITY: this executes whatever code it receives on the local machine;
+    the input must be trusted or the process sandboxed.
+
+    Args:
+        code_raw: Source text of the code (or pip command) to execute.
+
+    Returns:
+        The interpreter's output, or "It is Done. No Error Found." when that
+        output is a blank string or an empty list.
+    """
+    if CurrentAgent == "Zero Short React 2":
+        code_raw = RemoveIndent(code_raw)
+    if '!pip' in code_raw or 'pip install' in code_raw:
+        code_raw = code_raw.replace('!pip', 'pip')
+        language = "shell"
+    else:
+        language = "python"
+    interpreter = CodeInterpreter(language=language, debug_mode=True)
+    interpreter.active_block = CodeBlock(code_raw)
+    output = interpreter.run()
+    print("Real Output: \n", output)
+    # Check the type before calling strip(): calling it on a list raises
+    # AttributeError, which would keep the empty-list case from ever being
+    # replaced by the confirmation message.
+    is_blank_text = isinstance(output, str) and output.strip() == ""
+    is_empty_list = isinstance(output, list) and not output
+    if is_blank_text or is_empty_list:
+        output = "It is Done. No Error Found."
+    return output
+def RemoveIndent(code_string, indentation_level=4):
+    """Strip one indentation step from each line and drop blank lines.
+
+    Every line that starts with ``indentation_level`` spaces loses exactly
+    that many leading characters; other lines are kept unchanged. Lines that
+    are empty or whitespace-only are removed. The decision is made per line,
+    so relative indentation is preserved only when every line carries the
+    prefix.
+    """
+    prefix = ' ' * indentation_level
+    kept = [
+        raw[indentation_level:] if raw.startswith(prefix) else raw
+        for raw in code_string.split('\n')
+        if raw.strip() != ""
+    ]
+    return '\n'.join(kept)
+# Agent tool wrapping Code_Runner.
+python_tool3 = Tool(
+    name="Code Runner",
+    description="""Code Interpreter which is able to run code block in local machine.\n It is capable to treat **any** task by running the code and output the result. (i.e. analyzer data, modify/creat documents, draw diagram/flowchart ...)\n You should input detail code with right indentation.""",
+    func=Code_Runner,
+)
 async def start_playwright(question: str):
 input_variables=["input", "chat_history", "agent_scratchpad"]
+# Tool list passed to initialize_agent for the structured zero-shot agent.
+tools_remote = [
+    DB_Search2(),
+    duckduckgo_tool2,
+    wikipedia_tool2,
+    python_tool3,
+    math_tool_2,
+    Text2Sound_tool_loc,
+]
 agent_STRUCTURED_ZEROSHOT_REACT = initialize_agent(tools_remote, GPTfake,
                         #  agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,