# Hugging Face Spaces page residue (status banner captured with the source):
# "Spaces: Runtime error" — kept as a comment so the file parses as Python.
# Standard library
import logging
import os
import sys

# Third-party
import gradio as gr
import matplotlib.pyplot as plt
import nomic
import numpy as np
import openai
import pinecone
from dotenv import load_dotenv
from nomic import atlas

# LlamaIndex
from llama_index import (
    EmptyIndex,
    GPTListIndex,
    GPTVectorStoreIndex,
    LLMPredictor,
    PromptHelper,
    ServiceContext,
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
    load_index_from_storage,
)
from llama_index.callbacks import CallbackManager, CBEventType, LlamaDebugHandler
from llama_index.llms import Anthropic, OpenAI, Replicate
from llama_index.llms.llama_utils import completion_to_prompt, messages_to_prompt
from llama_index.logger import LlamaLogger
from llama_index.vector_stores import PineconeVectorStore

# from anthropic import Anthropic, HUMAN_PROMPT, AI_PROMPT
# --- Disabled prototype: query helper with profiling/visualization ---
# def query_index(query):
#     # Profile index operations
#     cProfile.run('query_tokens = llama_model.tokenize_docs([query])')
#     cProfile.run('results = index.query(query_tokens[0].embeddings, top_k=5)')
#     # Log results
#     logging.debug(f'Query tokens: {query_tokens}')
#     logging.debug(f'Top 5 results: {results.ids[:5]}')
#     # Visualize projections
#     # projected = umap.UMAP().fit_transform(doc_embeddings)
#     # plt.scatter(projected[:,0], projected[:,1])
#     # plt.savefig('projection.png')
#     return results

# Example queries shown in the Gradio UI.
query_examples = [
    ("What is iMerit's biggest strength?"),
    ("How did CrowdReason benefit by working with iMerit?"),
    ("What do you mean by human-in-the-loop workflows?"),
]

# Model identifiers for the three LLM backends being compared.
model_name1 = "gpt-4-0613"  # alternative: "gpt-3.5-turbo-0613"
model_name2 = "claude-2"  # alternatives: "claude-instant-1", "claude-instant-1.2"
model_name3 = "llama2"

# Name of the Pinecone index that held the document embeddings
# (index construction is currently disabled elsewhere in this file).
pinecone_vector_index = 'doc-index'

# Module-level state, populated lazily on the first chatbot() call.
env_set = 0
index1 = 0
index2 = 0
llm_predictor1 = 0
llm_predictor2 = 0
prompt_helper = 0
prompt_helper_anthropic = 0
llama_debug = 0
DEBUG_VIS = 0  # set to 1 to enable the (commented-out) Pinecone visualization

# Replicate model id for Llama-2 13B chat.
LLAMA_13B_V2_CHAT = "a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5"
# LLAMA_13B_V2_CHAT = "replicate/llama-2-70b-chat:2c1608e18606fad2812020dc541930f2d0495ce32eee50074220b87300bc16e1"

# Logging setup: INFO-level logs to stdout.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
def construct_index(directory_path):
    """Set up prompt helpers and LLM predictors for the app.

    The original vector-index construction (local storage + Pinecone) is
    commented out below, so this currently only populates the module-level
    globals and returns None.

    Args:
        directory_path: Directory of source documents (unused while index
            construction is disabled — TODO confirm before re-enabling).
    """
    global llm_predictor1, llm_predictor2, prompt_helper, prompt_helper_anthropic

    # OpenAI prompt-shaping parameters.
    openai_max_input_size = 4096
    openai_num_outputs = 512
    openai_max_chunk_overlap = 0.9
    openai_chunk_size_limit = 600

    # Anthropic (Claude) parameters: 100k-token context window.
    anthropic_max_input_size = 100000
    anthropic_num_output = 2048
    anthropic_max_chunk_overlap = 0.9

    load_dotenv()  # pull API keys from .env

    prompt_helper = PromptHelper(
        openai_max_input_size,
        openai_num_outputs,
        openai_max_chunk_overlap,
        chunk_size_limit=openai_chunk_size_limit,
    )
    prompt_helper_anthropic = PromptHelper(
        anthropic_max_input_size, anthropic_num_output, anthropic_max_chunk_overlap
    )

    storage_context_index2 = StorageContext.from_defaults()
    storage_context_index1 = StorageContext.from_defaults()

    # NOTE(review): the original passed both model= and model_name= with the
    # same value; llama_index's OpenAI takes model=, so the duplicate keyword
    # was dropped.
    llm_predictor1 = LLMPredictor(
        llm=OpenAI(model=model_name1, temperature=0.5, max_tokens=openai_num_outputs)
    )  # gpt-3.5-turbo
    llm_predictor2 = LLMPredictor(llm=Anthropic(model=model_name2))

    # service_context2 = ServiceContext.from_defaults(llm_predictor=llm_predictor2, prompt_helper=prompt_helper_anthropic)

    # --- Disabled: Pinecone setup and index construction ---
    # pinecone.init(api_key=os.environ.get('PINECONE_API_KEY'), environment=os.environ.get('PINECONE_ENV'))
    # if ((pinecone.list_indexes()) != [pinecone_vector_index]):
    #     pinecone.create_index(pinecone_vector_index, dimension=1536, metric="euclidean", pod_type="p1")
    # pinecone_index = pinecone.Index(pinecone_vector_index)
    # documents = SimpleDirectoryReader(directory_path).load_data()
    # index1 = VectorStoreIndex(documents, llm_predictor=llm_predictor1, prompt_helper=prompt_helper, storage_context=storage_context_index1)
    # index1.set_index_id = "index1"
    # index1.storage_context.persist(persist_dir="./storage1")
    # #index2 = VectorStoreIndex(documents, llm_predictor=llm_predictor2, prompt_helper=prompt_helper)
    # vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
    # storage_context_index2 = StorageContext.from_defaults(vector_store=vector_store)
    # index2 = VectorStoreIndex.from_documents(documents, llm_predictor=llm_predictor2, storage_context=storage_context_index2)
    # index2.storage_context.persist(persist_dir="./storage2")
    # return index1
# --- Disabled helpers: Pinecone id harvesting + Nomic Atlas embedding map ---
# def get_ids_from_query(index,input_vector):
#     print("searching pinecone...")
#     results = index.query(vector=input_vector, top_k=10000,include_values=False)
#     ids = set()
#     for result in results['matches']:
#         ids.add(result['id'])
#     return ids
#
# def get_all_ids_from_index(index, num_dimensions, namespace=""):
#     num_vectors = index.describe_index_stats()["namespaces"][namespace]['vector_count']
#     all_ids = set()
#     while len(all_ids) < num_vectors:
#         print("Length of ids list is shorter than the number of total vectors...")
#         input_vector = np.random.rand(num_dimensions).tolist()
#         print("creating random vector...")
#         ids = get_ids_from_query(index,input_vector)
#         print("getting ids from a vector query...")
#         all_ids.update(ids)
#         print("updating ids set...")
#     print(f"Collected {len(all_ids)} ids out of {num_vectors}.")
#     return all_ids
#
# def visualize(index, num_embeddings):
#     load_dotenv()
#     nomic.login(os.environ.get('NOMIC_API_KEY'))
#     print("Visualizing embedding count", num_embeddings)
#     print(index.describe_index_stats())
#     all_ids = get_all_ids_from_index(index, num_dimensions=1536, namespace="")
#     id_list=list(all_ids)
#     vectors = index.fetch(id_list)
#     ids = []
#     embeddings = []
#     for id, vector in vectors['vectors'].items():
#         ids.append(id)
#         embeddings.append(vector['values'])
#     embeddings = np.array(embeddings)
#     atlas.map_embeddings(embeddings=embeddings, data=[{'id': id} for id in ids], id_field='id')
# inject custom system prompt into llama-2
def custom_completion_to_prompt(completion: str) -> str:
    """Wrap *completion* in the Llama-2 prompt format with a custom system prompt.

    Args:
        completion: The raw completion/query text to embed in the prompt.

    Returns:
        The fully formatted Llama-2 prompt string.
    """
    # NOTE(review): original prompt read "...as accurately as possible is the
    # instructions and context provided" — fixed the garbled wording.
    return completion_to_prompt(
        completion,
        system_prompt=(
            "You are a Q&A assistant. Your goal is to answer questions as "
            "accurately as possible given the instructions and context provided."
        ),
    )
def chatbot_init():
    """Build per-backend ServiceContexts and return one query index per LLM.

    Returns:
        (test_Index1, test_Index2): EmptyIndex instances wired to the OpenAI
        and Anthropic service contexts respectively. (The persisted/Pinecone
        vector indexes are currently disabled below.)
    """
    global llm_predictor1, llm_predictor2, llama_debug, prompt_helper_anthropic, prompt_helper

    load_dotenv()  # API keys from .env

    # Debug/event tracing shared by both service contexts.
    llama_debug = LlamaDebugHandler(print_trace_on_end=True)
    callback_manager = CallbackManager([llama_debug])
    llama_logger_openai = LlamaLogger()
    llama_logger_anthropic = LlamaLogger()

    # service_context_Anthropic = ServiceContext.from_defaults(callback_manager=callback_manager, prompt_helper=prompt_helper_anthropic, llm_predictor=llm_predictor2, llama_logger=llama_logger_anthropic)
    # service_context_OpenAI = ServiceContext.from_defaults(callback_manager=callback_manager, prompt_helper=prompt_helper, llm_predictor=llm_predictor1, llama_logger=llama_logger_openai)
    service_context_Anthropic = ServiceContext.from_defaults(
        llm=Anthropic(model=model_name2),
        callback_manager=callback_manager,
        prompt_helper=prompt_helper_anthropic,
        llama_logger=llama_logger_anthropic,
    )
    service_context_OpenAI = ServiceContext.from_defaults(
        llm=OpenAI(model=model_name1, temperature=0.5),
        callback_manager=callback_manager,
        prompt_helper=prompt_helper,
        llama_logger=llama_logger_openai,
    )

    # --- Disabled: load persisted local index + Pinecone-backed index ---
    # storage_context_index1 = StorageContext.from_defaults(persist_dir="./storage1")
    # index1 = load_index_from_storage(storage_context_index1, service_context=service_context_OpenAI)
    # pinecone.init(api_key=os.environ.get('PINECONE_API_KEY'), environment=os.environ.get('PINECONE_ENV'))
    # pinecone_index = pinecone.Index(pinecone_vector_index)
    # vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
    # storage_context_index2 = StorageContext.from_defaults(vector_store=vector_store)
    # index2 = VectorStoreIndex([], storage_context=storage_context_index2, service_context=service_context_Anthropic)
    # if (((pinecone.list_indexes()) == [pinecone_vector_index]) and DEBUG_VIS==1):
    #     print(pinecone.describe_index("doc-index"))
    #     print(pinecone_index.describe_index_stats())
    #     num_vectors = pinecone_index.describe_index_stats()["namespaces"][""]['vector_count']
    #     print(num_vectors)
    #     visualize(pinecone_index, num_vectors)

    test_Index1 = EmptyIndex(service_context=service_context_OpenAI)
    test_Index2 = EmptyIndex(service_context=service_context_Anthropic)
    return test_Index1, test_Index2
def chatbot(input_text):
    """Send *input_text* to all three LLM backends and return their answers.

    Lazily initializes the OpenAI/Anthropic indexes on the first call, then
    queries each backend in turn (GPT-4, Claude-2, Llama-2 via Replicate).

    Args:
        input_text: The user query from the Gradio textbox.

    Returns:
        Tuple of three response strings, one per backend.
    """
    global env_set, index1, index2, llama_debug

    # One-time initialization of the OpenAI/Anthropic query indexes.
    if env_set == 0:
        index1, index2 = chatbot_init()
        env_set = 1

    query_engine1 = index1.as_query_engine()
    response1 = query_engine1.query(input_text)

    query_engine2 = index2.as_query_engine()
    response2 = query_engine2.query(input_text)

    # Llama-2 via Replicate; rebuilt on every call.
    llama_logger3 = LlamaLogger()
    llm_replicate = Replicate(
        model=LLAMA_13B_V2_CHAT,
        temperature=0.1,
        # override max tokens since it's interpreted
        # as context window instead of max tokens
        context_window=4096,
        # override completion representation for llama 2
        completion_to_prompt=custom_completion_to_prompt,
        # if using llama 2 for data agents, also override the message representation
        messages_to_prompt=messages_to_prompt,
    )
    ctx = ServiceContext.from_defaults(llm=llm_replicate, llama_logger=llama_logger3)

    # query_engine3 = index2.as_query_engine(service_context=ctx)
    test_Index3 = EmptyIndex(service_context=ctx)
    query_engine3 = test_Index3.as_query_engine()
    response3 = query_engine3.query(input_text)

    # print(llama_debug.get_event_time_info(CBEventType.LLM))
    # event_pairs = llama_debug.get_llm_inputs_outputs()
    # print(event_pairs[0][0])
    print(ctx.llama_logger.get_logs())
    # print(event_pairs[0][1].payload.keys())
    # print(event_pairs[0][1].payload["response"])
    llama_debug.flush_event_logs()

    return response1.response, response2.response, response3.response
# Gradio UI: one input textbox, one output textbox per model.
iface = gr.Interface(
    fn=chatbot,
    inputs=gr.components.Textbox(lines=7, label="Enter your query"),
    outputs=[
        gr.components.Textbox(label=model_name1 + " Output"),
        gr.components.Textbox(label=model_name2 + " Output"),
        gr.components.Textbox(label=model_name3 + " Output"),
    ],
    # outputs="text",
    examples=query_examples,
    title="iMerit LLM Comparison Engine",
    # NOTE(review): fixed user-facing typo "reponses" -> "responses".
    description="Evaluating various LLM's responses to the same query",
)

# Populates prompt helpers / predictors; index building itself is disabled.
index = construct_index("docs")
iface.launch(share=False)