Spaces:
Runtime error
Runtime error
| from fastapi import FastAPI | |
| import os | |
| import json | |
| import pandas as pd | |
| import time | |
| import phoenix as px | |
| from phoenix.trace.langchain import OpenInferenceTracer, LangChainInstrumentor | |
| from langchain.embeddings import HuggingFaceEmbeddings #for using HugginFace models | |
| from langchain.chains.question_answering import load_qa_chain | |
| from langchain import HuggingFaceHub | |
| from langchain.prompts import PromptTemplate | |
| from langchain.chains import RetrievalQA | |
| from langchain.callbacks import StdOutCallbackHandler | |
| #from langchain.retrievers import KNNRetriever | |
| from langchain.storage import LocalFileStore | |
| from langchain.embeddings import CacheBackedEmbeddings | |
| from langchain.vectorstores import FAISS | |
| from langchain.document_loaders import WebBaseLoader | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| # from langchain import HuggingFaceHub | |
| # from langchain.prompts import PromptTemplate | |
| # from langchain.chains import LLMChain | |
| # from txtai.embeddings import Embeddings | |
| # from txtai.pipeline import Extractor | |
| # import pandas as pd | |
| # import sqlite3 | |
| # import os | |
# NOTE - we configure docs_url to serve the interactive Docs at the root path
# of the app. This way, we can use the docs as a landing page for the app on Spaces.
app = FastAPI(docs_url="/")

# Phoenix setup: launch the Phoenix app and send LangChain traces to it.
session = px.launch_app()
# If no exporter is specified, the tracer will export to the locally running Phoenix server
tracer = OpenInferenceTracer()
# If no tracer is specified, a tracer is constructed for you
LangChainInstrumentor(tracer).instrument()
print(session.url)

# SECURITY: the Hugging Face API token must be supplied through the environment
# (for example a Spaces secret) and never committed to source control. The token
# that was previously hard-coded here has to be treated as leaked and revoked.
if not os.environ.get("HUGGINGFACEHUB_API_TOKEN"):
    raise RuntimeError(
        "HUGGINGFACEHUB_API_TOKEN is not set; configure it as an environment "
        "variable or Space secret before starting the app"
    )

# embedding cache
store = LocalFileStore("./cache/")

# define embedder: embeddings are computed by the sentence-transformers model
# and cached in ``store``.
core_embeddings_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
embedder = CacheBackedEmbeddings.from_bytes_store(core_embeddings_model, store)

# define llm
llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature": 1, "max_length": 1000000})
#llm=HuggingFaceHub(repo_id="gpt2", model_kwargs={"temperature":1, "max_length":1000000})

handler = StdOutCallbackHandler()

# Module-level state, populated by initialize_vectorstore().
vectorstore = None
retriever = None
def initialize_vectorstore():
    """Create the module-level FAISS store and retriever from a sample page.

    Loads one sample web page, splits it with ``_text_splitter``, embeds the
    chunks with ``embedder`` and assigns the results to the module globals
    ``vectorstore`` and ``retriever``.
    """
    global vectorstore, retriever

    sample_url = "https://www.tredence.com/case-studies/forecasting-app-installs-for-a-large-retailer-in-the-us"
    sample_docs = WebBaseLoader(sample_url).load()
    sample_chunks = _text_splitter(sample_docs)

    # Embed the chunks and keep them in the vector store.
    vectorstore = FAISS.from_documents(sample_chunks, embedder)
    print("vector store initialized with sample doc")

    # Expose the store through a retriever.
    retriever = vectorstore.as_retriever()
def _text_splitter(doc):
    """Split ``doc`` with a ``RecursiveCharacterTextSplitter``.

    The splitter is configured with ``chunk_size=600``, ``chunk_overlap=50``
    and ``len`` as the length function. Returns whatever
    ``transform_documents`` produces for ``doc``.
    """
    splitter_options = {
        "chunk_size": 600,
        "chunk_overlap": 50,
        "length_function": len,
    }
    splitter = RecursiveCharacterTextSplitter(**splitter_options)
    return splitter.transform_documents(doc)
def _load_docs(path: str):
    """Load the web page at ``path`` and return it after ``_text_splitter``."""
    return _text_splitter(WebBaseLoader(path).load())
def get_domain_file_path(file_path: str):
    """Load a web page, chunk it and add the chunks to the global vector store.

    Args:
        file_path: Location passed to ``_load_docs`` (which hands it to
            ``WebBaseLoader``).

    Returns:
        A fixed confirmation message once the chunks have been added.

    Raises:
        RuntimeError: If the module-level ``vectorstore`` has not been
            initialized yet.
    """
    print("file_path ", file_path)
    if vectorstore is None:
        raise RuntimeError(
            "vector store is not initialized; call initialize_vectorstore() first"
        )
    # _load_docs already returns split chunks, so they are not passed through
    # _text_splitter a second time.
    webpage_chunks = _load_docs(file_path)
    # store embeddings in vector store
    vectorstore.add_documents(webpage_chunks)
    return "document loaded to vector store successfully!!"
| def _prompt(question): | |
| return f"""Answer following question using only the context below. Say 'Could not find answer with provided context' when question can't be answered. | |
| Question: {question} | |
| Context: """ | |
class BasePromptContext:
    """Prompt definition used to grade an answer's correctness with an LLM judge.

    Exposes the template text and the names of the placeholders it contains.
    """

    def __init__(self):
        # Names of the placeholders that appear in ``base_template``.
        self.variables_list = ["question","answer","context"]
        # Judge prompt: grading instructions, a correctness rubric with worked
        # examples for scores 0, 4, 7 and 10, then the question, answer and
        # context to grade.
        self.base_template = """Please act as an impartial judge and evaluate the quality of the provided answer which attempts to answer the provided question based on a provided context.
And you'll need to submit your grading for the correctness, comprehensiveness and readability of the answer, using JSON format with the 2 items in parenthesis:
("score": [your score number for the correctness of the answer], "reasoning": [your one line step by step reasoning about the correctness of the answer])
Below is your grading rubric:
- Correctness: If the answer correctly answer the question, below are the details for different scores:
- Score 0: the answer is completely incorrect, doesn’t mention anything about the question or is completely contrary to the correct answer.
- For example, when asked “How to terminate a databricks cluster”, the answer is empty string, or content that’s completely irrelevant, or sorry I don’t know the answer.
- Score 4: the answer provides some relevance to the question and answer one aspect of the question correctly.
- Example:
- Question: How to terminate a databricks cluster
- Answer: Databricks cluster is a cloud-based computing environment that allows users to process big data and run distributed data processing tasks efficiently.
- Or answer: In the Databricks workspace, navigate to the "Clusters" tab. And then this is a hard question that I need to think more about it
- Score 7: the answer mostly answer the question but is missing or hallucinating on one critical aspect.
- Example:
- Question: How to terminate a databricks cluster”
- Answer: “In the Databricks workspace, navigate to the "Clusters" tab.
Find the cluster you want to terminate from the list of active clusters.
And then you’ll find a button to terminate all clusters at once”
- Score 10: the answer correctly answer the question and not missing any major aspect
- Example:
- Question: How to terminate a databricks cluster
- Answer: In the Databricks workspace, navigate to the "Clusters" tab.
Find the cluster you want to terminate from the list of active clusters.
Click on the down-arrow next to the cluster name to open the cluster details.
Click on the "Terminate" button. A confirmation dialog will appear. Click "Terminate" again to confirm the action.”
Provided question:
{question}
Provided answer:
{answer}
Provided context:
{context}
Please provide your grading for the correctness and explain you gave the particular grading"""
class Evaluater:
    """Grade an answer for correctness by sending a judge prompt to an LLM.

    ``item`` must provide the ``"question"``, ``"answer"`` and ``"context"``
    keys; each instance creates its own ``HuggingFaceHub`` client.
    """

    def __init__(self, item):
        self.question, self.answer, self.context = (
            item["question"],
            item["answer"],
            item["context"],
        )
        self.llm = HuggingFaceHub(
            repo_id="google/flan-t5-xxl",
            model_kwargs={"temperature": 1, "max_length": 1000000},
        )

    def get_prompt_template(self):
        """Return a ``PromptTemplate`` built from ``BasePromptContext``."""
        prompt_spec = BasePromptContext()
        return PromptTemplate(
            input_variables=prompt_spec.variables_list,
            template=prompt_spec.base_template,
        )

    def evaluate(self):
        """Fill the judge prompt with this item's fields and return the LLM output."""
        filled_prompt = self.get_prompt_template().format(
            question=self.question,
            answer=self.answer,
            context=self.context,
        )
        return self.llm(filled_prompt)
def _create_evaluation_scenario(item):
    """Run the correctness ``Evaluater`` on ``item`` and return its result."""
    return Evaluater(item).evaluate()
class HallucinatePromptContext:
    """Prompt definition used to classify an answer as factual or hallucinated.

    Exposes the template text and the names of the placeholders it contains.
    """

    def __init__(self):
        # Names of the placeholders that appear in ``base_template``.
        self.variables_list = ["query","answer","context"]
        # Classification prompt: asks for a single-word verdict, "factual" or
        # "hallucinated", for the answer relative to the reference text.
        self.base_template = """In this task, you will be presented with a query, a reference text and an answer. The answer is
generated to the question based on the reference text. The answer may contain false information, you
must use the reference text to determine if the answer to the question contains false information,
if the answer is a hallucination of facts. Your objective is to determine whether the reference text
contains factual information and is not a hallucination. A 'hallucination' in this context refers to
an answer that is not based on the reference text or assumes information that is not available in
the reference text. Your response should be a single word: either "factual" or "hallucinated", and
it should not include any other text or characters. "hallucinated" indicates that the answer
provides factually inaccurate information to the query based on the reference text. "factual"
indicates that the answer to the question is correct relative to the reference text, and does not
contain made up information. Please read the query and reference text carefully before determining
your response.
# Query: {query}
# Reference text: {context}
# Answer: {answer}
Is the answer above factual or hallucinated based on the query and reference text?"""
class HallucinatonEvaluater:
    """Classify an answer as factual or hallucinated by prompting an LLM.

    ``item`` must provide the ``"question"``, ``"answer"`` and ``"context"``
    keys; each instance creates its own ``HuggingFaceHub`` client.
    """

    def __init__(self, item):
        self.question, self.answer, self.context = (
            item["question"],
            item["answer"],
            item["context"],
        )
        self.llm = HuggingFaceHub(
            repo_id="google/flan-t5-xxl",
            model_kwargs={"temperature": 1, "max_length": 1000000},
        )

    def get_prompt_template(self):
        """Return a ``PromptTemplate`` built from ``HallucinatePromptContext``."""
        prompt_spec = HallucinatePromptContext()
        return PromptTemplate(
            input_variables=prompt_spec.variables_list,
            template=prompt_spec.base_template,
        )

    def evaluate(self):
        """Fill the prompt (the question is passed as ``query``) and return the LLM output."""
        filled_prompt = self.get_prompt_template().format(
            query=self.question,
            answer=self.answer,
            context=self.context,
        )
        return self.llm(filled_prompt)
# NOTE(review): this repeats an identical definition of
# _create_evaluation_scenario that appears earlier in this file; this later
# one is the binding that remains after import.
def _create_evaluation_scenario(item):
    """Evaluate ``item`` with ``Evaluater`` and return the resulting score."""
    evaluator = Evaluater(item)
    return evaluator.evaluate()
def _create_hallucination_scenario(item):
    """Run ``HallucinatonEvaluater`` on ``item`` and return its result."""
    return HallucinatonEvaluater(item).evaluate()
def _latest_llm_chain_context(spans):
    """Return the ``context`` input of the most recently ended LLMChain span.

    Returns ``None`` when ``spans`` is ``None`` or empty, when it holds no
    span named ``"LLMChain"``, or when that span's input has no ``context``.
    """
    if spans is None or spans.empty:
        return None
    llm_chain_spans = spans[spans["name"] == "LLMChain"]
    if llm_chain_spans.empty:
        return None
    newest_first = llm_chain_spans.sort_values(by="end_time", ascending=False)
    # Positional access: ``[0]`` would be a label lookup on the index and does
    # not select the first row of the sorted frame.
    raw_input = newest_first["attributes.input.value"].iloc[0]
    return json.loads(raw_input).get("context")


def rag(question: str, evaluate: bool):
    """Answer ``question`` with the retrieval QA chain and optionally grade it.

    Args:
        question: The user question passed to the ``RetrievalQA`` chain.
        evaluate: When true, the answer is graded with
            ``_create_evaluation_scenario``.

    Returns:
        A dict with the keys ``question``, ``answer``, ``context`` and
        ``score``. ``score`` is the evaluator output, or the string
        ``"Evaluation is Turned OFF"`` when ``evaluate`` is false.
    """
    chain = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=retriever,
        callbacks=[handler],
        return_source_documents=True,
    )
    response = chain(question)
    answer = response["result"]
    print(answer)

    # Wait before reading the spans back from Phoenix; presumably this gives
    # the tracer time to export the spans of this call — TODO confirm.
    time.sleep(4)
    active_session = px.active_session()
    spans = active_session.get_spans_dataframe()
    print(active_session)
    print(spans)
    if spans is not None:
        print(spans.count())

    context = _latest_llm_chain_context(spans)
    if context is None:
        # No usable LLMChain span: fall back to the retrieved source documents.
        # NOTE(review): joining page contents with a blank line is presumably
        # what the default "stuff" chain sends to the LLM — verify.
        context = "\n\n".join(
            doc.page_content for doc in response.get("source_documents", [])
        )
    print(context)

    if evaluate:
        score = _create_evaluation_scenario(
            {"question": question, "answer": answer, "context": context}
        )
    else:
        score = "Evaluation is Turned OFF"
    return {"question": question, "answer": answer, "context": context, "score": score}
# Populate the module-level ``vectorstore`` and ``retriever`` at import time.
initialize_vectorstore()
def trace():
    """Return the active Phoenix session's spans dataframe with NaN replaced by ''.

    NOTE(review): a later definition in this file reuses the name ``trace``
    and replaces this binding at module level, unless this function is
    registered elsewhere (e.g. by a decorator not visible here) — confirm.
    """
    return px.active_session().get_spans_dataframe().fillna('')
def trace(question: str, answer: str, context: str):
    """Return the hallucination verdict for an answer, wrapped in a dict.

    The three arguments are passed to ``_create_hallucination_scenario`` and
    its result is returned under the ``hallucination_score`` key.

    NOTE(review): an earlier zero-argument function in this file is also named
    ``trace``; this definition replaces it at module level — confirm intent.
    """
    payload = {"question": question, "answer": answer, "context": context}
    return {"hallucination_score": _create_hallucination_scenario(payload)}
# Disabled ngrok tunnel setup, kept for reference only (it is never executed).
# SECURITY: the ngrok auth token that was hard-coded in this snippet has been
# removed; it must be treated as leaked and revoked. If the tunnel is ever
# re-enabled, read the token from the environment instead.
#
# from pyngrok import ngrok, conf
# conf.get_default().auth_token = os.environ["NGROK_AUTHTOKEN"]
# port = 37689
# # Open a ngrok tunnel to the HTTP server
# conf.get_default().monitor_thread = False
# public_url = ngrok.connect(port).public_url
# print(" * ngrok tunnel \"{}\" -> \"http://127.0.0.1:{}\"".format(public_url, port))