Cicero-QA-api-dev

Runtime error

App Files Files Community

Cicero-QA-api-dev / app.py

Rams901

Update app.py

ca458ea over 2 years ago

raw

history blame contribute delete

6.84 kB

	import gradio as gr
	import numpy as np
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain.chains import LLMChain
	from langchain import PromptTemplate
	import re
	import pandas as pd
	from langchain.vectorstores import FAISS
	import requests
	from typing import List
	from langchain.schema import (
	SystemMessage,
	HumanMessage,
	AIMessage
	)
	import os
	from langchain.embeddings import HuggingFaceEmbeddings
	from langchain.chat_models import ChatOpenAI

	from langchain.llms.base import LLM
	from typing import Optional, List, Mapping, Any

	import ast
	from utils import ClaudeLLM, extract_website_name, remove_numbers

	# HuggingFaceEmbeddings built with its default settings; handed to the FAISS loader below.
	embeddings = HuggingFaceEmbeddings()
	# Vector index loaded from the local 'db_full' directory when the module is imported.
	db = FAISS.load_local('db_full', embeddings)

	# Module-level store for the most recent retrieval result (assigned inside qa_retrieve).
	mp_docs = {}
	# LLM wrapper (from utils) used by the LLMChain calls in qa_retrieve.
	llm = ClaudeLLM()
	# Alternative OpenAI chat model configuration, currently disabled:
	# ChatOpenAI(
	# temperature=0,
	# model='gpt-3.5-turbo-16k'
	# )


	def add_text(history, text):
	    """Append a new, still unanswered turn to a chat history.

	    The incoming history is echoed to stdout, then a ``(text, None)`` pair
	    is added at the end of a new list; the list passed in is not modified.

	    Args:
	        history: list of ``(message, reply)`` pairs.
	        text: the message to append.

	    Returns:
	        A 2-tuple ``(extended_history, "")``.
	    """
	    print(history)
	    pending_turn = (text, None)
	    extended = history + [pending_turn]
	    return extended, ""


	def _merge_article_chunks(chunks):
	    """Collapse chunk rows into one row per article, joining chunk text in ``id`` order."""
	    columns = ['_id', 'title', 'url', 'content']
	    if chunks.empty:
	        # Grouping an empty frame does not give a 1-D result that can be
	        # assigned as a column, so hand back a correctly shaped empty frame.
	        return pd.DataFrame(columns=columns)
	    merged = (
	        chunks.sort_values('id', kind='stable')
	        .groupby(['title', 'url', '_id'])['page_content']
	        .agg("\n...\n".join)
	        .reset_index(name='content')
	    )
	    return merged.reindex(columns=columns)


	def retrieve_thoughts(query, ):
	    """Retrieve stored chunks similar to ``query`` and group them into two score tiers.

	    The module-level ``db`` index is searched, each hit's metadata, text and
	    score are put into one DataFrame, and the rows are split by score
	    (lower scores are treated as the stronger matches):

	    * tier 1: ``score < 0.7``
	    * tier 2: ``0.7 <= score < 0.95``

	    Within a tier, chunks sharing ``title``/``url``/``_id`` are merged into a
	    single row whose ``content`` is the chunk texts joined with ``"\\n...\\n"``
	    in ascending ``id`` order. Tier-1 rows also get a 1-based ``ref`` number.

	    Args:
	        query: free-text search string.

	    Returns:
	        ``{'tier 1': DataFrame, 'tier 2': DataFrame}``, each with at most five
	        rows. Tier 1 has columns ``_id, title, url, ref, content``; tier 2 has
	        ``_id, title, url, content``. Either frame may be empty.
	    """
	    top_n = 5
	    hits = db.similarity_search_with_score(
	        query=query, k=1500, fetch_k=len(db.index_to_docstore_id)
	    )
	    df = pd.DataFrame([
	        {**doc.metadata, 'page_content': doc.page_content, 'score': score}
	        for doc, score in hits
	    ])

	    # TODO: decide what to do when the query matches none of the stored documents.
	    if df.empty:
	        tier_1 = tier_2 = df
	    else:
	        tier_1 = df[df['score'] < 0.7]
	        # `&` is the boolean-mask operator; `>=` keeps a score of exactly 0.7
	        # from falling between the two tiers.
	        tier_2 = df[(df['score'] >= 0.7) & (df['score'] < 0.95)]

	    tier_1_adjusted = _merge_article_chunks(tier_1)
	    tier_1_adjusted.insert(3, 'ref', range(1, len(tier_1_adjusted) + 1))
	    tier_2_adjusted = _merge_article_chunks(tier_2)

	    # NOTE(review): grouped rows come out ordered by the group keys (title first),
	    # not by score, so the cap below is not a "best five" cut - confirm intent.
	    # head() is positional; the label slice .loc[:5] would keep six rows.
	    return {
	        'tier 1': tier_1_adjusted.head(top_n),
	        # Return the grouped tier-2 frame: the raw chunk frame has no `content` column.
	        'tier 2': tier_2_adjusted.head(top_n),
	    }

	def _format_articles(frame, with_ref):
	    """Render each article row as a prompt snippet, optionally prefixed with ``[ref]``."""
	    # Accept either grouped rows (`content`) or raw chunk rows (`page_content`).
	    text_col = 'content' if 'content' in frame.columns else 'page_content'
	    snippets = []
	    for _, row in frame.iterrows():
	        prefix = f"[{int(row['ref'])}] " if with_ref else ""
	        snippets.append(f"{prefix}title: {row['title']}\n Content: {row[text_col]}")
	    return snippets


	def _highlight_citations(text, highest_ref):
	    """Wrap the markers ``[1]`` .. ``[highest_ref]`` in the citation ``<span>``."""
	    for i in range(1, highest_ref + 1):
	        text = text.replace(f'[{i}]', f"<span class='text-primary'>[{i}]</span>")
	    return text


	def _parse_questions(raw, limit=5):
	    """Split the LLM's numbered list into at most ``limit`` cleaned question strings."""
	    # Skip any preamble before the first '1'. find() is used instead of index()
	    # so a reply without any numbering is parsed as-is rather than raising.
	    start = raw.find('1')
	    if start != -1:
	        raw = raw[start:]
	    return [remove_numbers(part).strip() for part in raw.split('.') if len(part) > 5][:limit]


	def qa_retrieve(query,):
	    """Answer ``query`` with a cited synthesis plus related follow-up questions.

	    Tier-1 articles from ``retrieve_thoughts`` are passed to the LLM to write
	    a synthesis that cites them by ``[ref]`` number; tier-2 articles are
	    passed to the LLM to propose general related questions.

	    Args:
	        query: the user's question.

	    Returns:
	        A dict with keys ``'cynthesis'`` (the synthesis text with citation
	        markers wrapped in ``<span class='text-primary'>``), ``'questions'``
	        (up to five strings) and ``'reference'`` (a list of
	        ``{'ref', 'url', 'title'}`` records for the tier-1 articles).
	    """
	    print(db)

	    global mp_docs
	    thoughts = retrieve_thoughts(query)
	    # Remember the latest non-empty retrieval; fall back to the remembered one
	    # only if retrieval returned something falsy.
	    if thoughts:
	        mp_docs = thoughts
	    elif mp_docs:
	        thoughts = mp_docs

	    tier_1 = thoughts['tier 1']
	    tier_2 = thoughts['tier 2']

	    reference = tier_1[['ref', 'url', 'title']].to_dict('records')

	    tier_1 = _format_articles(tier_1, with_ref=True)
	    tier_2 = _format_articles(tier_2, with_ref=False)
	    print(f"QUERY: {query}\nTIER 1: {tier_1}\nTIER2: {tier_2}")

	    # Synthesis generation
	    session_prompt = """ A bot that is open to discussions about different cultural, philosophical and political exchanges. You will use do different analysis to the articles provided to me. Stay truthful and if you weren't provided any resources give your oppinion only."""
	    task = """Create a coherent synthesis in which you use references to the id of articles provided and relevant to the query.

	Follow the example structure:

	The best wine to pair with steak depends on the cut of steak and the preparation. Here are some general guidelines for pairing wine with steak:
	- Choose a dry red wine. The rule of thumb is to choose dry red wines
	- leaner cuts of meat pair with lighter wines, while richer, fattier cuts pair up with high tannin wines that can cut through the fat [1].
	- Consider the cut of steak. Lighter red wines tend to go best with the leaner cuts of steak such as filet mignon, while more marbled, higher fat cuts of meat like a rib eye do well when accompanied by more robust red wines [3].
	- Take into account the preparation. For a spiced steak, go for a wine with lots of fruit to balance out the heat, like an Old Vine Zinfandel. And if you're drowning your steak in a decadent sauce, find a wine with enough body to stand up to it, like a Cabernet Sauvignon [5].
	- Popular wine choices include Cabernet Sauvignon, Pinot Noir, Zinfandel, Malbec, Syrah, and Merlot [2].
	Remember, the goal is to choose a wine that complements the cut of steak and not overwhelm or take away from the flavor of the meat [3]."
	"""

	    prompt = PromptTemplate(
	        input_variables=["query", "task", "session_prompt", "articles"],
	        template="""
	You are a {session_prompt}
	{task}

	query: {query}

	Articles:
	{articles}

	Make sure to quote the article used if the argument corresponds to the query.
	Use careful reasoning to explain your answer and give your conclusion about this.
	""",
	    )

	    chain = LLMChain(llm=llm, prompt = prompt)

	    response = chain.run(query=query, articles="\n".join(tier_1), session_prompt = session_prompt, task = task)

	    # Always cover [1]..[5]; extend the range when more references were supplied
	    # so every returned reference number gets the same styling.
	    response = _highlight_citations(response, max(5, len(reference)))

	    # Generate related questions
	    prompt_q = PromptTemplate(
	        input_variables=[ "session_prompt", "articles"],
	        template="""
	You are a {session_prompt}
	Give general/global questions related the following articles:

	Articles:
	{articles}

	Make sure not to ask specific questions, keep them general, short and concise.
	""",
	    )

	    chain_q = LLMChain(llm=llm, prompt = prompt_q)

	    questions = chain_q.run(session_prompt = session_prompt, articles = "\n".join(tier_2), )
	    questions = _parse_questions(questions)

	    json_resp = {'cynthesis': response, 'questions': questions, 'reference': reference}

	    return json_resp

	# Sample query offered in the interface.
	examples = [
	    ["Will Russia win the war in Ukraine?"],
	]

	# Use the top-level gr.Textbox component: the gr.inputs namespace is the
	# deprecated pre-3.0 API and is no longer available in Gradio 4.
	demo = gr.Interface(
	    fn=qa_retrieve,
	    title="cicero-qa-api",
	    inputs=gr.Textbox(lines=5, label="what would you like to learn about?"),
	    outputs="json",
	    examples=examples,
	)

	demo.launch()