# Source: Tibet / app.py (uploaded by Goutam2023)
# Last commit: "Update app.py" (adcf77e)
# -*- coding: utf-8 -*-
"""
Created on Fri May 19 10:37:00 2023
@author: Goutam
"""
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA,VectorDBQA
from langchain.document_loaders import PyPDFLoader
import re
import os
import gradio as gr
#from langchain.document_loaders import TextLoader
#Other loaders PyPDFLoader,PyPDFDirectoryLoader
#from langchain.document_loaders import UnstructuredFileLoader
"""
loader = PyPDFLoader("FPC.pdf")
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents)
embeddings = OpenAIEmbeddings()
#Have changed code to persist and retrieve
docsearch = Chroma.from_documents(texts, embeddings,persist_directory="products/")
docsearch.persist()
docsearch = None
"""
# SECURITY: the OpenAI API key must never be hard-coded in source control.
# Any key that was ever committed to this file has to be treated as
# compromised and revoked. Supply the key through the OPENAI_API_KEY
# environment variable instead (for example as a Space/host secret).
# Fail fast with a clear message when it is missing.
if not os.environ.get('OPENAI_API_KEY'):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Export it in the environment (or "
        "configure it as a secret of the hosting platform) before "
        "starting the app."
    )
def cccs_demo(question):
    """Answer a question from the persisted Tibet index and summarise the index.

    Opens the Chroma store persisted under ``Tibet/``, builds a "stuff"
    RetrievalQA chain with a custom bilingual (English/Chinese) prompt, asks
    it ``question``, then asks it a fixed summary request. The passages
    retrieved for ``question`` are written to ``Referenced.txt``.

    Args:
        question: The user's question as plain text.

    Returns:
        A ``(answer, summary, reference_path)`` tuple: the model's answer to
        ``question``, the model's summary of the documents, and the name of
        the UTF-8 text file listing the referenced passages with their
        metadata.
    """
    # Kept as a function-scope import so this block does not depend on the
    # module's import section.
    from langchain.prompts import PromptTemplate

    embeddings = OpenAIEmbeddings()
    docsearch = Chroma(persist_directory="Tibet/", embedding_function=embeddings)

    # Custom prompt
    prompt_template = """Use the documents uploaded on Tibet Borders and census, to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.Please answer both in English and Chinese and respond in the format ENGLISH=<english answer>.
CHINESE=<Chinese answer>.
{context}
Question: {question}
Answer:"""
    PROMPT = PromptTemplate(
        template=prompt_template, input_variables=["context", "question"]
    )

    # One chain serves both queries; it is configured identically for each.
    llm = OpenAI(temperature=0)
    qa = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=docsearch.as_retriever(),
        return_source_documents=True,
        chain_type_kwargs={"prompt": PROMPT},
    )

    answer = qa({"query": question})
    full_result = answer['result']
    print("Answer ", full_result)

    # Keep the sources of the user's question in their own variable so the
    # summary query below cannot replace them before they are written out.
    source_docs = answer['source_documents']
    print("Number of sources ", len(source_docs))

    # Document summary: a second, fixed query against the same chain.
    summary = qa({"query": "Please give summary of contents all the documents."})
    doc_summary = summary['result']
    print("Document Summary-", doc_summary)

    ref_str = 'Referenced.txt'
    _write_references(source_docs, ref_str)
    return full_result, doc_summary, ref_str


def _clean_page_text(text):
    """Replace the private-use character U+F07D and non-breaking spaces with spaces."""
    return text.replace('\uf07d', ' ').replace('\xa0', ' ')


def _write_references(source_docs, path):
    """Write each document's page content and metadata to ``path`` as UTF-8 text.

    Reads ``page_content`` and ``metadata`` straight from each document
    instead of parsing its string representation, so an unexpected repr
    format cannot crash the request. Each entry is also echoed to stdout.
    """
    with open(path, 'w', encoding='utf-8') as f:
        for doc in source_docs:
            source = _clean_page_text(doc.page_content)
            print("Page Content", '\n')
            print(source)
            f.write("Page Content" + '\n')
            f.write(source)
            f.write('\n')

            # Same "key: value, ..." layout as the inside of a dict repr.
            meta_data = ", ".join(
                f"{key!r}: {value!r}" for key, value in doc.metadata.items()
            )
            print("Meta Data-", '\n')
            print(meta_data)
            f.write("Meta Data-")
            f.write(meta_data)
            f.write('\n\n')
"""
URL_COM = 'translate.google.com'
URL_HI = 'translate.google.hi'
LANG = "hi" #hi is for Hindi, en for English, zh or zh-CN for chinese simplified,zh-TW for traditional chinese
translator = Translator(service_urls=[URL_COM])
translation = translator.translate(answer['result'], dest=LANG)
#print(translation)
translation_str = str(translation)
answer_group = re.search('text=(.*)pronunciation=', translation_str)
answer_hindi = answer_group.group(1)
print("Answer in Hindi-", answer_hindi)
"""
# Gradio front end: one question box in; the answer, the repository summary
# and the reference file out.
title = "Zero2AI CCCS Demo"
description = "Demonstration of multi-document Q&A on Tibetan Borders."
demo = gr.Interface(
    fn=cccs_demo,
    inputs=[gr.Textbox(label="Question")],
    outputs=[
        gr.Textbox(label="Answer"),
        gr.Textbox(label="Repository Summary"),
        gr.File(label="Reference Details"),
    ],
    title=title,
    description=description,
    allow_flagging='never',
)
# Start serving the interface.
demo.launch()
#formatted_source = source_docs.replace('\\n', '\n').replace('\\t', '\t')
#print("Source Documents ",formatted_source)
#To check source in RetrievalQA
#qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type="stuff", retriever=docsearch.as_retriever(), chain_type_kwargs=chain_type_kwargs,return_source_documents=True)
#result
#answer['result']
#answer['source_documents']