Spaces:
Sleeping
Sleeping
| # -*- coding: utf-8 -*- | |
| """ | |
| Created on Fri May 19 10:37:00 2023 | |
| @author: Goutam | |
| """ | |
| from langchain.embeddings.openai import OpenAIEmbeddings | |
| from langchain.vectorstores import Chroma | |
| from langchain.text_splitter import CharacterTextSplitter | |
| from langchain.llms import OpenAI | |
| from langchain.chains import RetrievalQA | |
| from langchain.document_loaders import PyPDFLoader | |
| from langchain.chat_models import ChatOpenAI | |
| from googletrans import Translator | |
| import re | |
| import os | |
| import gradio as gr | |
| #from langchain.document_loaders import TextLoader | |
| #Other loaders PyPDFLoader,PyPDFDirectoryLoader | |
| #from langchain.document_loaders import UnstructuredFileLoader | |
# SECURITY: the OpenAI API key must never be committed to source control.
# Supply it through the OPENAI_API_KEY environment variable (for example a
# Space secret or a local shell export). The key that used to be hard-coded on
# this line has been published and should be revoked.
if not os.environ.get("OPENAI_API_KEY"):
    import warnings

    # Warn instead of raising so the module can still be imported (e.g. by
    # tooling) without credentials.
    warnings.warn(
        "OPENAI_API_KEY is not set; provide it through the environment "
        "before sending requests to OpenAI.",
        RuntimeWarning,
    )
def translate(in_str, dest="zh-CN"):
    """Translate ``in_str`` with Google Translate and return the translated text.

    Args:
        in_str: Text to translate.
        dest: googletrans language code of the target language. Defaults to
            ``"zh-CN"`` (Simplified Chinese); other examples are ``"zh-TW"``
            (Traditional Chinese), ``"hi"`` (Hindi) and ``"en"`` (English).

    Returns:
        The translated text, or ``""`` when ``in_str`` is empty.
    """
    if not in_str:
        # Nothing to translate; skip the network round trip.
        return ""

    translator = Translator(service_urls=["translate.google.com"])
    translation = translator.translate(in_str, dest=dest)

    # Read the text from the result object instead of regex-parsing its
    # string form: the old pattern kept a trailing ", " on the captured text
    # and, because "." does not match newlines, returned the whole object
    # representation for multi-line translations.
    return translation.text
def _build_qa_chain(docsearch, llm, prompt):
    """Return a "stuff" RetrievalQA chain over ``docsearch`` that also returns its sources."""
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=docsearch.as_retriever(),
        return_source_documents=True,
        chain_type_kwargs={"prompt": prompt},
    )


def _with_chinese(text):
    """Return ``text`` followed, on a new line, by its translation from ``translate``."""
    return text + "\n" + translate(text)


def _write_references(source_docs):
    """Write the retrieved source chunks to a UTF-8 text file and return its name.

    Writes ``Referenced.txt`` when there is at least one source document,
    otherwise ``None.txt`` containing a short "nothing found" note.
    """
    if not source_docs:
        with open("None.txt", "w", encoding="utf-8") as f:
            f.write("No direct sources found")
        return "None.txt"

    with open("Referenced.txt", "w", encoding="utf-8") as f:
        for doc in source_docs:
            # Use the document's attributes directly rather than regex-parsing
            # str(doc), which raised AttributeError whenever the pattern did
            # not match and left repr quoting in the output.
            content = doc.page_content.replace("\uf07d", " ").replace("\xa0", " ")
            # Same "key: value, key: value" layout the file had before.
            metadata = ", ".join(
                f"{key!r}: {value!r}" for key, value in doc.metadata.items()
            )
            f.write("Page Content\n")
            f.write(content)
            f.write("\n")
            f.write("Meta Data-")
            f.write(metadata)
            f.write("\n\n")
    return "Referenced.txt"


def cccs_demo(question):
    """Answer ``question`` from the persisted Chroma store and summarise the documents.

    Runs two retrieval queries against the ``ChinaDB/`` vector store: the
    user's question and a fixed "summarise the documents" request. Each result
    is returned in English followed by its translation from ``translate``. The
    chunks retrieved for the user's question are written to a text file.

    Args:
        question: The user's question.

    Returns:
        A 3-tuple ``(answer, summary, reference_file)`` where ``answer`` and
        ``summary`` are bilingual strings and ``reference_file`` is the name of
        the file holding the source chunks (``"Referenced.txt"``, or
        ``"None.txt"`` when nothing was retrieved).
    """
    # Kept as a function-scope import, as in the original, so this block does
    # not rely on a file-level import being present.
    from langchain.prompts import PromptTemplate

    embeddings = OpenAIEmbeddings()
    docsearch = Chroma(persist_directory="ChinaDB/", embedding_function=embeddings)

    # The original instruction sentence was cut off after "the answer" and
    # ended in a stray quote; it is completed here.
    prompt_template = """Use the documents uploaded on China, to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
Answer:"""
    prompt = PromptTemplate(
        template=prompt_template, input_variables=["context", "question"]
    )

    # One chain serves both queries; the original built an identical one twice.
    qa = _build_qa_chain(docsearch, OpenAI(temperature=0), prompt)

    answer = qa({"query": question})
    full_result = answer["result"]
    source_docs = answer["source_documents"]
    print("Answer ", full_result)
    print("Number of sources ", len(source_docs))
    final_result = _with_chinese(full_result)

    summary = qa({"query": "Please give summary of contents all the documents."})
    doc_summary = summary["result"]
    print("Document Summary-", doc_summary)
    final_summary = _with_chinese(doc_summary)

    ref_str = _write_references(source_docs)
    return final_result, final_summary, ref_str
| """ | |
| URL_COM = 'translate.google.com' | |
| URL_HI = 'translate.google.hi' | |
| LANG = "hi" #hi is for Hindi, en for English, zh or zh-CN for chinese simplified,zh-TW for traditional chinese | |
| translator = Translator(service_urls=[URL_COM]) | |
| translation = translator.translate(answer['result'], dest=LANG) | |
| #print(translation) | |
| translation_str = str(translation) | |
| answer_group = re.search('text=(.*)pronunciation=', translation_str) | |
| answer_hindi = answer_group.group(1) | |
| print("Answer in Hindi-", answer_hindi) | |
| """ | |
# Gradio front end: one question box in; answer, repository summary and a
# downloadable reference file out (the three values cccs_demo returns).
title = "Zero2AI CCCS Demo"
description = "Demonstration of multi-document and multi-lingual Q&A on China."
demo = gr.Interface(
    fn=cccs_demo,
    inputs=[gr.Textbox(label="Question")],
    outputs=[
        gr.Textbox(label="Answer"),
        gr.Textbox(label="Repository Summary"),
        gr.File(label="Reference Details"),
    ],
    title=title,
    description=description,
    theme=gr.themes.Glass(primary_hue="indigo", secondary_hue="purple"),
)
# Turn on request queuing, then start the web server.
demo.queue()
demo.launch()
| #formatted_source = source_docs.replace('\\n', '\n').replace('\\t', '\t') | |
| #print("Source Documents ",formatted_source) | |
| #To check source in RetrievalQA | |
| #qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type="stuff", retriever=docsearch.as_retriever(), chain_type_kwargs=chain_type_kwargs,return_source_documents=True) | |
| #result | |
| #answer['result'] | |
| #answer['source_documents'] | |