# (Scrape artifacts: the Hugging Face Space page banner lines
# "Spaces:" / "Runtime error" / "Runtime error" are preserved here as a
# comment so that the module parses.)
# -*- coding: utf-8 -*-
"""
Created on Fri May 19 10:37:00 2023
@author: Goutam
"""
import os
import re
import warnings

import gradio as gr
from langchain.chains import RetrievalQA, VectorDBQA
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

# Alternative loaders kept for reference:
# from langchain.document_loaders import TextLoader
# from langchain.document_loaders import UnstructuredFileLoader
# Other loaders: PyPDFLoader, PyPDFDirectoryLoader
| """ | |
| loader = PyPDFLoader("FPC.pdf") | |
| documents = loader.load() | |
| text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0) | |
| texts = text_splitter.split_documents(documents) | |
| embeddings = OpenAIEmbeddings() | |
| #Have changed code to persist and retrieve | |
| docsearch = Chroma.from_documents(texts, embeddings,persist_directory="products/") | |
| docsearch.persist() | |
| docsearch = None | |
| """ | |
| os.environ['OPENAI_API_KEY']='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' | |
def cccs_demo(question):
    """Answer *question* from the persisted Tibet document store.

    Retrieves relevant chunks from the Chroma index persisted under
    "Tibet/", asks an OpenAI LLM for a bilingual (English/Chinese)
    answer and a repository summary, and writes the retrieved source
    chunks to "Referenced.txt" for download.

    Parameters
    ----------
    question : str
        The user's question, as entered in the Gradio textbox.

    Returns
    -------
    tuple
        (bilingual answer text, repository summary text, path of the
        reference-details file) — one value per Gradio output component.
    """
    from langchain.prompts import PromptTemplate

    embeddings = OpenAIEmbeddings()
    # Reopen the index persisted by the (disabled) ingestion step.
    docsearch = Chroma(persist_directory="Tibet/", embedding_function=embeddings)

    # Custom prompt: force a bilingual ENGLISH=/CHINESE= answer format.
    prompt_template = """Use the documents uploaded on Tibet Borders and census, to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.Please answer both in English and Chinese and respond in the format ENGLISH=<english answer>.
CHINESE=<Chinese answer>.
{context}
Question: {question}
Answer:"""
    prompt = PromptTemplate(
        template=prompt_template, input_variables=["context", "question"]
    )

    # Build the retrieval chain once and reuse it for both queries
    # (the original constructed an identical chain twice).
    qa = RetrievalQA.from_chain_type(
        llm=OpenAI(temperature=0),
        chain_type="stuff",
        retriever=docsearch.as_retriever(),
        return_source_documents=True,
        chain_type_kwargs={"prompt": prompt},
    )

    answer = qa({"query": question})
    full_result = answer['result']
    print("Answer ", full_result)

    source_docs = answer['source_documents']
    print("Number of sources ", len(source_docs))

    # Repository-wide summary, shown in a second output box.
    summary = qa({"query": "Please give summary of contents all the documents."})
    doc_summary = summary['result']
    print("Document Summary-", doc_summary)

    # Write the chunks that backed the *question* answer. (The original
    # accidentally wrote the summary query's sources here, because the
    # `answer` variable had been reassigned by the summary call.)
    ref_str = 'Referenced.txt'
    with open(ref_str, 'w', encoding='utf-8') as f:
        for doc in source_docs:
            # Read the Document attributes directly instead of
            # regex-parsing str(doc), which broke whenever the chunk
            # text itself contained "metadata=".
            source = doc.page_content
            source = source.replace('\uf07d', ' ')   # PDF bullet glyph
            source = source.replace('\xa0', ' ')     # non-breaking space
            print("Page Content", '\n')
            print(source)
            f.write("Page Content" + '\n')
            f.write(source)
            f.write('\n')
            meta_data = str(doc.metadata)
            print("Meta Data-", '\n')
            print(meta_data)
            f.write("Meta Data-")
            f.write(meta_data)
            f.write('\n\n')
    return full_result, doc_summary, ref_str
| """ | |
| URL_COM = 'translate.google.com' | |
| URL_HI = 'translate.google.hi' | |
| LANG = "hi" #hi is for Hindi, en for English, zh or zh-CN for chinese simplified,zh-TW for traditional chinese | |
| translator = Translator(service_urls=[URL_COM]) | |
| translation = translator.translate(answer['result'], dest=LANG) | |
| #print(translation) | |
| translation_str = str(translation) | |
| answer_group = re.search('text=(.*)pronunciation=', translation_str) | |
| answer_hindi = answer_group.group(1) | |
| print("Answer in Hindi-", answer_hindi) | |
| """ | |
title = "Zero2AI CCCS Demo"
description = "Demonstration of multi-document Q&A on Tibetan Borders."

# The three outputs mirror cccs_demo's return tuple:
# bilingual answer, repository summary, and the reference-details file.
demo = gr.Interface(
    cccs_demo,
    inputs=[gr.Textbox(label="Question")],
    outputs=[
        gr.Textbox(label="Answer"),
        gr.Textbox(label="Repository Summary"),
        gr.File(label="Reference Details"),
    ],
    title=title,
    description=description,
    allow_flagging='never',
)

# Launch only when run as a script, so importing this module (e.g. from
# tests) does not start the web server.
if __name__ == "__main__":
    demo.launch()

# NOTE: RetrievalQA returns the retrieved chunks when constructed with
# return_source_documents=True; they are available under
# answer['source_documents'] alongside answer['result'].