# NOTE(review): the three lines below ("Spaces: / Sleeping / Sleeping") were a
# Hugging Face Spaces status banner captured by the scrape, not program code.
# Standard library
import os
import pickle
import shutil
import sys

# Third-party
import gradio as gr
import openai
from dotenv import load_dotenv
from langchain import OpenAI
from llama_index import (
    LLMPredictor,
    PromptHelper,
    ServiceContext,
    StorageContext,
    VectorStoreIndex,
    download_loader,
    load_index_from_storage,
    set_global_service_context,
)
from llama_index.readers import SimpleDirectoryReader
from llama_index.tools import query_engine
| load_dotenv() | |
| #OPENAI_API_KEY=os.getenv("OPENAI_API_KEY") | |
| openai.api_key = os.environ["OPENAI_API_KEY"] | |
| MODEL_NAME=os.getenv("MODEL_NAME") | |
| docsdir = "uploadeddocs" | |
| indexdir = "uploadedindex" | |
| num_outputs = 512 | |
| prompt_helper = PromptHelper(context_window = 3900, num_output = num_outputs, chunk_overlap_ratio= 0.1) | |
| llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name=MODEL_NAME, max_tokens=num_outputs)) | |
| service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor) | |
| # set_global_service_context(service_context) | |
| def construct_index(directory_path): | |
| storage_context = StorageContext.from_defaults() | |
| documents = SimpleDirectoryReader(directory_path).load_data() | |
| index = VectorStoreIndex.from_documents(documents, | |
| llm_predictor=llm_predictor, | |
| prompt_helper=prompt_helper, | |
| storage_context=storage_context, | |
| service_context=service_context) | |
| index.storage_context.persist(persist_dir=f'./uploadedindex') | |
| # with open('./uploadedindex/index.pickle', 'wb') as handle: | |
| # pickle.dump(index, handle, protocol=pickle.HIGHEST_PROTOCOL) | |
| # index = load_index_from_storage(storage_context) | |
| # query_engine = index.as_query_engine() | |
| # return query_engine | |
| def load_index(directory_path): | |
| storage_context = StorageContext.from_defaults(persist_dir=f'./uploadedindex') | |
| index = load_index_from_storage(storage_context) | |
| query_engine = index.as_query_engine() | |
| return query_engine | |
| def process_upload(upload): | |
| filename = upload.name | |
| print(filename) | |
| basename = os.path.basename(filename) | |
| print(basename) | |
| for f in os.listdir(indexdir): | |
| os.remove(os.path.join(indexdir, f)) | |
| for f in os.listdir(docsdir): | |
| os.remove(os.path.join(docsdir, f)) | |
| shutil.copyfile(filename, docsdir + "/" + basename) | |
| construct_index(docsdir) | |
| text = "File Uploaded and Processed" | |
| return text | |
| def chatbot(input_text): | |
| storage_context = StorageContext.from_defaults(persist_dir=f'./uploadedindex') | |
| index = load_index_from_storage(storage_context) | |
| # with open('./uploadedindex/index.pickle', 'rb') as handle: | |
| # index = pickle.load(handle) | |
| query_engine = index.as_query_engine() | |
| # query = input_text + "explain in at least 100 words" | |
| query = input_text + " if no nouns from the question is present in the book or have synonyms, reply that the question is not relevant to the book. if the question is relevant to the book, explain in at least 100 words in bullet points if possible" | |
| response = query_engine.query(query) | |
| return str(response) | |
| interface1 = gr.Interface(fn=process_upload, inputs=gr.inputs.File(label="Upload a PDF file", type="file"), outputs=gr.outputs.Textbox(label="PDF Text"), title="PDF File Upload") | |
| interface2 = gr.Interface(fn=chatbot, | |
| inputs=gr.inputs.Textbox(lines=7, label="Enter your text"), | |
| outputs="text", | |
| title="Ask Questions from the uploaded Book") | |
| iface = gr.TabbedInterface([interface1, interface2], ["Upload your book", "Ask a Question"]) | |
| # iface.launch(share=True) | |
| iface.launch() | |