# Hugging Face Space: Common Core State Standards syllabus chatbot (Gradio + llama_index).
from llama_index import (
    GPTVectorStoreIndex,
    SimpleDirectoryReader,
    ServiceContext,
    set_global_service_context,
    load_index_from_storage,
    StorageContext,
    PromptHelper,
)
from llama_index.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from PyPDF2 import PdfReader
import gradio as gr
import sys
import os

# Pull the OpenAI key from an optional local config module (local dev);
# when config.py is absent (e.g. deployed Space), rely on the environment
# variable already being set by the platform.
try:
    from config import OPEN_AI_KEY
    os.environ["OPENAI_API_KEY"] = OPEN_AI_KEY
except ImportError:
    # Was a bare `except:`, which also swallowed SystemExit/KeyboardInterrupt
    # and genuine bugs; only a missing config module should be ignored here.
    pass
# ===============================
# Settings
# ===============================
# Context-window token budget for the LLM (gpt-3.5-turbo's 4k window).
MAX_INPUT_SIZE = 4096
# Maximum number of tokens the model may generate per response.
NUM_OUTPUT = 700
# Fraction of each text chunk that overlaps its neighbor when splitting.
CHUNK_OVERLAP_RATIO = 0.15
# Upper bound on tokens per chunk at indexing time.
CHUNK_SIZE_LIMIT = 600
# Define LLM: gpt-3.5-turbo, temp:0.7
llm = OpenAI(model="gpt-3.5-turbo", temperature=0.7, max_tokens=NUM_OUTPUT)
# Define prompt helper
prompt_helper = PromptHelper(context_window=MAX_INPUT_SIZE, num_output=NUM_OUTPUT, chunk_overlap_ratio=CHUNK_OVERLAP_RATIO, chunk_size_limit=CHUNK_SIZE_LIMIT)
# Set service context
service_context = ServiceContext.from_defaults(llm=llm, prompt_helper=prompt_helper)
# Register globally so llama_index calls that omit service_context still use it.
set_global_service_context(service_context)
| # =============================== | |
| # Functions | |
| # =============================== | |
def construct_index(directory_path, index_path):
    """Load a persisted vector index if one exists, else build and persist it.

    Args:
        directory_path: folder containing the source documents to index.
        index_path: folder used to persist / reload the built index.

    Returns:
        A GPTVectorStoreIndex ready for querying.
    """
    # The original checked `os.listdir(index_path) != []`, which raises
    # FileNotFoundError on the first run when the persist directory does
    # not exist yet; guard with isdir() and use plain truthiness.
    if os.path.isdir(index_path) and os.listdir(index_path):
        # Reuse the previously persisted index instead of re-embedding.
        storage_context = StorageContext.from_defaults(persist_dir=index_path)
        return load_index_from_storage(storage_context)
    # Load in documents
    documents = SimpleDirectoryReader(directory_path).load_data()
    # Index documents
    index = GPTVectorStoreIndex.from_documents(documents, service_context=service_context, show_progress=True)
    # Save index so subsequent launches take the fast path above.
    index.storage_context.persist(persist_dir=index_path)
    return index
# Build (or reload) the index once at startup and share one query engine.
INDEX = construct_index("100_test_docs", "100_test_docs_persist")
QE = INDEX.as_query_engine()
# Holds the text of the most recently uploaded syllabus for chatfunc.
# NOTE(review): a module-level gr.State is shared across all sessions of the
# app — confirm that cross-user sharing is intended for this Space.
PDF_CONTENT = gr.State("")
def upload_file(file):
    """Extract the text of an uploaded PDF and cache it for the chat prompt.

    Args:
        file: gradio upload wrapper exposing the temp path as `.name`.

    Returns:
        The extracted text on success, or an "Error: ..." string on failure
        (shown in the syllabus textbox instead of crashing the UI).
    """
    try:
        reader = PdfReader(file.name)
        # extract_text() returns None for image-only pages; coalesce to ""
        # so the join cannot raise TypeError and mask the real content.
        pdf_text = "\n\n".join((page.extract_text() or "") for page in reader.pages)
        PDF_CONTENT.value = pdf_text
        return pdf_text
    except Exception as e:
        # Best-effort UI feedback: surface the parse failure to the user.
        return f"Error: {str(e)}"
def chatfunc(input_text, chat_history, max_chat_length=6):
    """Assemble a prompt from the syllabus plus recent history and query the index.

    Args:
        input_text: the user's newest message.
        chat_history: list of [user, assistant] pairs (gradio Chatbot state).
        max_chat_length: number of most-recent exchanges to include in the prompt.

    Returns:
        ("", updated chat_history) — empty string clears the input textbox.
    """
    prompt = """
    <|SYSTEM|># Your task is to query and use the provided context to help users learn about common core state standards and use it to improve their syllabus.
    \n\n
    """
    if PDF_CONTENT.value:
        prompt = prompt + "The following is the syllabus provided by the user" + PDF_CONTENT.value + "\n\n"
    # BUG FIX: the original sliced with `~max_chat_length` (bitwise NOT, i.e.
    # -(max_chat_length + 1)), which kept 7 exchanges instead of 6. Use a
    # plain negative index to keep exactly the last `max_chat_length`.
    for user_chat, bot_chat in chat_history[-max_chat_length:]:
        prompt = f"{prompt}\nUser: {user_chat}\nAssistant: {bot_chat}"
    prompt = f"{prompt}\nUser: {input_text}\nAssistant:"
    response = QE.query(prompt)
    chat_history.append([input_text, response.response])
    return "", chat_history
# ===============================
# UI: chat window, submit/clear controls, and syllabus upload/view.
# ===============================
with gr.Blocks() as iface:
    chatbot = gr.Chatbot(height=400)
    msg = gr.Textbox(label="Ask the Common Core State Standard Bot anything about curriculum standards")
    # NOTE(review): `submit` is created but never wired to chatfunc below —
    # the button currently does nothing; confirm whether a
    # submit.click(chatfunc, ...) binding is missing.
    submit = gr.Button("💬 Submit")
    with gr.Row():
        clear = gr.ClearButton(value="🧹 Clear outputs", components=[msg, chatbot])
        upload_button = gr.UploadButton("📁 Upload a Syllabus", file_types=[".pdf"], file_count="single")
    with gr.Accordion("📖 View your syllabus"):
        syl = gr.Textbox(label="Your syllabus' content will show here")
    # Pressing Enter in the textbox sends (msg, chatbot) into chatfunc and
    # writes back ("", updated history), clearing the textbox.
    msg.submit(chatfunc, [msg, chatbot], [msg, chatbot])
    # Upload routes the file through upload_file and shows extracted text in syl.
    upload_button.upload(upload_file, upload_button, syl)
iface.launch(share=False)