# RAG Q&A Gradio app over company 10-K (2023) filings, deployed as a
# Hugging Face Space. (The "Spaces: Sleeping" lines were page-scrape residue.)
## Setup
# Import the necessary libraries

# Standard library
import json
import os
import uuid
from datetime import datetime
from pathlib import Path

# Third-party
import gradio as gr
import pandas as pd
from huggingface_hub import CommitScheduler, HfApi
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma
from openai import OpenAI
# Read credentials from the environment (configured as Space secrets).
hf_token = os.getenv("HF_TOKEN1")
openai_api = os.getenv("MIT_Project_key")

# Client used for chat completions in predict().
client = OpenAI(
    api_key=openai_api
)

# Define the embedding model and the vectorstore.
# NOTE(review): this must be the same embedding model that was used to
# build the persisted DB, or similarity search results will be garbage.
embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-large')
persisted_vectordb_location = './companies_db/'

# Load the persisted vectorDB holding the 2023 10-K document chunks.
vectorstore_persisted = Chroma(
    collection_name="companies_10k_2023",
    persist_directory=persisted_vectordb_location,
    embedding_function=embedding_model,
)

# Prepare the logging functionality: one uniquely-named JSON-lines file
# per app session, stored under logs/.
log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
log_folder = log_file.parent
# Ensure the folder exists before predict() first appends to log_file.
log_folder.mkdir(parents=True, exist_ok=True)

# Background scheduler that commits the log folder to a HF dataset repo
# every 2 minutes; predict() takes scheduler.lock while writing logs.
scheduler = CommitScheduler(
    repo_id="Project3",
    repo_type="dataset",
    folder_path=log_folder,
    path_in_repo="data",
    every=2,
    token=hf_token,
)
# System prompt for the Q&A model: answer strictly from the supplied
# quotes and cite the supporting quote plus its page number.
qna_system_message = """
System Message:
You are provided with a set of quotes and a question. Your task is to:
Answer the Question: Use the information from the quotes to provide a concise and accurate answer to the question.
Quote Source: Select a relevant quote that supports your answer.
Document Page Number: Indicate the page number from which the quote is taken.
Input:
Quotes: [List of quotes from the document]
Question: [The question to be answered]
Output:
Answer: [Provide the answer to the question here]
Quote: [Select and present the relevant quote here]
Document Page Number: [Specify the page number of the quote]"""

# User message template; predict() fills {context} with the retrieved
# chunks and {question} with the user's query.
qna_user_message_template = """
###Context
Here are some documents that are relevant to the question mentioned below.
{context}
###Question
{question}
"""
# Runs when 'Submit' is clicked or when an API request is made.
def predict(user_input, company):
    """Answer a user question about the selected company's 2023 10-K filing.

    Retrieves the top-5 most similar chunks for the company's filing from
    the persisted vector store, asks the LLM to answer using those chunks,
    logs the interaction, and returns the model's answer.

    Parameters
    ----------
    user_input : str
        The question typed by the user.
    company : str
        Company identifier chosen in the dropdown; interpolated into the
        source-file path used to filter the vector store.

    Returns
    -------
    str
        The LLM's answer, or an apology string describing the API error.
    """
    # Metadata filter restricting search to the chosen company's PDF.
    # NOTE(review): the "/content/dataset/" prefix must match the `source`
    # metadata stored when the DB was built — confirm if the DB is rebuilt.
    # (Renamed from `filter`, which shadowed the builtin.)
    source_path = "/content/dataset/" + company + "-10-k-2023.pdf"
    relevant_document_chunks = vectorstore_persisted.similarity_search(
        user_input, k=5, filter={"source": source_path}
    )

    # Assemble the retrieved chunks (with provenance) into one context
    # string; join once instead of repeated string concatenation.
    context_parts = []
    for i, doc in enumerate(relevant_document_chunks):
        context_parts.append(f"Retrieved chunk {i+1}: \n")
        context_parts.append(doc.page_content + "\n")
        context_parts.append("Source: " + source_path + "\n")
        context_parts.append("Page Number: " + str(doc.metadata['page']) + "\n")
    context_for_query = "".join(context_parts)

    # Create the chat messages: fixed system prompt + templated user turn.
    prompt = [
        {'role': 'system', 'content': qna_system_message},
        {'role': 'user', 'content': qna_user_message_template.format(
            context=context_for_query,
            question=user_input,
        )},
    ]

    # Get the response from the LLM; surface any API failure as the answer
    # text instead of crashing the UI.
    try:
        response = client.chat.completions.create(
            model='gpt-3.5-turbo',
            messages=prompt,
            temperature=0,
        )
        prediction = response.choices[0].message.content
    except Exception as e:
        prediction = f'Sorry, I encountered the following error: \n {e}'
        # Fix: no early return here — previously errors skipped logging.

    # Log both the inputs and outputs to the local log file, holding the
    # commit scheduler's lock to avoid a commit reading a partial line.
    with scheduler.lock:
        with log_file.open("a") as f:
            f.write(json.dumps(
                {
                    'user_input': user_input,
                    'retrieved_context': context_for_query,
                    'model_response': prediction,
                }
            ))
            f.write("\n")

    return prediction
# Companies offered in the dropdown. These values are interpolated into
# the source-file filter inside predict(), so they must match the file
# names used when the vector store was built.
companies = ["Meta", "IBM", "google", "msft", "aws"]

# Assemble the UI: company picker + query box + button on top, answer below.
with gr.Blocks() as demo:
    with gr.Row():
        dropdown = gr.Dropdown(
            choices=companies,
            label='Company_file',
        )
        textbox = gr.Textbox(
            label='Enter your query',
            placeholder='Type your query here',
        )
        magic_button = gr.Button("Get Answer")
    with gr.Row():
        magic_sauce = gr.Textbox(
            label="Answer",
            placeholder="Your magic sauce will be displayed here",
        )
    # Wire the button: predict(query, company) -> answer box.
    magic_button.click(
        predict,
        inputs=[textbox, dropdown],
        outputs=[magic_sauce],
    )

# For the inputs parameter of Interface provide [textbox, company]
demo.launch(share=True, show_error=True, debug=True)