Spaces:
Runtime error
Runtime error
# Standard library
import csv
import json
import os
import re

# Third-party
import gradio as gr
import numpy as np
import openai
import pandas as pd
import requests
from datasets import load_dataset
from openai.embeddings_utils import get_embedding
from sklearn.metrics.pairwise import cosine_similarity

# Credentials are read from the environment; os.environ.get() yields None
# when a variable is not set.
openai.api_key = os.environ.get("openai.api_key")
hf_token = os.environ.get("hugginface.api.token")

# Identifier of the embedding model requested from the Hugging Face API.
# NOTE(review): the id ends in a bare "-v" with no version number; confirm it
# names the model that produced the embeddings stored in the datasets.
model_id = "sentence-transformers/all-MiniLM-L6-v"
def generate_embeddings(texts, model_id, hf_token, timeout=120):
    """Request embeddings for ``texts`` from the Hugging Face inference API.

    Args:
        texts: Value sent as the ``inputs`` field of the request payload.
        model_id: Model identifier appended to the feature-extraction URL.
        hf_token: Token sent as a ``Bearer`` authorization header.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout ``requests`` can block indefinitely.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the API answers with an error status code.
        requests.Timeout: If no response arrives within ``timeout`` seconds.
    """
    api_url = f"https://api-inference.huggingface.co/pipeline/feature-extraction/{model_id}"
    headers = {"Authorization": f"Bearer {hf_token}"}
    payload = {"inputs": texts, "options": {"wait_for_model": True}}
    response = requests.post(api_url, headers=headers, json=payload, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of handing an error payload back to the
    # caller as if it were an embedding.
    response.raise_for_status()
    return response.json()
def _load_train_frame(repo_id):
    """Load ``repo_id`` and return ``(dataset, frame)`` for its train split.

    The frame is a pandas DataFrame built from the ``train`` split with an
    extra ``similarity`` column initialised to 0.
    """
    dataset = load_dataset(repo_id)
    frame = pd.DataFrame(dataset["train"])
    frame["similarity"] = 0
    return dataset, frame


# One dataset / DataFrame pair per selectable book, loaded once at import time.
AP_Bio, df1 = _load_train_frame("vjain/biology_AP_embeddings")
AP_Physics, df2 = _load_train_frame("vjain/AP_physics_embeddings")
Personality, df3 = _load_train_frame("vjain/Personality_em")
AP_statistics, df4 = _load_train_frame("vjain/AP_statistics")
tax_embeddings, df5 = _load_train_frame("vjain/tax_embeddings")
therapy, df6 = _load_train_frame("vjain/therapy")

# Maps the dataset name chosen in the UI to its DataFrame.
dataframes = dict(
    AP_Bio=df1,
    AP_Physics=df2,
    Personality=df3,
    AP_statistics=df4,
    tax_embeddings=df5,
    therapy=df6,
)
| #df = pd.read_csv("TA_embeddings.csv") | |
| #df["embedding"]=df["embedding"].apply(eval).apply(np.array) | |
def reply(input, dataset_name):
    """Answer a question using the best-matching passages of a dataset.

    The question is embedded with ``generate_embeddings``, compared by cosine
    similarity against the ``embedding`` column of the selected DataFrame, and
    the ``text`` of the five closest rows is sent to the OpenAI completion
    endpoint as context.

    Args:
        input: The user's question.
        dataset_name: Key into the module-level ``dataframes`` mapping.

    Returns:
        The stripped completion text, or a human-readable message when the
        arguments are invalid or any step fails. Exceptions are never
        propagated, so the UI always receives a string.
    """
    try:
        if dataset_name not in dataframes:
            return "Invalid dataset selected. Please select a valid dataset."
        # A whitespace-only question carries no content, so treat it as empty.
        if not input or not input.strip():
            return "Please Enter a Question to get an Answer"

        df = dataframes[dataset_name]
        input_vector = generate_embeddings(input, model_id, hf_token)
        if not isinstance(input_vector, list):
            # Anything other than a vector (e.g. a JSON error object) cannot be
            # compared against the stored embeddings; report it directly.
            return f"An error occurred: {input_vector}"

        # One vectorised similarity computation instead of one call per row.
        embedding_matrix = np.array(df["embedding"].tolist())
        scores = cosine_similarity(embedding_matrix, [input_vector]).ravel()

        # assign() returns a new frame, so the shared module-level DataFrame is
        # not mutated by individual requests.
        top_rows = (
            df.assign(similarities=scores)
            .sort_values("similarities", ascending=False)
            .head(5)
        )
        # Snapshot of the selected rows, written to the working directory on
        # every call.
        top_rows.to_csv("sorted.csv")

        context = "\n".join(top_rows["text"])
        prompt = (
            "\n"
            "Answer the following question using the context given below."
            "If you don't know the answer for certain, say I don't know.\n"
            f"Context: {context}\n"
            f"Q: {input}\n"
        )
        # NOTE(review): confirm that this model name and the Completion
        # endpoint are still offered for the configured OpenAI account and
        # installed client version.
        completion = openai.Completion.create(
            prompt=prompt,
            temperature=1,
            max_tokens=500,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
            model="text-davinci-003",
        )
        return completion["choices"][0]["text"].strip(" \n")
    except Exception as e:
        # UI boundary: turn any failure into a message instead of a traceback.
        return f"An error occurred: {e}"
# Gradio front end: a question box plus a dataset selector feeding reply().
# Components use the top-level gr.Dropdown / gr.Textbox classes and `value=`
# rather than the legacy gr.inputs / gr.outputs namespaces and `default=`.
# NOTE(review): confirm against the Gradio version pinned for this app.
csv_dropdown = gr.Dropdown(
    label="Select the Book",
    # Derived from the registry so the choices cannot drift from the datasets
    # that are actually loaded.
    choices=list(dataframes),
    value="AP_Bio",
)
input_text = gr.Textbox(
    label="Enter your questions here",
    placeholder="E.g. What is DNA?",
    lines=3,
)
text_output = gr.Textbox(label="Answer")
description = (
    "Scholar Bot is a question answering system designed to provide accurate and relevant answers to questions from this book hosted by OpenStax https://openstax.org/details/books/biology-ap-courses. "
    "Simply enter your question in the text box above and Scholar Bot will use advanced natural language processing algorithms to search a large corpus of biology text to find the best answer for you. "
    "Scholar Bot uses the Sentence Transformers model to generate embeddings of text, and OpenAI's GPT-3 language model to provide answers to your questions."
)
# The legacy `theme="light"` and `layout="vertical"` options are omitted so
# the interface relies on Gradio's defaults; flagging is disabled with the
# string form "never" instead of a boolean.
ui = gr.Interface(
    fn=reply,
    inputs=[input_text, csv_dropdown],
    outputs=[text_output],
    title="Scholar Bot",
    description=description,
    allow_flagging="never",
    examples=[["What is the function of DNA polymerase?", "AP_Bio"]],
)
ui.launch()