Spaces:
Sleeping
Sleeping
Upload questions_dataset.py
Browse files- utils/questions_dataset.py +69 -0
utils/questions_dataset.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import functools

import chromadb
import pandas as pd
from google.genai import types
|
| 4 |
+
|
| 5 |
+
@functools.lru_cache(maxsize=1)
def preprocess_questions():
    """
    Load the AGIEval LSAT-LR test split and index it in an in-memory Chroma collection.

    Each question is stored both as a formatted document string
    '(question: "...", choices: [...], gold: g)' for retrieval display and as
    structured metadata. The result is cached so repeated tool calls reuse the
    same collection instead of re-downloading the parquet and re-embedding
    every question on each query.

    Returns:
        The chromadb collection named "lsat-lr", one entry per question.
    """
    df = pd.read_parquet(
        "hf://datasets/hails/agieval-lsat-lr/data/test-00000-of-00001.parquet"
    )

    # Single pass over named columns: positional unpacking of each row would
    # silently swap fields if the dataset's column order ever changed.
    lrq_docs = []
    metadatas = []
    for q, c, g in zip(df["query"], df["choices"], df["gold"]):
        lrq_docs.append(f'(question: "{q}", choices: {c}, gold: {g})')
        # gold is stored as a sequence with a single index — keep first element.
        metadatas.append({"question": q, "choices": "\n".join(c), "gold": int(g[0])})

    chroma_client = chromadb.Client()
    # get_or_create avoids "collection already exists" if the in-memory client
    # outlives a previous call.
    collection = chroma_client.get_or_create_collection(name="lsat-lr")
    collection.add(
        documents=lrq_docs,
        metadatas=metadatas,
        ids=[str(i) for i in range(len(lrq_docs))],
    )
    return collection
|
| 23 |
+
|
| 24 |
+
def get_logical_reasoning_practice_questions(query: str) -> str:
    """
    Use to get logical reasoning practice questions from database after user has studied.
    Uses query to search the database.
    Returns top 5 results in the format:
    (question: "question", choices: [choices], gold: [gold]).
    """
    # NOTE: the docstring above doubles as the tool description sent to the
    # model (see get_model_tools) — keep its wording stable.
    collection = preprocess_questions()
    hits = collection.query(query_texts=[query], n_results=5)
    top_documents = hits['documents'][0]
    return '\n\n'.join(top_documents)
|
| 35 |
+
|
| 36 |
+
def get_model_tools():
    """Build the Gemini tool declaration exposing the practice-question lookup.

    The function declaration reuses the docstring of
    get_logical_reasoning_practice_questions as its description so the model
    and the implementation stay in sync.
    """
    declaration = {
        "name": "get_practice_questions",
        "description": get_logical_reasoning_practice_questions.__doc__,
        "parameters": {
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "description": "string to query the database with",
                },
            },
            "required": ["query"],
        },
    }
    return types.Tool(function_declarations=[declaration])
|
| 53 |
+
|
| 54 |
+
# System prompt for the tutoring model. The per-type scores below come from a
# one-off practice-quiz assessment; update them when the quiz is re-run.
# Fixed spelling of question types ("Assumption", "Inference") — the typos were
# sent verbatim to the model.
system_instruction = """You are an AI tutor that teaches users LSAT Logical Reasoning.
Here is how your student performed on the practice quiz grouped by question type (num correct/num questions):
Assumption: (1/1)
Find the flaw in the argument: (1/2)
Inference: (1/1)
Justify the conclusion: (0/1)
Method of reasoning: (1/1)
Point at issue: (0/1)
Role Play: (1/1)
Strengthen: (0/1)
Weaken the argument: (1/1)
Based on this, classify them as Beginner / Intermediate / Advanced. Walk through the student on all topics, but focus on the ones they struggle with.
Question the user to ensure that they understand the material.
Use practice questions from the tool to ensure they understand the material.
Never give a one word answer. Always keep the conversation moving.
Once the user has studied all the topics, return <DONE>. """
|