AhmedMOstaFA10 committed on
Commit
25ca582
·
verified ·
1 Parent(s): 1e3bc37

Upload 4 files

Browse files
Files changed (4) hide show
  1. Dockerfile.txt +10 -0
  2. README.txt +24 -0
  3. app.py +247 -0
  4. requirements.txt +5 -0
Dockerfile.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
# Image that serves the CodeGen Kids Tutor API with uvicorn.
FROM python:3.9-slim

# Run as an unprivileged user with UID 1000 and a writable home directory.
# Hugging Face Spaces executes the container as UID 1000, and the app
# downloads the model at startup, which needs a writable cache location.
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

WORKDIR $HOME/app

# Install dependencies before copying the code so this layer stays cached
# when only application files change.
COPY --chown=user requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY --chown=user . .

# Serve the FastAPI instance `app` from app.py on all interfaces, port 7860.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.txt ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # CodeGen Kids Tutor API
2
+
3
+ This is an API for serving the CodeGen Kids Tutor model, which generates Python coding problems suitable for children aged 10-13 years.
4
+
5
+ ## Features
6
+
7
+ - Generates age-appropriate coding problems
8
+ - Provides starter code that kids can complete
9
+ - Checks solutions for syntax errors, a changed function name, and similarity to the reference code
10
+ - Offers helpful feedback
11
+
12
+ ## API Endpoints
13
+
14
+ - `GET /`: Health check endpoint
15
+ - `POST /generate-problem`: Generate a new coding problem (the optional JSON field `category` selects the topic)
16
+ - `POST /check-solution`: Check a student's solution (JSON fields `code` and `reference_code`)
17
+
18
+ ## Integration
19
+
20
+ You can integrate this API with your front-end using the provided `code_snippet.js` file.
21
+
22
+ ## About the model
23
+
24
+ This model is a fine-tuned version of CodeGen that generates Python programming exercises specifically designed for children learning to code.
app.py ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from fastapi.middleware.cors import CORSMiddleware
3
+ from pydantic import BaseModel
4
+ import torch
5
+ from transformers import AutoTokenizer, AutoModelForCausalLM
6
+ import random
7
+ import re
8
+ from typing import List, Dict, Any, Optional
9
+
10
app = FastAPI(title="CodeGen Kids Tutor API")

# CORS: the API is open to every origin, and none of the endpoints in this
# file read cookies or auth headers, so credentialed requests stay disabled.
# A wildcard origin must not be combined with allow_credentials=True; to
# enable credentials, replace "*" with the explicit frontend origin(s).
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # For production, specify your frontend domain
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Model loading: both globals stay None until the startup hook or the first
# get_model() call fills them in.
print("Loading model and tokenizer...")
MODEL_NAME = "AhmedMOstaFA10/codegen-kids-tutor"
tokenizer = None
model = None
26
+
27
class ProblemRequest(BaseModel):
    """Request body for ``POST /generate-problem``."""

    # Key of ``problem_prompts`` to draw the prompt from. When it is omitted
    # or not a known key, the prompt is picked from all categories.
    category: Optional[str] = None
29
+
30
class SolutionRequest(BaseModel):
    """Request body for ``POST /check-solution``."""

    # The student's submitted source code.
    code: str
    # Code the submission is compared against; typically the
    # ``reference_code`` value returned by ``/generate-problem``.
    reference_code: str
33
+
34
def _make_prompt(instruction, input_hint):
    """Assemble one model prompt in the Instruction / Input / Solution layout."""
    return (
        f"# Instruction:\n{instruction}\n\n"
        f"# Input:\n{input_hint}\n\n"
        "# Solution:\n"
    )


# Problem prompts categorized by topic. Every category holds a list of
# complete prompt strings; "loops" is the only one with two entries.
problem_prompts = {
    "arithmetic": [
        _make_prompt(
            "Generate a simple arithmetic problem suitable for a kid. Write a function with a short docstring and partial code.",
            "Addition, subtraction, or multiplication",
        ),
    ],
    "strings": [
        _make_prompt(
            "Generate a basic string manipulation exercise suitable for a beginner. Write a function with a short docstring and partial code.",
            "A string operation like reversing, counting characters, or checking substrings",
        ),
    ],
    "lists": [
        _make_prompt(
            "Generate a simple list-related problem for beginners. Write a function with a short docstring and partial implementation.",
            "Sorting a list, finding max or min, or summing numbers",
        ),
    ],
    "conditions": [
        _make_prompt(
            "Generate a basic Python problem using if-else conditions. Write a function with a docstring and a few lines of partial code.",
            "Age check, number comparison, or grade classification",
        ),
    ],
    "loops": [
        _make_prompt(
            "Create a beginner-friendly problem that uses a for loop. Write a function with a clear docstring and partial implementation.",
            "Summing numbers, iterating over lists, or counting even numbers",
        ),
        _make_prompt(
            "Write a basic programming problem involving a while loop. Include a function definition, a short docstring, and partial implementation.",
            "Repeating until condition is met, counting, or basic input validation",
        ),
    ],
    "dictionaries": [
        _make_prompt(
            "Generate an easy dictionary-based Python exercise. Write a function with a short docstring and partial implementation.",
            "Accessing values, summing values, or checking keys in a dictionary",
        ),
    ],
    "input_output": [
        _make_prompt(
            "Write a problem simulating user input and output in Python. Provide a function with a docstring and a few lines of implementation.",
            "Name, age, or favorite color, and return a formatted string",
        ),
    ],
    "math": [
        _make_prompt(
            "Generate a Python problem that implements a basic math formula. Include a function with a docstring and partial code.",
            "Area of circle, BMI calculation, or temperature conversion",
        ),
    ],
    "boolean": [
        _make_prompt(
            "Create a beginner-friendly Python exercise using boolean logic. Write a function with a docstring and partial implementation.",
            "Check conditions like even AND positive, or NOT equal to zero",
        ),
    ],
}

# Flat list of every prompt, in category order, for random selection when no
# category is requested.
all_prompts = [
    prompt
    for prompts_in_category in problem_prompts.values()
    for prompt in prompts_in_category
]
90
+
91
@app.on_event("startup")
async def startup_event():
    """Warm up the tokenizer and model when the server starts.

    Delegates to ``get_model()`` so that startup loading and lazy loading
    share one code path. A failure is only logged: the server still comes
    up, and the next ``get_model()`` call retries the load.
    """
    try:
        _, loaded_model = get_model()
        # ``device.type`` is "cuda" or "cpu", matching what the loader picks.
        print(f"Model loaded successfully on {loaded_model.device.type}")
    except Exception as e:
        # Deliberate best-effort: a failed load must not block startup.
        print(f"Error loading model: {str(e)}")
109
+
110
def get_model():
    """Return the shared ``(tokenizer, model)`` pair, loading it on first use.

    When either module-level global is still ``None``, both are (re)loaded
    from ``MODEL_NAME``: the tokenizer falls back to its EOS token for
    padding, the model config receives that pad token id, and the model is
    moved to CUDA when available, otherwise it stays on the CPU.

    Returns:
        Tuple of the tokenizer and the model held in the module globals.
    """
    global tokenizer, model

    # Fast path: everything is already in memory.
    if tokenizer is not None and model is not None:
        return tokenizer, model

    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
    model.config.pad_token_id = tokenizer.pad_token_id

    # Prefer the GPU when one is visible to torch.
    target_device = "cuda" if torch.cuda.is_available() else "cpu"
    model = model.to(target_device)

    return tokenizer, model
126
+
127
def generate_full_solution(prompt, *, max_length=256, temperature=0.7, top_p=0.95):
    """Sample one completion for ``prompt`` from the tutor model.

    Args:
        prompt: Text the model should continue.
        max_length: Upper bound on the total sequence length in tokens
            (prompt tokens plus generated tokens).
        temperature: Sampling temperature passed to ``generate``.
        top_p: Nucleus-sampling probability mass passed to ``generate``.

    Returns:
        The first generated sequence decoded to text with special tokens
        removed. For a causal LM the generated sequence contains the prompt
        tokens followed by the new ones, so the text starts with the prompt.
    """
    tokenizer, model = get_model()

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        generated_ids = model.generate(
            inputs["input_ids"],
            # Pass the mask explicitly: the pad token may be the EOS token
            # here, in which case generate() cannot infer it from the ids.
            attention_mask=inputs.get("attention_mask"),
            max_length=max_length,
            num_return_sequences=1,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            pad_token_id=tokenizer.pad_token_id,
        )

    return tokenizer.decode(generated_ids[0], skip_special_tokens=True)
145
+
146
def truncate_function_body(code, max_lines=4):
    """Cut generated code down to a short starter snippet.

    Lines are kept in order until either a line whose stripped text starts
    with ``return`` or ``print`` has been kept, or ``max_lines`` lines have
    been collected, whichever happens first.

    Args:
        code: Source text; surrounding whitespace is stripped before it is
            split into lines.
        max_lines: Maximum number of lines to keep. At least one line is
            always kept, so values below 1 behave like 1.

    Returns:
        The kept lines joined with newlines (an empty string for empty input).
    """
    kept = []
    for line in code.strip().split('\n'):
        kept.append(line)
        # The terminating return/print line itself is part of the snippet.
        if line.strip().startswith(('return', 'print')) or len(kept) >= max_lines:
            break
    return '\n'.join(kept)
157
+
158
@app.get("/")
def read_root():
    """Health check: report that the service is up."""
    status = {"message": "CodeGen Kids Tutor API is running!"}
    return status
161
+
162
@app.post("/generate-problem")
def generate_problem(request: ProblemRequest):
    """Generate a coding problem and matching starter code with the model.

    A prompt is drawn at random from the requested category, or from all
    prompts when the category is missing or unknown. The model output is
    split line by line into "function" lines (starting with ``def``, a
    docstring opener or ``#``) and everything else, which is treated as
    problem text.

    Args:
        request: Body carrying the optional ``category``.

    Returns:
        Dict with ``problem`` (the first two problem lines), ``starter_code``
        and ``reference_code`` (both the truncated function lines).

    Raises:
        HTTPException: 400 when no prompt is available for the selection,
            500 when generation or parsing fails.
    """
    try:
        # Unknown or missing categories fall back to the full prompt pool.
        if request.category and request.category in problem_prompts:
            selected_prompts = problem_prompts[request.category]
        else:
            selected_prompts = all_prompts

        if not selected_prompts:
            raise HTTPException(status_code=400, detail="No problem prompts available for the selected category")

        problem_prompt = random.choice(selected_prompts)
        complete_solution = generate_full_solution(problem_prompt)

        # Extract problem statement and function code.
        # NOTE(review): generate_full_solution decodes the whole sequence, so
        # the text presumably still starts with the prompt; its "# ..." header
        # lines then land in function_lines and the instruction sentence in
        # problem_lines. Confirm this is intended.
        # NOTE(review): function body lines (not def / docstring opener /
        # comment) are classified as problem text here. Confirm.
        problem_lines = []
        function_lines = []
        for line in complete_solution.strip().split('\n'):
            if line.strip().startswith(("def ", '"""', "#")):
                function_lines.append(line)
            else:
                problem_lines.append(line)

        current_problem = '\n'.join(problem_lines[:2]).strip()
        truncated_solution = truncate_function_body('\n'.join(function_lines))

        return {
            "problem": current_problem,
            "starter_code": truncated_solution,
            # NOTE(review): identical to starter_code, so later verification
            # compares against the truncated snippet, not a full solution.
            "reference_code": truncated_solution  # For verification later
        }
    except HTTPException:
        # Keep deliberate HTTP errors (such as the 400 above) instead of
        # letting the generic handler below turn them into a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error generating problem: {str(e)}") from e
199
+
200
@app.post("/check-solution")
def check_solution(request: SolutionRequest):
    """Heuristically grade a student's solution against the reference code.

    Checks run in this order and the first failure is returned:

    1. the submission must not be empty,
    2. it must compile (it is compiled only, never executed),
    3. when both texts define functions, the reference's first function
       name must be among the functions the student defined,
    4. the textual similarity to the reference must exceed 0.3.

    NOTE(review): step 4 measures text similarity, not functional
    correctness; a correct but differently written solution can be rejected.

    Args:
        request: Body carrying the student's ``code`` and ``reference_code``.

    Returns:
        Dict with ``is_correct`` (bool) and ``feedback`` (message for the student).

    Raises:
        HTTPException: 500 when the check itself fails unexpectedly.
    """
    # Local import: difflib is only needed by this endpoint.
    from difflib import SequenceMatcher

    # Name of a function defined at the start of a line. Anchoring to the
    # line start keeps "def" inside comments or identifiers from matching.
    def_pattern = re.compile(
        r'^\s*(?:async\s+)?def\s+([a-zA-Z_][a-zA-Z0-9_]*)', re.MULTILINE
    )

    try:
        user_solution = request.code.strip()
        reference_code = request.reference_code.strip()

        # An empty submission can never be a solution; without this guard an
        # empty reference would give a similarity of 1.0 and count as correct.
        if not user_solution:
            return {
                "is_correct": False,
                "feedback": "Your solution is empty. Write some code and try again! 🔄"
            }

        # Basic syntax check. compile() only parses the code; any failure to
        # compile is reported back to the student instead of raising.
        try:
            compile(user_solution, '<string>', 'exec')
        except Exception as e:
            return {
                "is_correct": False,
                "feedback": f"Syntax error: {str(e)}"
            }

        # Function name check: accept the reference name anywhere among the
        # student's functions, so extra helper functions are allowed.
        reference_match = def_pattern.search(reference_code)
        user_func_names = def_pattern.findall(user_solution)

        if reference_match and user_func_names:
            if reference_match.group(1) not in user_func_names:
                return {
                    "is_correct": False,
                    "feedback": "You changed the function name. Keep the original function name."
                }

        # Sequence matching to evaluate solution similarity (0.0 to 1.0).
        similarity = SequenceMatcher(None, reference_code, user_solution).ratio()

        if similarity > 0.5:
            return {
                "is_correct": True,
                "feedback": "Your solution looks correct! Great job! 🎉"
            }
        if similarity > 0.3:
            return {
                "is_correct": True,
                "feedback": "Your solution passes, but there might be a more efficient approach. Keep going! 👍"
            }
        return {
            "is_correct": False,
            "feedback": "Your solution differs significantly from the expected solution. Try again! 🔄"
        }
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error checking solution: {str(e)}") from e
247
+
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ transformers
4
+ torch
5
+ numpy