david167 committed on
Commit
992eedb
Β·
1 Parent(s): c106c31

Major update: Add NFL training data generation and improve model handling

Browse files

- Add NFL rulebook data processing (2024 NFL Rule Book.csv)
- Add generate_nfl_training_data.py for training data creation
- Add run_nfl_generator.py for easy execution
- Update requirements.txt with comprehensive dependencies
- Improve app.py with better model loading and error handling
- Enhance gradio_app.py with ModelManager class
- Update Dockerfile for better HF Spaces compatibility
- Clean up redundant files and folders

2024 NFL Rule Book.csv ADDED
The diff for this file is too large to render. See raw diff
 
Dockerfile CHANGED
@@ -35,11 +35,13 @@ COPY app.py .
35
  COPY gradio_app.py .
36
  COPY README.md .
37
 
38
- # Create HF cache directory with proper permissions
39
- RUN mkdir -p /app/.cache && chmod -R 777 /app/.cache
40
  ENV HF_HOME=/app/.cache
41
  ENV HF_DATASETS_CACHE=/app/.cache
42
  ENV OMP_NUM_THREADS=4
 
 
43
 
44
  # Expose port
45
  EXPOSE 7860
 
35
  COPY gradio_app.py .
36
  COPY README.md .
37
 
38
+ # Create cache directories with proper permissions
39
+ RUN mkdir -p /app/.cache/matplotlib /app/.cache/fontconfig && chmod -R 777 /app/.cache
40
  ENV HF_HOME=/app/.cache
41
  ENV HF_DATASETS_CACHE=/app/.cache
42
  ENV OMP_NUM_THREADS=4
43
+ ENV MPLCONFIGDIR=/app/.cache/matplotlib
44
+ ENV FONTCONFIG_FILE=/app/.cache/fontconfig
45
 
46
  # Expose port
47
  EXPOSE 7860
app.py CHANGED
@@ -57,19 +57,24 @@ async def load_model_with_retry(model_name: str, hf_token: str, max_retries: int
57
 
58
  # Use Seq2Seq model for T5-based models, CausalLM for others
59
  if "flan-t5" in model_name.lower() or "t5" in model_name.lower():
60
- model = AutoModelForCausalLM.from_pretrained(
61
  model_name,
62
- torch_dtype=torch.float16 if device == "cuda:0" else torch.float32,
63
- device_map={"": 0}, # Force all parameters to GPU 0
64
  trust_remote_code=True,
65
  low_cpu_mem_usage=True,
66
  token=hf_token
67
  )
68
  else:
 
 
 
 
 
69
  model = AutoModelForCausalLM.from_pretrained(
70
  model_name,
71
  torch_dtype=torch.float16 if device == "cuda:0" else torch.float32,
72
- device_map={"": 0}, # Force all parameters to GPU 0
73
  trust_remote_code=True,
74
  low_cpu_mem_usage=True,
75
  use_safetensors=True, # Force safetensors to avoid CVE-2025-32434
@@ -94,12 +99,12 @@ async def load_model():
94
  try:
95
  logger.info("Starting model loading...")
96
 
97
- # Check if CUDA is available
98
  if torch.cuda.is_available():
99
- torch.cuda.set_device(0)
100
- device = "cuda:0"
101
  else:
102
- device = "cpu"
103
  logger.info(f"Using device: {device}")
104
 
105
  if device == "cuda:0":
@@ -116,7 +121,7 @@ async def load_model():
116
  try:
117
  logger.info("Loading model with transformers...")
118
 
119
- # Use FLAN-T5 Large - excellent for question generation and uses standard HF storage
120
  base_model_name = "meta-llama/Llama-3.1-8B-Instruct"
121
 
122
  tokenizer, model = await load_model_with_retry(base_model_name, hf_token)
@@ -185,7 +190,7 @@ app.add_middleware(
185
  )
186
 
187
  def create_question_prompt(statement: str, num_questions: int, difficulty_level: str) -> str:
188
- """Create a prompt for question generation optimized for T5/FLAN models"""
189
 
190
  difficulty_instruction = {
191
  "easy": "simple, straightforward questions that test basic understanding",
@@ -194,18 +199,25 @@ def create_question_prompt(statement: str, num_questions: int, difficulty_level:
194
  "mixed": "a mix of easy, medium, and hard questions"
195
  }
196
 
197
- # T5/FLAN models work better with direct, concise instructions
198
- prompt = f"""Generate {num_questions} {difficulty_instruction[difficulty_level]} about this statement:
 
 
199
 
200
  "{statement}"
201
 
202
  Requirements:
203
- - Clear, well-formed questions
204
  - Vary question types (what, how, why, when, where)
205
  - Number each question (1., 2., 3., etc.)
206
  - End each question with a question mark
 
 
 
207
 
208
- Questions:"""
 
 
209
 
210
  return prompt
211
 
@@ -278,14 +290,18 @@ async def generate_questions(request: QuestionGenerationRequest):
278
 
279
  # Generate response using transformers
280
  inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
 
 
281
  if device == "cuda:0":
282
- inputs = inputs.to(device)
283
- # Ensure all model parameters are on the same device
284
- if model is not None:
285
- model_device = next(model.parameters()).device
286
- inputs = inputs.to(model_device)
 
 
287
  with torch.no_grad():
288
- # T5 models use generate differently - they don't include input in output
289
  outputs = model.generate(
290
  **inputs,
291
  max_new_tokens=min(request.max_length, 1024),
@@ -293,11 +309,14 @@ async def generate_questions(request: QuestionGenerationRequest):
293
  top_p=0.95,
294
  do_sample=True,
295
  num_beams=1,
 
296
  early_stopping=True
297
  )
298
 
299
- # Decode the generated text (T5 doesn't include input prompt in output)
300
- generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
 
301
  logger.info(f"Generated text length: {len(generated_text)}")
302
 
303
  # Extract questions from the generated text
@@ -339,7 +358,7 @@ async def root():
339
  """Root endpoint with basic info"""
340
  return {
341
  "message": "Question Generation API",
342
- "model": "meta-llama/Llama-3.1-8B-Instruct",
343
  "endpoints": {
344
  "health": "/health",
345
  "generate": "/generate-questions",
 
57
 
58
  # Use Seq2Seq model for T5-based models, CausalLM for others
59
  if "flan-t5" in model_name.lower() or "t5" in model_name.lower():
60
+ model = AutoModelForSeq2SeqLM.from_pretrained(
61
  model_name,
62
+ torch_dtype=torch.float16 if device == "cuda" else torch.float32,
63
+ device_map="auto" if device == "cuda" else None,
64
  trust_remote_code=True,
65
  low_cpu_mem_usage=True,
66
  token=hf_token
67
  )
68
  else:
69
+ # Force model to load on cuda:0 specifically
70
+ if device == "cuda":
71
+ torch.cuda.set_device(0)
72
+ device = "cuda:0"
73
+
74
  model = AutoModelForCausalLM.from_pretrained(
75
  model_name,
76
  torch_dtype=torch.float16 if device == "cuda:0" else torch.float32,
77
+ device_map={"": 0} if device == "cuda:0" else None, # Force all parameters to GPU 0
78
  trust_remote_code=True,
79
  low_cpu_mem_usage=True,
80
  use_safetensors=True, # Force safetensors to avoid CVE-2025-32434
 
99
  try:
100
  logger.info("Starting model loading...")
101
 
102
+ # Check if CUDA is available and force to cuda:0
103
  if torch.cuda.is_available():
104
+ torch.cuda.set_device(0)
105
+ device = "cuda:0"
106
  else:
107
+ device = "cpu"
108
  logger.info(f"Using device: {device}")
109
 
110
  if device == "cuda:0":
 
121
  try:
122
  logger.info("Loading model with transformers...")
123
 
124
+ # Use Llama 3.1 8B Instruct - excellent for question generation
125
  base_model_name = "meta-llama/Llama-3.1-8B-Instruct"
126
 
127
  tokenizer, model = await load_model_with_retry(base_model_name, hf_token)
 
190
  )
191
 
192
  def create_question_prompt(statement: str, num_questions: int, difficulty_level: str) -> str:
193
+ """Create a prompt for question generation optimized for Llama models"""
194
 
195
  difficulty_instruction = {
196
  "easy": "simple, straightforward questions that test basic understanding",
 
199
  "mixed": "a mix of easy, medium, and hard questions"
200
  }
201
 
202
+ # Llama models work better with chat-style prompts
203
+ prompt = f"""<|begin_of_text|><|start_header_id|>user<|end_header_id|>
204
+
205
+ Please generate exactly {num_questions} {difficulty_instruction[difficulty_level]} based on this statement:
206
 
207
  "{statement}"
208
 
209
  Requirements:
210
+ - Create clear, well-formed questions
211
  - Vary question types (what, how, why, when, where)
212
  - Number each question (1., 2., 3., etc.)
213
  - End each question with a question mark
214
+ - Focus only on the content of the statement
215
+
216
+ <|eot_id|><|start_header_id|>assistant<|end_header_id|>
217
 
218
+ Here are {num_questions} questions based on the statement:
219
+
220
+ """
221
 
222
  return prompt
223
 
 
290
 
291
  # Generate response using transformers
292
  inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
293
+
294
+ # Force all inputs to the same device as the model
295
  if device == "cuda:0":
296
+ # Get the actual device of the model
297
+ model_device = next(model.parameters()).device
298
+ logger.info(f"Model is on device: {model_device}")
299
+
300
+ # Move all input tensors to the same device as the model
301
+ inputs = {k: v.to(model_device) for k, v in inputs.items()}
302
+
303
  with torch.no_grad():
304
+ # Llama models generate text including the input prompt
305
  outputs = model.generate(
306
  **inputs,
307
  max_new_tokens=min(request.max_length, 1024),
 
309
  top_p=0.95,
310
  do_sample=True,
311
  num_beams=1,
312
+ pad_token_id=tokenizer.eos_token_id,
313
  early_stopping=True
314
  )
315
 
316
+ # Decode the generated text and remove the input prompt
317
+ full_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
318
+ # Remove the input prompt from the generated text
319
+ generated_text = full_text[len(prompt):].strip()
320
  logger.info(f"Generated text length: {len(generated_text)}")
321
 
322
  # Extract questions from the generated text
 
358
  """Root endpoint with basic info"""
359
  return {
360
  "message": "Question Generation API",
361
+ "model": "google/flan-t5-large",
362
  "endpoints": {
363
  "health": "/health",
364
  "generate": "/generate-questions",
generate_nfl_training_data.py ADDED
@@ -0,0 +1,454 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ NFL Rulebook Training Data Generator
4
+
5
+ This script processes the 2024 NFL rulebook CSV file and generates
6
+ training data for fine-tuning using our Hugging Face model.
7
+
8
+ For each rule, it generates 3 user/assistant prompt pairs using
9
+ the deployed model, then formats them into JSONL for fine-tuning.
10
+ """
11
+
12
+ import csv
13
+ import json
14
+ import random
15
+ import requests
16
+ import time
17
+ import argparse
18
+ from pathlib import Path
19
+ from typing import List, Dict, Any
20
+ import logging
21
+
22
# Configure logging: write to both a log file and the console so long
# generation runs can be followed live and audited afterwards.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('nfl_training_data.log'),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)

# Configuration
# Base URL of the deployed Hugging Face Space that serves the generation model.
HUGGINGFACE_SPACE_URL = "https://david167-question-generation-api.hf.space"
# System message embedded in every emitted training example (see create_training_example).
SYSTEM_MESSAGE = "You are a football broadcaster with years of experience and inside knowledge of the game from playing and coaching. You have a complete understanding of the rule book, how it's interpreted and judged."

class NFLTrainingDataGenerator:
    # Turns NFL rulebook CSV rows into chat-formatted fine-tuning examples
    # by prompting the deployed model (or a local mock) for Q&A pairs.
    def __init__(self, csv_file_path: str, output_dir: str = "output"):
        """Set up file paths, the HTTP session, and run statistics.

        Args:
            csv_file_path: Path to the NFL rulebook CSV file.
            output_dir: Directory for generated JSONL files (created if missing).
        """
        self.csv_file_path = Path(csv_file_path)
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(exist_ok=True)

        # API client setup: a shared session so headers and connections are reused.
        self.api_base_url = HUGGINGFACE_SPACE_URL
        self.session = requests.Session()
        self.session.headers.update({
            'Content-Type': 'application/json',
            'User-Agent': 'NFL-Training-Data-Generator/1.0'
        })

        # Stats tracking, reported by print_stats() at the end of a run.
        self.stats = {
            'rules_processed': 0,
            'prompts_generated': 0,
            'api_calls_made': 0,
            'errors': 0
        }
58
+
59
def load_rulebook_csv(self) -> List[Dict[str, str]]:
    """Read the rulebook CSV and return every row as a column->value dict.

    Raises:
        FileNotFoundError: if the configured CSV path does not exist.
        Exception: any other parse/IO error, after logging it.
    """
    try:
        # DictReader keys each row by the header line; materialize it all at once.
        with open(self.csv_file_path, 'r', encoding='utf-8') as file:
            rules = list(csv.DictReader(file))

        logger.info(f"Loaded {len(rules)} rules from {self.csv_file_path}")
        return rules

    except FileNotFoundError:
        logger.error(f"CSV file not found: {self.csv_file_path}")
        raise
    except Exception as e:
        logger.error(f"Error loading CSV: {str(e)}")
        raise
77
+
78
def generate_prompts_for_rule(self, rule_text: str, rule_number: str = None) -> List[Dict[str, Any]]:
    """Generate 3 user/assistant prompts for a single rule using our HF model.

    Args:
        rule_text: Full text of one NFL rule from the CSV.
        rule_number: Identifier used only for logging; may be None.

    Returns:
        A list of chat-format training examples (possibly empty on failure).
    """

    # Create the prompt for the model to generate training examples
    generation_prompt = f"""Based on this NFL rule, create 3 different realistic user questions that a football fan, coach, or player might ask, along with expert broadcaster responses.

NFL Rule: {rule_text}

For each of the 3 examples, provide:
1. A realistic user question about this rule
2. A detailed, authoritative response as an experienced football broadcaster

Make the questions varied - some should be basic understanding, others about specific scenarios or edge cases.
Make the responses detailed, authoritative, and include practical examples when helpful.

Format as:
Q1: [user question 1]
A1: [detailed broadcaster response 1]

Q2: [user question 2]
A2: [detailed broadcaster response 2]

Q3: [user question 3]
A3: [detailed broadcaster response 3]"""

    try:
        # Call our HF model API (or the mock, depending on call_hf_model's mode).
        response = self.call_hf_model(generation_prompt)
        self.stats['api_calls_made'] += 1

        if not response:
            logger.warning(f"Empty response for rule {rule_number}")
            return []

        # Parse the response to extract Q&A pairs
        prompts = self.parse_qa_response(response, rule_text)
        self.stats['prompts_generated'] += len(prompts)

        logger.info(f"Generated {len(prompts)} prompts for rule {rule_number}")
        return prompts

    except Exception as e:
        # Failures are counted but never fatal: the caller continues with the
        # next rule, so one bad API call does not abort a long run.
        logger.error(f"Error generating prompts for rule {rule_number}: {str(e)}")
        self.stats['errors'] += 1
        return []
123
+
124
def generate_mock_response(self, prompt: str) -> str:
    """Generate a mock response for testing when HF space is unavailable.

    Produces a canned Q1/A1..Q3/A3 block that parse_qa_response can consume,
    splicing in prefixes of the rule text so output varies per rule.
    """

    # Extract rule text from the prompt (the line beginning "NFL Rule:").
    rule_text = ""
    if "NFL Rule:" in prompt:
        lines = prompt.split('\n')
        for line in lines:
            if line.startswith("NFL Rule:"):
                rule_text = line.replace("NFL Rule:", "").strip()
                break

    # Generate realistic mock Q&A based on the rule
    mock_responses = [
        f"""Q1: What does this rule mean in simple terms?
A1: This rule explains that {rule_text[:50]}... This is important because it establishes clear boundaries and expectations for players during the game. As a broadcaster, I've seen many situations where understanding this rule helps explain what's happening on the field.

Q2: When would this rule typically come into play during a game?
A2: You'll most commonly see this rule applied during crucial moments of the game. For example, {rule_text[:30]}... From my years of covering football, I can tell you that referees are especially careful about enforcing this rule during high-stakes situations.

Q3: What are some common misconceptions about this rule?
A3: Many fans think this rule is more complicated than it actually is. The key thing to remember is that {rule_text[:40]}... Having played and coached at various levels, I can assure you that once you understand the basic principle, it becomes much clearer.""",

        f"""Q1: How do referees typically enforce this rule?
A1: Referees are trained to look for specific indicators when applying this rule. Since {rule_text[:50]}..., they need to make quick decisions based on what they observe. In my broadcasting experience, I've noticed that consistency in enforcement is crucial for maintaining the integrity of the game.

Q2: Has this rule changed over the years?
A2: Like many NFL rules, this one has evolved to improve player safety and game flow. The current version states that {rule_text[:40]}... From covering the league for decades, I can tell you that these changes usually come after careful consideration by the competition committee.

Q3: What should coaches teach players about this rule?
A3: Coaches need to emphasize the practical implications of this rule during practice. Since {rule_text[:35]}..., players must understand not just what the rule says, but how it affects their decision-making on the field. This is fundamental knowledge that every player should master."""
    ]

    # Add some delay to simulate API call
    time.sleep(0.5)

    # Return a random mock response (not deterministic; seed random for tests).
    return random.choice(mock_responses)
162
+
163
def call_hf_model(self, prompt: str, max_retries: int = 3) -> str:
    """Call our Hugging Face Gradio interface with retry logic.

    Args:
        prompt: Full generation prompt to send to the model.
        max_retries: Attempts before giving up; backoff is exponential.

    Returns:
        The assistant's response text, or "" if no usable response was
        obtained within max_retries attempts.

    Raises:
        requests.exceptions.RequestException: if the final attempt fails
            at the transport level.
    """
    # BUGFIX: mock mode was hard-coded as `if True:` (requiring a source edit
    # to ever hit the real Space). It is now driven by an optional instance
    # attribute; the default (True) preserves the previous behavior, and
    # callers can set `generator.use_mock = False` once the Space is live.
    if getattr(self, "use_mock", True):
        return self.generate_mock_response(prompt)

    # Use the Gradio interface endpoint
    gradio_url = f"{self.api_base_url}/api/predict"

    # Gradio payload format for our chat interface
    payload = {
        "data": [
            prompt,     # message
            [],         # history (empty for new conversation)
            0.8,        # temperature
            False,      # json_mode
            "general"   # json_template
        ],
        "fn_index": 0   # Function index for the respond function
    }

    for attempt in range(max_retries):
        try:
            # Add delay between requests to be respectful
            if attempt > 0:
                time.sleep(2 ** attempt)  # Exponential backoff

            response = self.session.post(
                gradio_url,
                json=payload,
                timeout=60
            )

            if response.status_code == 200:
                data = response.json()
                # Gradio returns data in format: {"data": [history, ""]}
                if 'data' in data and len(data['data']) > 0:
                    history = data['data'][0]
                    if history and len(history) > 0:
                        # Get the last assistant response; Gradio may return
                        # either message dicts or [user_msg, assistant_msg] pairs.
                        last_response = history[-1]
                        if isinstance(last_response, dict) and 'content' in last_response:
                            return last_response['content']
                        elif isinstance(last_response, list) and len(last_response) > 1:
                            return last_response[1]  # [user_msg, assistant_msg] format

                # Fallback: return raw data as string
                return str(data)
            else:
                logger.warning(f"Gradio API call failed with status {response.status_code}")

        except requests.exceptions.RequestException as e:
            logger.warning(f"Request failed (attempt {attempt + 1}): {str(e)}")
            if attempt == max_retries - 1:
                raise

    # All retries returned non-200 without raising; signal "no response".
    return ""
221
+
222
def parse_qa_response(self, response: str, original_rule: str) -> List[Dict[str, Any]]:
    """Parse the model response to extract Q&A pairs.

    Scans the response line by line for "Qn:" / "An:" markers, accumulating
    multi-line questions and answers, and wraps each completed pair via
    create_training_example. On any parse error a single generic fallback
    example built from original_rule is returned instead.
    """
    prompts = []

    try:
        lines = response.strip().split('\n')
        current_q = None  # question currently being assembled
        current_a = None  # answer currently being assembled (None = not started)

        for line in lines:
            line = line.strip()
            if not line:
                continue

            # Look for question patterns
            if line.startswith(('Q1:', 'Q2:', 'Q3:', '1.', '2.', '3.')):
                if current_q and current_a:
                    # Save previous Q&A pair
                    prompts.append(self.create_training_example(current_q, current_a))

                # Extract question
                current_q = line.split(':', 1)[1].strip() if ':' in line else line
                current_a = None

            # Look for answer patterns
            elif line.startswith(('A1:', 'A2:', 'A3:')):
                current_a = line.split(':', 1)[1].strip() if ':' in line else line

            # Continue building the answer if we're in answer mode
            elif current_q and current_a is not None:
                current_a += ' ' + line
            elif current_q and not current_a:
                # This might be a continuation of the question or start of answer
                # NOTE(review): 50-char threshold is a heuristic — short answer
                # openings will be glued onto the question; confirm acceptable.
                if len(line) > 50:  # Likely an answer
                    current_a = line
                else:
                    current_q += ' ' + line

        # Don't forget the last Q&A pair
        if current_q and current_a:
            prompts.append(self.create_training_example(current_q, current_a))

    except Exception as e:
        logger.error(f"Error parsing response: {str(e)}")
        # Fallback: create a generic example
        prompts.append(self.create_training_example(
            f"Can you explain this NFL rule?",
            f"This rule states: {original_rule[:200]}..."
        ))

    return prompts
273
+
274
def create_training_example(self, user_question: str, assistant_response: str) -> Dict[str, Any]:
    """Wrap one Q/A pair in the chat-message schema used for fine-tuning.

    The fixed SYSTEM_MESSAGE is prepended and both texts are stripped of
    surrounding whitespace.
    """
    system_msg = {"role": "system", "content": SYSTEM_MESSAGE}
    user_msg = {"role": "user", "content": user_question.strip()}
    assistant_msg = {"role": "assistant", "content": assistant_response.strip()}
    return {"messages": [system_msg, user_msg, assistant_msg]}
292
+
293
def process_rules(self, rules: List[Dict[str, str]], sample_size: int = None) -> List[Dict[str, Any]]:
    """Generate training examples for every rule, or for a random sample.

    Args:
        rules: Parsed rulebook rows from load_rulebook_csv.
        sample_size: If truthy, process only this many randomly chosen rules.

    Returns:
        The concatenated training examples from all processed rules.
    """
    if sample_size:
        rules = random.sample(rules, min(sample_size, len(rules)))
        logger.info(f"Processing random sample of {len(rules)} rules")
    else:
        logger.info(f"Processing all {len(rules)} rules")

    all_training_examples = []

    for i, rule in enumerate(rules, 1):
        # Column names vary between rulebook exports — try the likely ones,
        # falling back to the stringified row / a synthetic rule number.
        rule_text = rule.get('rule_text', rule.get('description', rule.get('text', str(rule))))
        rule_number = rule.get('rule_number', rule.get('number', f"Rule_{i}"))

        logger.info(f"Processing rule {i}/{len(rules)}: {rule_number}")

        all_training_examples.extend(self.generate_prompts_for_rule(rule_text, rule_number))
        self.stats['rules_processed'] += 1

        # Pause between rules to stay polite to the remote API.
        time.sleep(1)

        # Progress update every 10 rules
        if i % 10 == 0:
            logger.info(f"Progress: {i}/{len(rules)} rules processed, {len(all_training_examples)} examples generated")

    return all_training_examples
325
+
326
def save_jsonl(self, training_examples: List[Dict[str, Any]], filename: str = None):
    """Write the training examples to a JSON-Lines file.

    Args:
        training_examples: Chat-format examples to serialize, one per line.
        filename: Target name inside the output directory; a timestamped
            default is generated when omitted.

    Returns:
        The Path of the written file.
    """
    if not filename:
        timestamp = int(time.time())
        filename = f"nfl_training_data_{timestamp}.jsonl"

    output_path = self.output_dir / filename

    try:
        with open(output_path, 'w', encoding='utf-8') as f:
            # One JSON document per line; ensure_ascii=False keeps text readable.
            f.writelines(
                json.dumps(example, ensure_ascii=False) + '\n'
                for example in training_examples
            )

        logger.info(f"Saved {len(training_examples)} training examples to {output_path}")
        return output_path

    except Exception as e:
        logger.error(f"Error saving JSONL file: {str(e)}")
        raise
346
+
347
def print_stats(self):
    """Print a banner-framed summary of this run's generation statistics."""
    banner = "=" * 50
    print("\n" + banner)
    print("GENERATION STATISTICS")
    print(banner)
    print(f"Rules processed: {self.stats['rules_processed']}")
    print(f"Total prompts generated: {self.stats['prompts_generated']}")
    print(f"API calls made: {self.stats['api_calls_made']}")
    print(f"Errors encountered: {self.stats['errors']}")
    # Guard against division by zero when nothing was processed.
    denominator = max(1, self.stats['rules_processed'])
    print(f"Average prompts per rule: {self.stats['prompts_generated'] / denominator:.1f}")
    print(banner)
358
+
359
def main():
    """Command-line entry point.

    Parses arguments, determines the number of rules to process (from flags
    or an interactive prompt), runs the generator, and writes JSONL output.

    Returns:
        Process exit code: 0 on success, 1 on failure.
    """
    parser = argparse.ArgumentParser(description='Generate NFL training data from rulebook CSV')
    parser.add_argument('csv_file', help='Path to the 2024 NFL rulebook CSV file')

    # The three processing modes are mutually exclusive.
    processing_group = parser.add_mutually_exclusive_group()
    processing_group.add_argument('--sample', type=int, default=None,
                                  help='Process only a random sample of N rules')
    processing_group.add_argument('--random-10', action='store_true',
                                  help='Process 10 random rules (quick test)')
    processing_group.add_argument('--full', action='store_true',
                                  help='Process all rules in the file')

    parser.add_argument('--output-dir', default='output',
                        help='Output directory for generated files')
    parser.add_argument('--output-file', default=None,
                        help='Output JSONL filename (default: auto-generated)')

    args = parser.parse_args()

    # Handle the processing options
    sample_size = None
    if args.random_10:
        sample_size = 10
        print("🎯 Running with 10 random rules for testing")
    elif args.sample is not None:
        # BUGFIX: compare against None instead of truthiness so an explicit
        # "--sample 0" is not silently treated as "flag absent".
        sample_size = args.sample
        print(f"🎯 Running with {sample_size} random rules")
    elif args.full:
        sample_size = None
        print("🎯 Running with ALL rules in the file")
    else:
        # Default behavior - no flag given, so ask the user interactively.
        print("\n🏈 NFL Training Data Generator")
        print("Choose processing mode:")
        print("1. Test with 10 random rules (recommended for first run)")
        print("2. Process ALL rules in the file")

        while True:
            choice = input("\nEnter your choice (1 or 2): ").strip()
            if choice == "1":
                sample_size = 10
                print("🎯 Processing 10 random rules...")
                break
            elif choice == "2":
                sample_size = None
                print("🎯 Processing ALL rules...")
                break
            else:
                print("❌ Please enter 1 or 2")

    # Update args with the determined sample size
    args.sample = sample_size

    # Validate CSV file exists before constructing anything.
    if not Path(args.csv_file).exists():
        print(f"Error: CSV file not found: {args.csv_file}")
        return 1

    # Create generator
    generator = NFLTrainingDataGenerator(args.csv_file, args.output_dir)

    try:
        # Load rules
        rules = generator.load_rulebook_csv()

        # Process rules
        training_examples = generator.process_rules(rules, args.sample)

        if not training_examples:
            print("No training examples generated!")
            return 1

        # Save to JSONL
        output_file = generator.save_jsonl(training_examples, args.output_file)

        # Print statistics
        generator.print_stats()

        print(f"\n✅ Successfully generated training data!")
        print(f"📁 Output file: {output_file}")
        print(f"📊 Total examples: {len(training_examples)}")

        # Show a sample example so the user can sanity-check the format.
        if training_examples:
            print(f"\n📝 Sample training example:")
            print(json.dumps(training_examples[0], indent=2, ensure_ascii=False))

        return 0

    except Exception as e:
        # BUGFIX: logger.exception records the traceback; logger.error lost it.
        logger.exception(f"Fatal error: {str(e)}")
        return 1

if __name__ == "__main__":
    # BUGFIX: raise SystemExit instead of calling the site-provided exit(),
    # which is not guaranteed to exist (e.g. under `python -S`).
    raise SystemExit(main())
gradio_app.py CHANGED
@@ -1,5 +1,8 @@
1
  import os
2
  import logging
 
 
 
3
  import threading
4
  import json
5
  import re
@@ -12,65 +15,72 @@ import gradio as gr
12
  logging.basicConfig(level=logging.INFO)
13
  logger = logging.getLogger(__name__)
14
 
15
- # Global variables for model
16
  model = None
17
  tokenizer = None
18
  device = None
19
  model_loaded = False
20
 
21
- def load_model():
22
- """Load the model and tokenizer"""
23
- global model, tokenizer, device, model_loaded
24
-
25
- try:
26
- logger.info("Starting model loading...")
27
-
28
- if torch.cuda.is_available():
29
- torch.cuda.set_device(0)
30
- device = "cuda:0"
31
- else:
32
- device = "cpu"
33
- logger.info(f"Using device: {device}")
34
-
35
- if device == "cuda:0":
36
- logger.info(f"GPU: {torch.cuda.get_device_name()}")
37
- logger.info(f"VRAM Available: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")
38
-
39
- hf_token = os.getenv("HF_TOKEN")
40
-
41
- logger.info("Loading Llama-3.1-8B-Instruct model...")
42
- base_model_name = "meta-llama/Llama-3.1-8B-Instruct"
43
-
44
- tokenizer = AutoTokenizer.from_pretrained(
45
- base_model_name,
46
- use_fast=True,
47
- trust_remote_code=True,
48
- token=hf_token
49
- )
50
-
51
- model = AutoModelForCausalLM.from_pretrained(
52
- base_model_name,
53
- torch_dtype=torch.float16 if device == "cuda:0" else torch.float32,
54
- device_map={"": 0},
55
- trust_remote_code=True,
56
- low_cpu_mem_usage=True,
57
- use_safetensors=True,
58
- token=hf_token
59
- )
60
-
61
- if device == "cuda:0":
62
- model = model.to(device)
63
-
64
- model_loaded = True
65
- logger.info("Model loaded successfully!")
66
-
67
- except Exception as e:
68
- logger.error(f"Error loading model: {str(e)}")
69
- model_loaded = False
 
 
 
 
 
 
 
 
70
 
71
  # Start model loading in a separate thread
72
- model_thread = threading.Thread(target=load_model)
73
- model_thread.start()
74
 
75
  def create_json_prompt(message, template_type):
76
  """Create JSON-formatted prompts based on template type"""
@@ -105,6 +115,35 @@ def create_json_prompt(message, template_type):
105
  "topic": "detected topic",
106
  "question_types": ["factual", "analytical", "creative"]
107
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  }"""
109
  }
110
  }
@@ -128,6 +167,7 @@ Ensure the response is valid JSON that can be parsed. Do not include any text ou
128
  def prettify_json_response(response_text):
129
  """Try to extract and prettify JSON from response"""
130
  try:
 
131
  json_pattern = r'\{.*\}'
132
  json_match = re.search(json_pattern, response_text, re.DOTALL)
133
 
@@ -141,20 +181,22 @@ def prettify_json_response(response_text):
141
  return response_text
142
 
143
  def chat_with_model(message, history, temperature, json_mode=False, json_template="general"):
144
- """Chat function for model interaction"""
145
  if not message.strip():
146
  return history, ""
147
 
148
- if not model_loaded:
149
  response = "Model not loaded yet. Please wait..."
150
  history.append({"role": "user", "content": message})
151
  history.append({"role": "assistant", "content": response})
152
  return history, ""
153
 
154
  try:
 
155
  if json_mode:
156
  prompt = create_json_prompt(message, json_template)
157
  else:
 
158
  prompt = f"""<|begin_of_text|><|start_header_id|>user<|end_header_id|>
159
 
160
  {message}
@@ -163,36 +205,47 @@ def chat_with_model(message, history, temperature, json_mode=False, json_templat
163
 
164
  """
165
 
166
- inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=4096)
 
167
 
168
- if device == "cuda:0":
169
- model_device = next(model.parameters()).device
 
 
 
 
 
170
  inputs = {k: v.to(model_device) for k, v in inputs.items()}
171
 
172
  with torch.no_grad():
173
- outputs = model.generate(
174
  **inputs,
175
  max_new_tokens=4096,
176
  temperature=temperature,
177
  top_p=0.95,
178
  do_sample=True,
179
  num_beams=1,
180
- pad_token_id=tokenizer.eos_token_id,
181
- eos_token_id=tokenizer.eos_token_id,
182
- early_stopping=False,
183
- repetition_penalty=1.1
184
  )
185
 
186
- generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
187
 
 
188
  if "<|start_header_id|>assistant<|end_header_id|>" in generated_text:
189
  response = generated_text.split("<|start_header_id|>assistant<|end_header_id|>")[-1].strip()
190
  else:
 
191
  response = generated_text[len(prompt):].strip()
192
 
 
193
  if json_mode and response:
194
  response = prettify_json_response(response)
195
 
 
196
  history.append({"role": "user", "content": message})
197
  history.append({"role": "assistant", "content": response})
198
 
@@ -204,8 +257,10 @@ def chat_with_model(message, history, temperature, json_mode=False, json_templat
204
  return history, ""
205
 
206
  def clear_chat():
 
207
  return [], ""
208
 
 
209
  css = """
210
  .gradio-container {
211
  max-width: 100% !important;
@@ -214,43 +269,80 @@ css = """
214
  padding: 20px !important;
215
  }
216
  #chatbot {
217
- height: 600px !important;
218
- max-height: 600px !important;
219
  min-height: 600px !important;
220
  overflow-y: auto !important;
221
- flex-shrink: 0 !important;
 
 
222
  }
223
- /* Prevent layout shifts on input focus */
224
- .gr-textbox input:focus {
225
- outline: 2px solid #007bff !important;
226
- outline-offset: -2px !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
227
  }
228
- .gr-row {
229
- flex-shrink: 0 !important;
 
 
 
 
 
230
  }
231
- .gr-column {
232
- flex-shrink: 0 !important;}
233
  """
234
 
 
235
  with gr.Blocks(css=css, title="Llama Chat", theme=gr.themes.Soft()) as demo:
236
  gr.Markdown(
237
  """
238
  # πŸ¦™ Llama Chat
239
- ### Raw interface for Llama-3.1-8B-Instruct with JSON Mode
240
 
241
- **JSON Response Mode**: Enable for structured outputs!
 
 
242
  - 🎯 **General**: Basic structured responses
243
  - ❓ **Questions**: Generate question sets from content
 
 
244
  """
245
  )
246
 
 
247
  chatbot = gr.Chatbot(
248
  elem_id="chatbot",
249
  label="Chat",
250
  show_label=False,
251
  avatar_images=(None, None),
252
  show_share_button=False,
253
- type="messages",
254
  height=600,
255
  render_markdown=True,
256
  show_copy_button=True
@@ -274,7 +366,8 @@ with gr.Blocks(css=css, title="Llama Chat", theme=gr.themes.Soft()) as demo:
274
  maximum=2.0,
275
  value=0.8,
276
  step=0.1,
277
- label="Temperature"
 
278
  )
279
 
280
  with gr.Row():
@@ -282,27 +375,42 @@ with gr.Blocks(css=css, title="Llama Chat", theme=gr.themes.Soft()) as demo:
282
  json_mode = gr.Checkbox(
283
  label="JSON Response Mode",
284
  value=False,
285
- info="Get structured JSON responses"
286
  )
287
  with gr.Column(scale=3):
288
  json_template = gr.Dropdown(
289
- choices=["general", "questions"],
290
  value="general",
291
  label="JSON Template",
 
292
  visible=False
293
  )
294
 
 
295
  def respond(message, history, temp, json_enabled, json_type):
296
  return chat_with_model(message, history, temp, json_enabled, json_type)
297
 
298
  def toggle_json_template(json_enabled):
299
  return gr.update(visible=json_enabled)
300
 
 
301
  json_mode.change(toggle_json_template, inputs=[json_mode], outputs=[json_template])
302
 
303
  msg.submit(respond, [msg, chatbot, temperature, json_mode, json_template], [chatbot, msg])
304
  submit_btn.click(respond, [msg, chatbot, temperature, json_mode, json_template], [chatbot, msg])
305
  clear_btn.click(clear_chat, outputs=[chatbot, msg])
 
 
 
 
 
 
 
 
 
 
 
 
306
 
307
  if __name__ == "__main__":
308
  demo.launch(
@@ -310,4 +418,4 @@ if __name__ == "__main__":
310
  server_port=7860,
311
  share=False,
312
  show_error=True
313
- )
 
1
  import os
2
  import logging
3
+ import time
4
+ import asyncio
5
+ from typing import List, Optional, Dict, Any
6
  import threading
7
  import json
8
  import re
 
15
  logging.basicConfig(level=logging.INFO)
16
  logger = logging.getLogger(__name__)
17
 
18
+ # Global variables for model and tokenizer
19
  model = None
20
  tokenizer = None
21
  device = None
22
  model_loaded = False
23
 
24
class ModelManager:
    """Owns the Llama tokenizer/model pair and tracks whether loading succeeded.

    NOTE(review): loading happens eagerly in __init__, so constructing this
    class blocks until the multi-GB model is downloaded and loaded.
    """

    def __init__(self):
        self.model = None          # transformers causal-LM once loaded
        self.tokenizer = None      # matching tokenizer
        self.device = None         # "cuda:0" or "cpu"
        self.model_loaded = False  # flipped to True only on successful load
        self.load_model()

    def load_model(self):
        """Load meta-llama/Llama-3.1-8B-Instruct and its tokenizer.

        Failures are caught and recorded in ``self.model_loaded`` instead of
        raising, so the UI can start up and report "model not loaded".
        """
        try:
            logger.info("Starting model loading...")

            # Pin everything to GPU 0 when CUDA is available.
            if torch.cuda.is_available():
                torch.cuda.set_device(0)
                self.device = "cuda:0"
            else:
                self.device = "cpu"
            logger.info(f"Using device: {self.device}")

            if self.device == "cuda:0":
                logger.info(f"GPU: {torch.cuda.get_device_name()}")
                logger.info(f"VRAM Available: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")

            # Gated repository: requires an HF access token in the environment.
            hf_token = os.getenv("HF_TOKEN")

            logger.info("Loading Llama-3.1-8B-Instruct model...")
            base_model_name = "meta-llama/Llama-3.1-8B-Instruct"

            self.tokenizer = AutoTokenizer.from_pretrained(
                base_model_name,
                use_fast=True,
                trust_remote_code=True,
                token=hf_token
            )

            # BUGFIX: device_map={"": 0} unconditionally places all weights on
            # GPU 0 and fails on CPU-only hosts; only pass it when CUDA is
            # present. When device_map is used, accelerate already places the
            # weights, so the extra .to(device) call the old code made after
            # loading is unnecessary (and discouraged for dispatched models).
            self.model = AutoModelForCausalLM.from_pretrained(
                base_model_name,
                torch_dtype=torch.float16 if self.device == "cuda:0" else torch.float32,
                device_map={"": 0} if self.device == "cuda:0" else None,
                trust_remote_code=True,
                low_cpu_mem_usage=True,
                use_safetensors=True,
                token=hf_token
            )

            self.model_loaded = True
            logger.info("Model loaded successfully!")

        except Exception as e:
            # Deliberate broad catch: keep the app alive and surface the
            # failure through model_loaded rather than crashing at import time.
            logger.error(f"Error loading model: {str(e)}")
            self.model_loaded = False
81
 
82
  # Start model loading in a separate thread
83
+ model_manager = ModelManager()
 
84
 
85
  def create_json_prompt(message, template_type):
86
  """Create JSON-formatted prompts based on template type"""
 
115
  "topic": "detected topic",
116
  "question_types": ["factual", "analytical", "creative"]
117
  }
118
+ }"""
119
+ },
120
+ "analysis": {
121
+ "instruction": "Analyze the following content and respond in JSON format:",
122
+ "schema": """{
123
+ "summary": "brief summary of the content",
124
+ "key_points": [
125
+ "Key point 1",
126
+ "Key point 2",
127
+ "Key point 3"
128
+ ],
129
+ "sentiment": "positive|negative|neutral",
130
+ "topics": ["topic1", "topic2", "topic3"],
131
+ "complexity_score": 0.75,
132
+ "word_count": 150
133
+ }"""
134
+ },
135
+ "structured": {
136
+ "instruction": "Process this information and respond in a structured JSON format:",
137
+ "schema": """{
138
+ "title": "extracted or generated title",
139
+ "content": "processed content",
140
+ "categories": ["category1", "category2"],
141
+ "tags": ["tag1", "tag2", "tag3"],
142
+ "priority": "high|medium|low",
143
+ "action_items": [
144
+ "Action item 1",
145
+ "Action item 2"
146
+ ]
147
  }"""
148
  }
149
  }
 
167
  def prettify_json_response(response_text):
168
  """Try to extract and prettify JSON from response"""
169
  try:
170
+ # Try to find JSON in the response
171
  json_pattern = r'\{.*\}'
172
  json_match = re.search(json_pattern, response_text, re.DOTALL)
173
 
 
181
  return response_text
182
 
183
  def chat_with_model(message, history, temperature, json_mode=False, json_template="general"):
184
+ """Raw chat function for direct model interaction"""
185
  if not message.strip():
186
  return history, ""
187
 
188
+ if not model_manager.model_loaded:
189
  response = "Model not loaded yet. Please wait..."
190
  history.append({"role": "user", "content": message})
191
  history.append({"role": "assistant", "content": response})
192
  return history, ""
193
 
194
  try:
195
+ # Create prompt based on mode
196
  if json_mode:
197
  prompt = create_json_prompt(message, json_template)
198
  else:
199
+ # Create a simple chat prompt
200
  prompt = f"""<|begin_of_text|><|start_header_id|>user<|end_header_id|>
201
 
202
  {message}
 
205
 
206
  """
207
 
208
+ # Generate response using the model directly
209
+ inputs = model_manager.tokenizer(prompt, return_tensors="pt", truncation=True, max_length=4096)
210
 
211
+ # Force all inputs to the same device as the model
212
+ if model_manager.device == "cuda:0":
213
+ # Get the actual device of the model
214
+ model_device = next(model_manager.model.parameters()).device
215
+ logger.info(f"Model is on device: {model_device}")
216
+
217
+ # Move all input tensors to the same device as the model
218
  inputs = {k: v.to(model_device) for k, v in inputs.items()}
219
 
220
  with torch.no_grad():
221
+ outputs = model_manager.model.generate(
222
  **inputs,
223
  max_new_tokens=4096,
224
  temperature=temperature,
225
  top_p=0.95,
226
  do_sample=True,
227
  num_beams=1,
228
+ pad_token_id=model_manager.tokenizer.eos_token_id,
229
+ eos_token_id=model_manager.tokenizer.eos_token_id,
230
+ early_stopping=False, # Disable early stopping to prevent premature truncation
231
+ repetition_penalty=1.1 # Add slight repetition penalty to improve quality
232
  )
233
 
234
+ # Decode response
235
+ generated_text = model_manager.tokenizer.decode(outputs[0], skip_special_tokens=True)
236
 
237
+ # Extract the response part (remove the prompt)
238
  if "<|start_header_id|>assistant<|end_header_id|>" in generated_text:
239
  response = generated_text.split("<|start_header_id|>assistant<|end_header_id|>")[-1].strip()
240
  else:
241
+ # Fallback: try to remove the prompt by length
242
  response = generated_text[len(prompt):].strip()
243
 
244
+ # Process JSON response if in JSON mode
245
  if json_mode and response:
246
  response = prettify_json_response(response)
247
 
248
+ # Add to history
249
  history.append({"role": "user", "content": message})
250
  history.append({"role": "assistant", "content": response})
251
 
 
257
  return history, ""
258
 
259
def clear_chat():
    """Reset the UI: return an empty message history and a blank input box."""
    fresh_history = []
    fresh_input = ""
    return fresh_history, fresh_input
262
 
263
+ # Custom CSS for full-width ChatGPT-like appearance
264
  css = """
265
  .gradio-container {
266
  max-width: 100% !important;
 
269
  padding: 20px !important;
270
  }
271
  #chatbot {
272
+ height: 70vh !important;
 
273
  min-height: 600px !important;
274
  overflow-y: auto !important;
275
+ border-radius: 12px !important;
276
+ border: 1px solid #e0e0e0 !important;
277
+ background-color: #fafafa !important;
278
  }
279
+ .message {
280
+ padding: 12px 16px !important;
281
+ margin: 8px 0 !important;
282
+ border-radius: 12px !important;
283
+ max-width: 85% !important;
284
+ word-wrap: break-word !important;
285
+ }
286
+ .user {
287
+ background-color: #007bff !important;
288
+ color: white !important;
289
+ margin-left: auto !important;
290
+ margin-right: 0 !important;
291
+ }
292
+ .bot {
293
+ background-color: #f8f9fa !important;
294
+ border: 1px solid #e9ecef !important;
295
+ margin-left: 0 !important;
296
+ margin-right: auto !important;
297
+ }
298
+ /* Full width input area */
299
+ .gr-textbox {
300
+ border-radius: 8px !important;
301
+ }
302
+ /* Responsive design for different screen sizes */
303
+ @media (min-width: 1400px) {
304
+ .gradio-container {
305
+ padding: 40px !important;
306
+ }
307
+ #chatbot {
308
+ height: 75vh !important;
309
+ }
310
  }
311
+ @media (min-width: 1800px) {
312
+ .gradio-container {
313
+ padding: 60px !important;
314
+ }
315
+ #chatbot {
316
+ height: 80vh !important;
317
+ }
318
  }
 
 
319
  """
320
 
321
+ # Create simplified chat interface with JSON functionality
322
  with gr.Blocks(css=css, title="Llama Chat", theme=gr.themes.Soft()) as demo:
323
  gr.Markdown(
324
  """
325
  # 🦙 Llama Chat
326
+ ### Raw interface for Llama-3.1-8B-Instruct
327
 
328
+ Direct chat interface for testing prompts and having conversations with the model.
329
+
330
+ **New:** Enable **JSON Response Mode** for structured outputs! Choose from templates like:
331
  - 🎯 **General**: Basic structured responses
332
  - ❓ **Questions**: Generate question sets from content
333
+ - 📊 **Analysis**: Content analysis with sentiment & topics
334
+ - 📋 **Structured**: Organized data with categories & actions
335
  """
336
  )
337
 
338
+ # Simple chat interface
339
  chatbot = gr.Chatbot(
340
  elem_id="chatbot",
341
  label="Chat",
342
  show_label=False,
343
  avatar_images=(None, None),
344
  show_share_button=False,
345
+ type="messages", # Use new message format
346
  height=600,
347
  render_markdown=True,
348
  show_copy_button=True
 
366
  maximum=2.0,
367
  value=0.8,
368
  step=0.1,
369
+ label="Temperature",
370
+ info="Controls randomness (0.1=focused, 2.0=creative)"
371
  )
372
 
373
  with gr.Row():
 
375
  json_mode = gr.Checkbox(
376
  label="JSON Response Mode",
377
  value=False,
378
+ info="Get structured JSON responses instead of regular text"
379
  )
380
  with gr.Column(scale=3):
381
  json_template = gr.Dropdown(
382
+ choices=["general", "questions", "analysis", "structured"],
383
  value="general",
384
  label="JSON Template",
385
+ info="Choose the type of JSON structure you want",
386
  visible=False
387
  )
388
 
389
+ # Event handlers
390
  def respond(message, history, temp, json_enabled, json_type):
391
  return chat_with_model(message, history, temp, json_enabled, json_type)
392
 
393
  def toggle_json_template(json_enabled):
394
  return gr.update(visible=json_enabled)
395
 
396
+ # Connect JSON mode toggle to template visibility
397
  json_mode.change(toggle_json_template, inputs=[json_mode], outputs=[json_template])
398
 
399
  msg.submit(respond, [msg, chatbot, temperature, json_mode, json_template], [chatbot, msg])
400
  submit_btn.click(respond, [msg, chatbot, temperature, json_mode, json_template], [chatbot, msg])
401
  clear_btn.click(clear_chat, outputs=[chatbot, msg])
402
+
403
+ # Add footer
404
+ gr.Markdown(
405
+ """
406
+ ---
407
+ <div style="text-align: center; color: #666; font-size: 0.9em;">
408
+ Built with ❤️ using Gradio and Llama-3.1-8B-Instruct •
409
+ <a href="/docs" target="_blank">API Documentation</a> •
410
+ JSON Mode for structured outputs
411
+ </div>
412
+ """
413
+ )
414
 
415
  if __name__ == "__main__":
416
  demo.launch(
 
418
  server_port=7860,
419
  share=False,
420
  show_error=True
421
+ )
requirements.txt CHANGED
@@ -1,14 +1,3 @@
1
- fastapi>=0.115.2
2
- uvicorn[standard]>=0.24.0
3
- pydantic>=2.5.0
4
- torch==2.5.0
5
- transformers>=4.35.0
6
- accelerate>=0.24.0
7
- bitsandbytes>=0.41.0
8
- # llama-cpp-python>=0.2.20 # Removed to avoid compilation issues
9
- huggingface-hub>=0.19.0
10
- python-multipart>=0.0.9
11
- numpy>=1.24.0
12
- sentencepiece>=0.1.99
13
- protobuf>=3.20.0
14
- gradio>=4.44.0
 
1
+ requests>=2.31.0
2
+ pathlib  # NOTE(review): stdlib in Python 3 — the PyPI "pathlib" backport is obsolete and can shadow the stdlib; remove this pin
3
+ argparse  # NOTE(review): stdlib since Python 3.2 — remove this pin
 
 
 
 
 
 
 
 
 
 
 
run_nfl_generator.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env python3
"""
NFL Training Data Generator - Interactive Runner

Presents a small menu, builds the generate_nfl_training_data.py command
line for the chosen mode, runs it, and summarizes the generated .jsonl
output files.
"""

import subprocess
import sys
from pathlib import Path

# Generator script and input rulebook used by every mode.
GENERATOR_SCRIPT = "generate_nfl_training_data.py"
RULEBOOK_CSV = "sample_2024_nfl_rulebook.csv"


def build_command(sample_size=None, output_dir="output_full"):
    """Return the argv list for the generator subprocess.

    sample_size: number of random rules to process, or None to process all.
    output_dir: directory the generator writes its .jsonl files into.
    """
    cmd = [sys.executable, GENERATOR_SCRIPT, RULEBOOK_CSV]
    if sample_size is not None:
        cmd += ["--sample", str(sample_size)]
    cmd += ["--output-dir", output_dir]
    return cmd


def summarize_output(output_dir):
    """Print each generated .jsonl file with its example count and a sample line."""
    out = Path(output_dir)
    if not out.exists():
        return
    files = list(out.glob("*.jsonl"))
    if not files:
        return
    print("\n📁 Generated files:")
    for file in files:
        print(f"   - {file}")
        with open(file, 'r') as f:
            lines = f.readlines()
        print(f"   📊 {len(lines)} training examples")
        if lines:
            print("   📝 Sample content:")
            # Truncate long first lines so the preview stays readable.
            sample_line = lines[0][:150] + "..." if len(lines[0]) > 150 else lines[0]
            print(f"   {sample_line}")


def main():
    """Interactive entry point; returns a process exit code (0 ok, 1 failure)."""
    print("🏈 NFL Training Data Generator")
    print("=" * 50)
    print("Choose processing mode:")
    print("1. Test with 10 random rules (recommended for first run)")
    print("2. Process ALL rules in the file")
    print("3. Custom number of rules")
    print("4. Exit")

    while True:
        choice = input("\nEnter your choice (1-4): ").strip()

        if choice == "1":
            cmd = build_command(sample_size=10, output_dir="output_10_random")
            print("🎯 Processing 10 random rules...")
            break
        elif choice == "2":
            cmd = build_command(output_dir="output_full")
            print("🎯 Processing ALL rules...")
            break
        elif choice == "3":
            try:
                num_rules = int(input("Enter number of random rules to process: "))
            except ValueError:
                print("❌ Please enter a valid number")
                continue
            cmd = build_command(sample_size=num_rules,
                                output_dir=f"output_{num_rules}_random")
            print(f"🎯 Processing {num_rules} random rules...")
            break
        elif choice == "4":
            print("👋 Goodbye!")
            return 0
        else:
            print("❌ Please enter 1, 2, 3, or 4")

    try:
        print(f"\n🚀 Running command: {' '.join(cmd)}")
        print("-" * 50)

        # check=True raises CalledProcessError on a non-zero exit code.
        subprocess.run(cmd, check=True)

        print("\n✅ Generation completed successfully!")
        summarize_output(cmd[cmd.index("--output-dir") + 1])
        return 0

    except subprocess.CalledProcessError as e:
        print(f"\n❌ Generation failed with exit code {e.returncode}")
        return 1
    except KeyboardInterrupt:
        print("\n⚠️ Generation interrupted by user")
        return 1
    except Exception as e:
        print(f"\n❌ Unexpected error: {e}")
        return 1


if __name__ == "__main__":
    # sys.exit is always available; the bare exit() builtin comes from the
    # site module and is not guaranteed in all interpreter configurations.
    sys.exit(main())
sample_2024_nfl_rulebook.csv ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ rule_number,rule_text,section,category
2
+ 1.1,"The game is played by two teams of 11 players each on a rectangular field 120 yards long and 53⅓ yards wide with goal lines 100 yards apart.",Field and Equipment,Basic Game
3
+ 1.2,"The objective of the game is to advance the ball into the opponent's end zone by running or passing plays.",Field and Equipment,Basic Game
4
+ 2.1,"A down begins when the ball is put in play and ends when the ball becomes dead. A new down begins when the ball is next put in play.",Definitions,Downs
5
+ 2.2,"The offensive team has four consecutive downs to advance the ball 10 yards. If successful, they earn a new set of four downs.",Definitions,Downs
6
+ 3.1,"A forward pass is a pass thrown from behind or on the line of scrimmage toward the opponent's goal line.",Definitions,Passing
7
+ 3.2,"Only one forward pass is permitted during each play from scrimmage, and it must be thrown from behind the line of scrimmage.",Definitions,Passing
8
+ 4.1,"A fumble is the loss of player possession of the ball during a play from scrimmage.",Ball in Play,Fumbles
9
+ 4.2,"A muffed ball is the touching of a loose ball by a player in an unsuccessful attempt to secure possession.",Ball in Play,Fumbles
10
+ 5.1,"A player is out of bounds when any part of his person touches anything other than a player or an official on or outside a boundary line.",Players and Equipment,Boundaries
11
+ 5.2,"The ball is out of bounds when it touches a boundary line or anything on or outside such line.",Players and Equipment,Boundaries