# Report-Generator / gemma_classifier.py (uploaded with huggingface_hub, commit c001f24)
import os
import json
import requests
import sys
from typing import List, Dict, Any, Optional
from api_key_manager import get_api_key_manager
class GemmaClassifier:
    """Classify exam questions into subject/chapter via the NVIDIA NIM Gemma API.

    The request parameters are class-level constants so that a subclass can
    override them without touching `classify`.
    """

    API_URL = "https://integrate.api.nvidia.com/v1/chat/completions"
    MODEL_NAME = "google/gemma-3n-e4b-it"
    TEMPERATURE = 0.2
    MAX_TOKENS = 2048
    REQUEST_TIMEOUT_SECONDS = 300

    def __init__(self):
        # API key will be fetched dynamically via get_api_key_manager
        pass

    def classify(self, questions: List[str], start_index: int = 0) -> Optional[Dict[str, Any]]:
        """
        Classifies a list of questions using the NVIDIA NIM Gemma API.

        Args:
            questions: The question texts to classify.
            start_index: The overall starting index for this batch
                (e.g., 0, 7, 14...). Question numbers in the prompt start at
                `start_index + 1`.

        Returns:
            The JSON document parsed from the model's reply, or None when the
            request fails, the response lacks the expected fields, or the
            reply is not valid JSON.

        Raises:
            ValueError: If the API key manager returns no key for 'nvidia'.
        """
        manager = get_api_key_manager()
        api_key, key_index = manager.get_key('nvidia')
        if not api_key:
            raise ValueError("No available NVIDIA API keys. Please set NVIDIA_API_KEY environment variable.")

        full_prompt = self._generate_gemma_prompt(questions=questions, start_index=start_index)
        headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        }
        payload = {
            "model": self.MODEL_NAME,
            "messages": [{"role": "user", "content": full_prompt}],
            "temperature": self.TEMPERATURE,
            "max_tokens": self.MAX_TOKENS,
            "stream": False,
        }

        print(f"Sending batch to NVIDIA NIM Gemma API with {len(questions)} questions.")
        try:
            response = requests.post(
                self.API_URL,
                headers=headers,
                json=payload,
                timeout=self.REQUEST_TIMEOUT_SECONDS,
            )
            response.raise_for_status()
            response_json = response.json()

            choices = response_json.get('choices')
            if not choices:
                return self._report_failure(
                    manager, key_index,
                    "Error: 'choices' not found or empty in NVIDIA NIM Gemma response.",
                )

            first_choice = choices[0]
            message = first_choice.get('message') if isinstance(first_choice, dict) else None
            content = message.get('content') if isinstance(message, dict) else None
            # A null / non-string content cannot be parsed; treat it like a
            # missing field instead of letting `.strip()` blow up later.
            if not isinstance(content, str):
                return self._report_failure(
                    manager, key_index,
                    "Error: 'message' or 'content' not found in NVIDIA NIM Gemma response choice.",
                )

            model_output_content = self._strip_code_fence(content)
            try:
                batch_result = json.loads(model_output_content)
            except json.JSONDecodeError as e:
                return self._report_failure(
                    manager, key_index,
                    f"Error decoding JSON from model output: {e}",
                    f"Model output content: {model_output_content}",
                )

            manager.mark_success('nvidia', key_index)
            return batch_result
        except requests.exceptions.RequestException as e:
            details = [f"Error during NVIDIA NIM Gemma API call: {repr(e)}"]
            if e.response is not None:
                details.append(f"Response status code: {e.response.status_code}")
                details.append(f"Response body: {e.response.text}")
            return self._report_failure(manager, key_index, *details)
        except Exception as e:
            # Last-resort boundary: any other failure is logged and reported
            # as a failed attempt for this key rather than propagated.
            return self._report_failure(manager, key_index, f"An unexpected error occurred: {e}")

    @staticmethod
    def _report_failure(manager, key_index, *messages: str) -> None:
        """Print each message to stderr, mark the 'nvidia' key as failed, return None."""
        for message in messages:
            print(message, file=sys.stderr)
        manager.mark_failure('nvidia', key_index)
        return None

    @staticmethod
    def _strip_code_fence(text: str) -> str:
        """Return `text` without surrounding whitespace or a Markdown code fence.

        Handles both "```json ... ```" and bare "``` ... ```" wrappers, and
        tolerates whitespace/newlines before the opening or after the closing
        fence. Text that is not fenced is returned stripped but otherwise
        unchanged.
        """
        cleaned = text.strip()
        fence = "```"
        is_fenced = (
            len(cleaned) >= 2 * len(fence)
            and cleaned.startswith(fence)
            and cleaned.endswith(fence)
        )
        if not is_fenced:
            return cleaned
        inner = cleaned[len(fence):-len(fence)]
        # Drop the optional language tag that follows the opening fence.
        if inner[:4].lower() == "json":
            inner = inner[4:]
        return inner.strip()

    def _generate_gemma_prompt(self, questions: List[str], start_index: int) -> str:
        """
        Generates the detailed prompt for the Gemma classifier.

        Each question is numbered `start_index + 1`, `start_index + 2`, ... and
        the numbered list is embedded at the end of the fixed instruction text
        (syllabus, guidelines, and output JSON schema).
        """
        input_text = "\n".join(
            f"{number}. {question}"
            for number, question in enumerate(questions, start=start_index + 1)
        )
        # NOTE: the prompt body is intentionally flush-left; any indentation
        # inside the triple-quoted string would become part of the prompt.
        prompt = f"""
**System Role:** You are a question classifier for NEET/JEE exams, specialized in mapping questions to their corresponding subjects and chapters from the NCERT syllabus.
Your task is to analyze each question, first classify it into the most relevant subject, and then identify the most relevant chapter(s) from the official syllabus structures provided below.
**Available Subjects (Use these exact titles):**
- Biology
- Chemistry
- Physics
- Mathematics
**Syllabus Chapters (Use these exact titles for the respective subjects):**
---
**1. BIOLOGY (Common for NEET & JEE)**
**Class XI**
1. The Living World
2. Biological Classification
3. Plant Kingdom
4. Animal Kingdom
5. Morphology of Flowering Plants
6. Anatomy of Flowering Plants
7. Structural Organisation in Animals
8. Cell: The Unit of Life
9. Biomolecules
10. Cell Cycle and Cell Division
11. Photosynthesis in Higher Plants
12. Respiration in Plants
13. Plant Growth and Development
14. Breathing and Exchange of Gases
15. Body Fluids and Circulation
16. Excretory Products and their Elimination
17. Locomotion and Movement
18. Neural Control and Coordination
19. Chemical Coordination and Integration
20. Sexual Reproduction in Flowering Plants
21. Human Reproduction
22. Reproductive Health
23. Principles of Inheritance and Variation
24. Molecular Basis of Inheritance
25. Evolution
26. Health and Disease
27. Improvement in Food Production
28. Microbes in Human Welfare
29. Biotechnology - Principles and Processes
30. Biotechnology and Its Applications
31. Organisms and Populations
32. Ecosystem
33. Biodiversity and Its Conservation
---
**2. CHEMISTRY (Common for NEET & JEE)**
**Class XI**
1. Some Basic Concepts of Chemistry
2. Structure of Atom
3. Classification of Elements and Periodicity in Properties
4. Chemical Bonding and Molecular Structure
5. States of Matter: Gases and Liquids
6. Thermodynamics
7. Equilibrium
8. Redox Reactions
9. Hydrogen
10. The s-Block Elements
11. The p-Block Elements (Group 13 and 14)
12. Organic Chemistry – Some Basic Principles and Techniques (GOC)
13. Hydrocarbons
14. Environmental Chemistry
**Class XII**
1. The Solid State
2. Solutions
3. Electrochemistry
4. Chemical Kinetics
5. Surface Chemistry
6. General Principles and Processes of Isolation of Elements (Metallurgy)
7. The p-Block Elements (Group 15 to 18)
8. The d- and f- Block Elements
9. Coordination Compounds
10. Haloalkanes and Haloarenes
11. Alcohols, Phenols and Ethers
12. Aldehydes, Ketones and Carboxylic Acids
13. Amines
14. Biomolecules
15. Polymers
16. Chemistry in Everyday Life
---
**3. PHYSICS (Common for NEET & JEE)**
**Class XI**
1. Units and Measurements
2. Motion in a Straight Line
3. Motion in a Plane
4. Laws of Motion
5. Work, Energy and Power
6. System of Particles and Rotational Motion
7. Gravitation
8. Mechanical Properties of Solids
9. Mechanical Properties of Fluids
10. Thermal Properties of Matter
11. Thermodynamics
12. Kinetic Theory
13. Oscillations
14. Waves
**Class XII**
1. Electric Charges and Fields
2. Electrostatic Potential and Capacitance
3. Current Electricity
4. Moving Charges and Magnetism
5. Magnetism and Matter
6. Electromagnetic Induction
7. Alternating Current
8. Electromagnetic Waves
9. Ray Optics and Optical Instruments
10. Wave Optics
11. Dual Nature of Radiation and Matter
12. Atoms
13. Nuclei
14. Semiconductor Electronics: Materials, Devices and Simple Circuits
15. Communication Systems
---
**4. MATHEMATICS (For JEE Only)**
**Class XI**
1. Sets
2. Relations and Functions
3. Trigonometric Functions
4. Principle of Mathematical Induction
5. Complex Numbers and Quadratic Equations
6. Linear Inequalities
7. Permutations and Combinations
8. Binomial Theorem
9. Sequences and Series
10. Straight Lines
11. Conic Sections
12. Introduction to Three Dimensional Geometry
13. Limits and Derivatives
14. Mathematical Reasoning
15. Statistics
16. Probability
**Class XII**
1. Relations and Functions
2. Inverse Trigonometric Functions
3. Matrices
4. Determinants
5. Continuity and Differentiability
6. Application of Derivatives
7. Integrals
8. Application of Integrals
9. Differential Equations
10. Vector Algebra
11. Three Dimensional Geometry
12. Linear Programming
13. Probability
---
**Classification Guidelines:**
1. **Primary Classification**: Identify the single most relevant subject, and then the most relevant chapter(s) within that subject, that directly addresses the question's core concept.
2. **Multi-Chapter Questions**: If a question explicitly spans 2-3 distinct chapters, include all relevant chapters.
3. **Confidence Scoring** (0.0 to 1.0):
* **1.0**: Perfect match
* **0.8-0.9**: Strong match
* **0.5-0.7**: Moderate match
* **Below 0.5**: Avoid unless unavoidable.
4. **Non-Syllabus Questions**: If a question is not from any of the provided subjects/chapters, set `subject` to 'Unclassified' and `chapter_title` to 'Unclassified'.
**Critical Requirements:**
- Use ONLY the subject titles exactly as listed above, or 'Unclassified'.
- Use ONLY the chapter titles exactly as listed above, or 'Unclassified'.
- Preserve the original question text completely.
- Output ONLY valid JSON.
- The "index" field MUST match the question number shown in the input (e.g., if the question is numbered "8.", then "index": 8).
**Output JSON Schema:**
```json
{{
"data": [
{{
"index": 1,
"subject": "<exact subject title from list or 'Unclassified'>",
"chapter_index": <chapter number or 0>,
"chapter_title": "<exact chapter title from list or 'Unclassified'>",
"original_question_text": "<complete original question with all formatting>",
"confidence": <0.0 to 1.0>
}}
],
"success": [true]
}}
```
Now classify the following question(s):
```
{input_text}
```
"""
        return prompt