DouDou committed on
Upload data2/instruction_generation/schemas.py with huggingface_hub
Browse files
data2/instruction_generation/schemas.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Pydantic data structure definitions for LLM structured output
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from pydantic import BaseModel, Field
|
| 6 |
+
from typing import List, Optional
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class READMESummary(BaseModel):
    """Structured output for README summary"""

    # NOTE: the class docstring and every ``description`` string below are kept
    # verbatim. Pydantic publishes both in the model's JSON schema, so changing
    # their wording would change the schema this model emits.

    # --- Required sections -------------------------------------------------
    # These fields declare no default, so validation fails when one is absent.
    project_overview: str = Field(description="One-sentence project overview")
    # Held as a single string (not a list), even though the description asks
    # for bullet points.
    main_features: str = Field(description="Main features list (bullet points)")
    dependencies: str = Field(description="Dependencies and environment requirements")
    how_to_run: str = Field(description="How to run/install")
    directory_structure: str = Field(description="Directory structure highlights")

    # --- Optional sections -------------------------------------------------
    # Each of these falls back to None when the value is not supplied.
    scientific_computing_related: Optional[str] = Field(
        default=None,
        description="Scientific computing/chemistry related features (if any)",
    )
    typical_input_output: Optional[str] = Field(
        default=None,
        description="Typical input/output examples (if any)",
    )
    notes: Optional[str] = Field(
        default=None,
        description="Notes or special instructions",
    )
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
class FunctionInfo(BaseModel):
    """Function information"""

    # NOTE: the class docstring and every ``description`` string below are kept
    # verbatim. Pydantic publishes both in the model's JSON schema, so changing
    # their wording would change the schema this model emits.

    # --- Required fields ---------------------------------------------------
    function_name: str = Field(description="Function name")
    # Line numbers are described as 1-indexed and inclusive. Nothing in this
    # model enforces that (no lower bound, no start <= end check), so callers
    # that slice source text with these values should validate them first.
    function_start_line: int = Field(description="Function start line number (1-indexed, inclusive)")
    function_end_line: int = Field(description="Function end line number (1-indexed, inclusive)")
    function_body: str = Field(description="Complete function code body")

    # --- Optional documentation-comment span -------------------------------
    # Both ends default to None when not supplied. The two fields are
    # independent here: the model does not require them to be set together.
    doc_start_line: Optional[int] = Field(
        default=None,
        description="Documentation comment start line number (if any, 1-indexed, inclusive)",
    )
    doc_end_line: Optional[int] = Field(
        default=None,
        description="Documentation comment end line number (if any, 1-indexed, inclusive)",
    )
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
class FileParseResult(BaseModel):
    """Parse result for a single code file"""

    # NOTE: the class docstring and every ``description`` string below are kept
    # verbatim. Pydantic publishes both in the model's JSON schema, so changing
    # their wording would change the schema this model emits.

    # --- Required fields ---------------------------------------------------
    # ``language`` is a free-form string; the names in the description are
    # examples only, not an enforced set.
    language: str = Field(description="Programming language (e.g., python, cpp, java)")
    file_path: str = Field(description="Relative file path")

    # --- Collections -------------------------------------------------------
    # ``default_factory=list`` gives every instance its own fresh empty list
    # when the field is omitted, rather than one shared mutable default.
    dependencies: List[str] = Field(
        default_factory=list,
        description="File-level dependency list (import/include/use/require, etc.)",
    )
    # Each entry is validated as a FunctionInfo model.
    functions: List[FunctionInfo] = Field(
        default_factory=list,
        description="List of all functions in the file",
    )
|