DouDou committed on
Commit
2116622
·
verified ·
1 Parent(s): b9ea1e7

Upload data2/instruction_generation/schemas.py with huggingface_hub

Browse files
data2/instruction_generation/schemas.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Pydantic data structure definitions for LLM structured output
3
+ """
4
+
5
+ from pydantic import BaseModel, Field
6
+ from typing import List, Optional
7
+
8
+
9
class READMESummary(BaseModel):
    """Structured output for README summary"""

    # NOTE: the docstring above and every ``description`` string below are
    # kept verbatim; pydantic copies them into the generated JSON schema, so
    # rewording them would change what a schema consumer sees.

    # --- Required fields: no default is supplied, so each must be present ---
    project_overview: str = Field(
        description="One-sentence project overview",
    )
    main_features: str = Field(
        description="Main features list (bullet points)",
    )
    dependencies: str = Field(
        description="Dependencies and environment requirements",
    )
    how_to_run: str = Field(
        description="How to run/install",
    )
    directory_structure: str = Field(
        description="Directory structure highlights",
    )

    # --- Optional fields: fall back to None when omitted ---
    scientific_computing_related: Optional[str] = Field(
        default=None,
        description="Scientific computing/chemistry related features (if any)",
    )
    typical_input_output: Optional[str] = Field(
        default=None,
        description="Typical input/output examples (if any)",
    )
    notes: Optional[str] = Field(
        default=None,
        description="Notes or special instructions",
    )
28
+
29
+
30
class FunctionInfo(BaseModel):
    """Function information"""

    # NOTE: the docstring above and every ``description`` string below are
    # kept verbatim; pydantic copies them into the generated JSON schema.

    # --- Required: identity, location and source text of the function ---
    function_name: str = Field(
        description="Function name",
    )
    function_start_line: int = Field(
        description="Function start line number (1-indexed, inclusive)",
    )
    function_end_line: int = Field(
        description="Function end line number (1-indexed, inclusive)",
    )
    function_body: str = Field(
        description="Complete function code body",
    )

    # --- Optional: span of the documentation comment; None when omitted ---
    doc_start_line: Optional[int] = Field(
        default=None,
        description="Documentation comment start line number (if any, 1-indexed, inclusive)",
    )
    doc_end_line: Optional[int] = Field(
        default=None,
        description="Documentation comment end line number (if any, 1-indexed, inclusive)",
    )
44
+
45
+
46
class FileParseResult(BaseModel):
    """Parse result for a single code file"""

    # NOTE: the docstring above and every ``description`` string below are
    # kept verbatim; pydantic copies them into the generated JSON schema.

    # --- Required: what the file is and where it lives ---
    language: str = Field(
        description="Programming language (e.g., python, cpp, java)",
    )
    file_path: str = Field(
        description="Relative file path",
    )

    # --- Collections: default_factory=list gives each instance its own
    # fresh empty list when the field is omitted ---
    dependencies: List[str] = Field(
        default_factory=list,
        description="File-level dependency list (import/include/use/require, etc.)",
    )
    functions: List[FunctionInfo] = Field(
        default_factory=list,
        description="List of all functions in the file",
    )