# File size: 2,284 Bytes
# 2116622
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
"""
Pydantic data structure definitions for LLM structured output
"""

from pydantic import BaseModel, Field
from typing import List, Optional


class READMESummary(BaseModel):
    """Structured output for README summary"""
    # NOTE: the docstring above is intentionally left untouched. Pydantic
    # surfaces a model's class docstring as the description in the generated
    # JSON schema, so it is effectively runtime text rather than a comment.

    # --- Required sections: no default, so each must be supplied ---
    project_overview: str = Field(
        description="One-sentence project overview",
    )
    main_features: str = Field(
        description="Main features list (bullet points)",
    )
    dependencies: str = Field(
        description="Dependencies and environment requirements",
    )
    how_to_run: str = Field(
        description="How to run/install",
    )
    directory_structure: str = Field(
        description="Directory structure highlights",
    )

    # --- Optional sections: fall back to None when not provided ---
    scientific_computing_related: Optional[str] = Field(
        description="Scientific computing/chemistry related features (if any)",
        default=None,
    )
    typical_input_output: Optional[str] = Field(
        description="Typical input/output examples (if any)",
        default=None,
    )
    notes: Optional[str] = Field(
        description="Notes or special instructions",
        default=None,
    )


class FunctionInfo(BaseModel):
    """Function information"""
    # NOTE: the docstring above is intentionally left untouched. Pydantic
    # surfaces a model's class docstring as the description in the generated
    # JSON schema, so it is effectively runtime text rather than a comment.

    # --- Required: identity, location and source text of the function ---
    function_name: str = Field(
        description="Function name",
    )
    function_start_line: int = Field(
        description="Function start line number (1-indexed, inclusive)",
    )
    function_end_line: int = Field(
        description="Function end line number (1-indexed, inclusive)",
    )
    function_body: str = Field(
        description="Complete function code body",
    )

    # --- Optional: span of the documentation comment; None when omitted ---
    doc_start_line: Optional[int] = Field(
        description="Documentation comment start line number (if any, 1-indexed, inclusive)",
        default=None,
    )
    doc_end_line: Optional[int] = Field(
        description="Documentation comment end line number (if any, 1-indexed, inclusive)",
        default=None,
    )


class FileParseResult(BaseModel):
    """Parse result for a single code file"""
    # NOTE: the docstring above is intentionally left untouched. Pydantic
    # surfaces a model's class docstring as the description in the generated
    # JSON schema, so it is effectively runtime text rather than a comment.

    # --- Required: what the file is and where it lives ---
    language: str = Field(
        description="Programming language (e.g., python, cpp, java)",
    )
    file_path: str = Field(
        description="Relative file path",
    )

    # --- Collections: default_factory gives each instance its own new list ---
    dependencies: List[str] = Field(
        description="File-level dependency list (import/include/use/require, etc.)",
        default_factory=list,
    )
    functions: List[FunctionInfo] = Field(
        description="List of all functions in the file",
        default_factory=list,
    )