aman1762 committed on
Commit
21a69f9
·
verified ·
1 Parent(s): 58336be

Update chunker.py

Browse files
Files changed (1) hide show
  1. chunker.py +7 -6
chunker.py CHANGED
@@ -1,17 +1,18 @@
1
- import re
2
  from langchain_core.documents import Document
3
 
 
4
 
5
- def chunk_code(file_path, code):
6
  chunks = []
7
- functions = re.split(r'\n(?=def |class )', code)
8
 
9
- for block in functions:
10
- if len(block.strip()) > 50:
 
11
  chunks.append(
12
  Document(
13
- page_content=block,
14
  metadata={"file": file_path}
15
  )
16
  )
 
17
  return chunks
 
 
1
  from langchain_core.documents import Document
2
 
3
+ MAX_CHARS = 1200
4
 
5
+ def chunk_code(file_path: str, code: str):
6
  chunks = []
 
7
 
8
+ for i in range(0, len(code), MAX_CHARS):
9
+ chunk = code[i:i + MAX_CHARS]
10
+ if len(chunk.strip()) > 100:
11
  chunks.append(
12
  Document(
13
+ page_content=chunk,
14
  metadata={"file": file_path}
15
  )
16
  )
17
+
18
  return chunks