# kabudadada
# Add Foam-Agent MCP service with conda environment support
# 7eb1167
#!/usr/bin/env python3
"""
Convert FoamGPT fine-tune data to OpenAI format for supervised fine-tuning.
"""
import json
import os
from pathlib import Path
def convert_to_openai_format(input_file, output_file):
    """
    Convert FoamGPT fine-tune data to OpenAI chat format.

    Every non-blank line of the input must be a JSON object containing the
    keys ``system_prompt``, ``user_prompt`` and ``file_content``. Each such
    line is rewritten as one JSON line of the form ``{"messages": [...]}``
    holding a system, a user and an assistant message, in that order.

    Blank lines are skipped silently. Lines that are not valid JSON, are not
    JSON objects, or lack a required key are reported on stdout, counted as
    errors and skipped; they never abort the run. File-system errors while
    opening or writing are not swallowed and propagate to the caller.

    Args:
        input_file (str): Path to input JSONL file
        output_file (str): Path to output JSONL file. Missing parent
            directories are created and an existing file is overwritten.
    """
    # (key in the source record, role in the OpenAI message), in output order.
    field_roles = (
        ("system_prompt", "system"),
        ("user_prompt", "user"),
        ("file_content", "assistant"),
    )

    # Create output directory if it doesn't exist
    output_path = Path(output_file)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    converted_count = 0
    error_count = 0

    with open(input_file, 'r', encoding='utf-8') as infile, \
            open(output_file, 'w', encoding='utf-8') as outfile:
        for line_num, line in enumerate(infile, 1):
            stripped = line.strip()
            if not stripped:
                # A blank line (e.g. a trailing newline) is not a record,
                # so it must not be reported as a parse error.
                continue

            # Parse the original data
            try:
                data = json.loads(stripped)
            except json.JSONDecodeError as e:
                print(f"Error parsing line {line_num}: {e}")
                error_count += 1
                continue

            # Validate the shape up front so the message names the real
            # problem instead of surfacing a bare KeyError/TypeError.
            if not isinstance(data, dict):
                print(
                    f"Error on line {line_num}: expected a JSON object, "
                    f"got {type(data).__name__}"
                )
                error_count += 1
                continue

            missing = [key for key, _ in field_roles if key not in data]
            if missing:
                print(
                    f"Error on line {line_num}: missing required field(s): "
                    f"{', '.join(missing)}"
                )
                error_count += 1
                continue

            # Create OpenAI format
            openai_format = {
                "messages": [
                    {"role": role, "content": data[key]}
                    for key, role in field_roles
                ]
            }

            # Write to output file
            try:
                outfile.write(
                    json.dumps(openai_format, ensure_ascii=False) + '\n'
                )
            except UnicodeEncodeError as e:
                # Lone surrogates from "\udXXX" escapes cannot be encoded as
                # UTF-8; skip that record rather than abort the whole run.
                print(f"Unexpected error on line {line_num}: {e}")
                error_count += 1
                continue

            converted_count += 1

            # Progress indicator
            if converted_count % 100 == 0:
                print(f"Converted {converted_count} records...")

    print("\nConversion completed!")
    print(f"Successfully converted: {converted_count} records")
    print(f"Errors encountered: {error_count} records")
    print(f"Output saved to: {output_file}")
def main():
    """Convert the FoamGPT train and test splits to OpenAI format.

    For each split, reads ``data/foamgpt_<split>.jsonl`` from the directory
    containing this script and writes ``data/foamgpt_openai_<split>.jsonl``
    next to it. A split whose input file is missing is reported and skipped,
    so one absent file does not prevent the other split from being converted.
    """
    data_dir = Path(__file__).parent / "data"

    for split in ("train", "test"):
        # Define input and output paths
        input_file = data_dir / f"foamgpt_{split}.jsonl"
        output_file = data_dir / f"foamgpt_openai_{split}.jsonl"

        # Check if input file exists
        if not input_file.exists():
            print(f"Error: Input file '{input_file}' not found!")
            continue

        print(f"Converting {input_file} to OpenAI format...")
        print(f"Output will be saved to: {output_file}")

        # Perform conversion (the converter documents str paths)
        convert_to_openai_format(str(input_file), str(output_file))
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()