#!/usr/bin/env python3
"""
File Upload Interface
====================

Web-based file upload interface for high capacity input processing.

Routes:
    GET  /                        Render the upload page.
    POST /upload                  Accept uploaded files and/or pasted text.
    POST /generate_training_data  Build a training-data file from uploads.
    GET  /download/<filename>     Download a generated training-data file.
    GET  /stats                   Return processing statistics as JSON.
    GET  /uploads                 List all known uploads as JSON.
"""

import json
import os
import threading
import time
from pathlib import Path

from flask import (
    Flask,
    jsonify,
    redirect,
    render_template_string,
    request,
    send_from_directory,
    url_for,
)
from werkzeug.exceptions import NotFound
from werkzeug.utils import secure_filename

from high_capacity_input_processor import HighCapacityInputProcessor

app = Flask(__name__)
# Flask rejects any request whose body exceeds this size (100 MB). The limit
# applies to the whole request, not to each uploaded file individually.
app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024

# Initialize processor
processor = HighCapacityInputProcessor()

# HTML template for the upload interface
# NOTE(review): this template contains only text -- no HTML tags, form
# elements or scripts. It looks like the markup was stripped at some point;
# confirm against the intended page before relying on it.
UPLOAD_TEMPLATE = """ LiMp High Capacity Input Processor

🧠 LiMp High Capacity Input Processor

Upload files or enter large text for intelligent chunking and training data generation

📁 File Upload

Drag and drop files here or click to select

Supported formats: TXT, MD, PY, JS, HTML, CSS, JSON, CSV, PDF, DOC, DOCX, XML, YAML

📝 Large Text Input

Character count: 0

📊 Processing Results

"""


@app.route('/')
def index():
    """Main upload page."""
    return render_template_string(UPLOAD_TEMPLATE)


@app.route('/upload', methods=['POST'])
def upload_files():
    """Handle file uploads and text input.

    Reads every file posted under the ``files`` form field and the optional
    ``text`` form field, hands each to the processor, and reports one summary
    entry per processed input.

    Returns:
        JSON ``{'success': True, 'uploads': [...], 'stats': ...}`` on success,
        or ``{'success': False, 'error': <message>}`` with HTTP 400 if
        anything raises while processing.
    """
    try:
        uploads = []

        # Process uploaded files. getlist() returns [] when the field is
        # absent, so no separate membership check is needed.
        for storage in request.files.getlist('files'):
            if not storage.filename:
                # Browsers send an empty part when no file was selected.
                continue

            # secure_filename() can reduce a hostile or unusual name to '';
            # saving to that would target the upload directory itself.
            filename = secure_filename(storage.filename)
            if not filename:
                raise ValueError(f"Invalid filename: {storage.filename!r}")

            # Save uploaded file temporarily
            temp_path = Path(processor.upload_dir) / filename
            storage.save(str(temp_path))

            # Process file
            file_upload = processor.process_file_upload(temp_path)
            uploads.append({
                'file_id': file_upload.file_id,
                'filename': file_upload.filename,
                'file_size': file_upload.file_size,
                'mime_type': file_upload.mime_type,
                'chunks': len(file_upload.chunks),
            })

        # Process text input
        text_content = request.form.get('text', '').strip()
        if text_content:
            chunks = processor.process_high_capacity_input(text_content)
            uploads.append({
                'file_id': 'text_input_' + str(int(time.time())),
                'filename': 'text_input.txt',
                # Length in characters, not encoded bytes.
                'file_size': len(text_content),
                'mime_type': 'text/plain',
                'chunks': len(chunks),
            })

        # Get processing stats
        stats = processor.get_processing_stats()

        return jsonify({
            'success': True,
            'uploads': uploads,
            'stats': stats,
        })

    except Exception as e:
        # Boundary handler: keep the JSON error contract for the client but
        # record the traceback so failures are diagnosable server-side.
        app.logger.exception("Upload processing failed")
        return jsonify({
            'success': False,
            'error': str(e),
        }), 400


@app.route('/generate_training_data', methods=['POST'])
def generate_training_data():
    """Generate training data from processed uploads.

    Expects an optional JSON body ``{"file_ids": [...]}``. When ``file_ids``
    is missing or empty (or the body is absent / not a JSON object), every
    upload known to the processor is used.

    Returns:
        JSON with ``training_data_file`` (base name), ``file_path`` (the
        value returned by the processor) and ``total_examples`` (sum of chunk
        counts), or ``{'success': False, 'error': <message>}`` with HTTP 400
        on failure.
    """
    try:
        # silent=True yields None instead of raising on a missing or
        # malformed body; treat anything that is not an object as "no filter".
        payload = request.get_json(silent=True)
        file_ids = payload.get('file_ids', []) if isinstance(payload, dict) else []

        # Get all uploads
        all_uploads = processor.get_all_uploads()

        # Filter by file IDs if provided
        if file_ids:
            filtered_uploads = [
                upload for upload in all_uploads if upload.file_id in file_ids
            ]
        else:
            filtered_uploads = all_uploads

        # Generate training data
        training_data_file = processor.create_training_data_from_chunks(
            filtered_uploads,
            output_format='jsonl',
            include_metadata=True,
        )

        return jsonify({
            'success': True,
            'training_data_file': Path(training_data_file).name,
            'file_path': training_data_file,
            'total_examples': sum(len(upload.chunks) for upload in filtered_uploads),
        })

    except Exception as e:
        app.logger.exception("Training data generation failed")
        return jsonify({
            'success': False,
            'error': str(e),
        }), 400


@app.route('/download/<filename>')
def download_file(filename):
    """Download generated training data file.

    Args:
        filename: Name of a file inside the processor's training-data
            directory, taken from the URL.

    Returns:
        The file as an attachment, or ``("File not found", 404)`` when it
        does not exist or the name would resolve outside that directory.
    """
    # Resolve against the working directory so a relative training_data_dir
    # is not reinterpreted relative to the Flask application root.
    directory = Path(processor.training_data_dir).resolve()
    try:
        # send_from_directory refuses paths that escape `directory`.
        return send_from_directory(str(directory), filename, as_attachment=True)
    except NotFound:
        return "File not found", 404


@app.route('/stats')
def get_stats():
    """Get processing statistics."""
    stats = processor.get_processing_stats()
    return jsonify(stats)


@app.route('/uploads')
def list_uploads():
    """List all uploads."""
    uploads = processor.get_all_uploads()
    return jsonify([{
        'file_id': upload.file_id,
        'filename': upload.filename,
        'file_size': upload.file_size,
        'mime_type': upload.mime_type,
        'upload_timestamp': upload.upload_timestamp,
        'chunks': len(upload.chunks),
    } for upload in uploads])


if __name__ == '__main__':
    print("🚀 Starting LiMp High Capacity Input Processor")
    print("📁 Upload directory:", processor.upload_dir)
    print("📁 Chunk directory:", processor.chunk_dir)
    print("📁 Training data directory:", processor.training_data_dir)
    print("🌐 Web interface: http://localhost:5000")

    # The Werkzeug debugger lets anyone who can reach the server execute
    # arbitrary code, and this server listens on all interfaces. Keep debug
    # mode off unless explicitly requested via the FLASK_DEBUG env variable.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes')
    app.run(debug=debug_enabled, host='0.0.0.0', port=5000)