|
|
|
|
|
""" |
|
|
File Upload Interface
=====================

Web-based file upload interface for high capacity input processing.
|
|
""" |
|
|
|
|
|
import json
import os
import threading
import time
from pathlib import Path

from flask import (
    Flask,
    jsonify,
    redirect,
    render_template_string,
    request,
    send_file,
    url_for,
)
from werkzeug.utils import secure_filename

from high_capacity_input_processor import HighCapacityInputProcessor
|
|
|
|
|
# WSGI application object; every route below is registered on it.
app = Flask(__name__)

# Flask's MAX_CONTENT_LENGTH setting, here 100 * 1024 * 1024 bytes.
app.config.update(MAX_CONTENT_LENGTH=100 * 1024 * 1024)

# Single module-level processor instance shared by all request handlers.
processor = HighCapacityInputProcessor()
|
|
|
|
|
|
|
|
# Single-page upload UI (HTML + CSS + JS) served by the index route through
# render_template_string(), so the text is parsed as a Jinja template: keep
# the sequences "{{", "{%" and "{#" out of it. It is a normal (non-raw)
# Python string, so avoid backslashes in the embedded JavaScript as well.
#
# Client-side notes:
#   * every value that originates from the user or the server is passed
#     through escapeHtml() before being placed into innerHTML;
#   * the selected-file list is rendered into its own #selectedFiles
#     container so the <input type="file"> element (and the listeners bound
#     to it) is never replaced;
#   * the /upload response reports "chunks" as a number; showResults() also
#     accepts an array for safety.
UPLOAD_TEMPLATE = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>LiMp High Capacity Input Processor</title>
    <style>
        body {
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
            max-width: 1200px;
            margin: 0 auto;
            padding: 20px;
            background-color: #f5f5f5;
        }
        .container {
            background: white;
            padding: 30px;
            border-radius: 10px;
            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
        }
        .header {
            text-align: center;
            margin-bottom: 30px;
        }
        .header h1 {
            color: #333;
            margin-bottom: 10px;
        }
        .header p {
            color: #666;
            font-size: 16px;
        }
        .upload-section {
            border: 2px dashed #ddd;
            border-radius: 10px;
            padding: 40px;
            text-align: center;
            margin-bottom: 30px;
            transition: border-color 0.3s;
        }
        .upload-section:hover {
            border-color: #4CAF50;
        }
        .upload-section.dragover {
            border-color: #4CAF50;
            background-color: #f0f8f0;
        }
        .file-input {
            margin: 20px 0;
        }
        .file-input input[type="file"] {
            display: none;
        }
        .file-input label {
            display: inline-block;
            padding: 12px 24px;
            background-color: #4CAF50;
            color: white;
            border-radius: 5px;
            cursor: pointer;
            font-size: 16px;
            transition: background-color 0.3s;
        }
        .file-input label:hover {
            background-color: #45a049;
        }
        .text-input {
            margin: 20px 0;
        }
        .text-input textarea {
            width: 100%;
            height: 200px;
            padding: 15px;
            border: 1px solid #ddd;
            border-radius: 5px;
            font-family: monospace;
            font-size: 14px;
            resize: vertical;
        }
        .submit-btn {
            background-color: #2196F3;
            color: white;
            padding: 15px 30px;
            border: none;
            border-radius: 5px;
            font-size: 16px;
            cursor: pointer;
            margin: 10px 5px;
            transition: background-color 0.3s;
        }
        .submit-btn:hover {
            background-color: #1976D2;
        }
        .submit-btn:disabled {
            background-color: #ccc;
            cursor: not-allowed;
        }
        .progress {
            width: 100%;
            height: 20px;
            background-color: #f0f0f0;
            border-radius: 10px;
            overflow: hidden;
            margin: 20px 0;
            display: none;
        }
        .progress-bar {
            height: 100%;
            background-color: #4CAF50;
            width: 0%;
            transition: width 0.3s;
        }
        .results {
            margin-top: 30px;
            padding: 20px;
            background-color: #f9f9f9;
            border-radius: 5px;
            display: none;
        }
        .stats {
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
            gap: 20px;
            margin: 20px 0;
        }
        .stat-card {
            background: white;
            padding: 20px;
            border-radius: 5px;
            text-align: center;
            box-shadow: 0 2px 5px rgba(0,0,0,0.1);
        }
        .stat-number {
            font-size: 24px;
            font-weight: bold;
            color: #4CAF50;
        }
        .stat-label {
            color: #666;
            margin-top: 5px;
        }
        .file-list {
            margin-top: 20px;
        }
        .file-item {
            background: white;
            padding: 15px;
            margin: 10px 0;
            border-radius: 5px;
            box-shadow: 0 2px 5px rgba(0,0,0,0.1);
        }
        .file-name {
            font-weight: bold;
            color: #333;
        }
        .file-info {
            color: #666;
            font-size: 14px;
            margin-top: 5px;
        }
        .chunk-info {
            color: #888;
            font-size: 12px;
            margin-top: 5px;
        }
        .error {
            color: #f44336;
            background-color: #ffebee;
            padding: 15px;
            border-radius: 5px;
            margin: 20px 0;
        }
        .success {
            color: #4CAF50;
            background-color: #e8f5e8;
            padding: 15px;
            border-radius: 5px;
            margin: 20px 0;
        }
        .download-btn {
            background-color: #FF9800;
            color: white;
            padding: 8px 16px;
            border: none;
            border-radius: 3px;
            font-size: 12px;
            cursor: pointer;
            margin-top: 10px;
        }
        .download-btn:hover {
            background-color: #F57C00;
        }
    </style>
</head>
<body>
    <div class="container">
        <div class="header">
            <h1>&#129504; LiMp High Capacity Input Processor</h1>
            <p>Upload files or enter large text for intelligent chunking and training data generation</p>
        </div>

        <form id="uploadForm" enctype="multipart/form-data">
            <div class="upload-section" id="uploadSection">
                <div id="selectedFiles"></div>
                <h3>File Upload</h3>
                <p>Drag and drop files here or click to select</p>
                <div class="file-input">
                    <label for="fileInput">Choose Files</label>
                    <input type="file" id="fileInput" name="files" multiple accept=".txt,.md,.py,.js,.html,.css,.json,.jsonl,.csv,.pdf,.doc,.docx,.xml,.yaml,.yml">
                </div>
                <p><small>Supported formats: TXT, MD, PY, JS, HTML, CSS, JSON, CSV, PDF, DOC, DOCX, XML, YAML</small></p>
            </div>

            <div class="text-input">
                <h3>Large Text Input</h3>
                <textarea id="textInput" name="text" placeholder="Enter large text content here (up to 100MB)..."></textarea>
                <p><small>Character count: <span id="charCount">0</span></small></p>
            </div>

            <div>
                <button type="submit" class="submit-btn" id="submitBtn">Process Input</button>
                <button type="button" class="submit-btn" onclick="generateTrainingData()">Generate Training Data</button>
                <button type="button" class="submit-btn" onclick="clearAll()">Clear All</button>
            </div>

            <div class="progress" id="progress">
                <div class="progress-bar" id="progressBar"></div>
            </div>
        </form>

        <div class="results" id="results">
            <h3>Processing Results</h3>
            <div id="resultsContent"></div>
        </div>
    </div>

    <script>
        let uploads = [];

        // Element handles
        const fileInput = document.getElementById('fileInput');
        const uploadSection = document.getElementById('uploadSection');
        const selectedFiles = document.getElementById('selectedFiles');
        const textInput = document.getElementById('textInput');
        const charCount = document.getElementById('charCount');
        const submitBtn = document.getElementById('submitBtn');
        const progress = document.getElementById('progress');
        const progressBar = document.getElementById('progressBar');
        const results = document.getElementById('results');
        const resultsContent = document.getElementById('resultsContent');

        // Escape a value for use as element text inside innerHTML markup.
        function escapeHtml(value) {
            const node = document.createElement('div');
            node.textContent = String(value);
            return node.innerHTML;
        }

        // Character count update
        textInput.addEventListener('input', function() {
            charCount.textContent = this.value.length.toLocaleString();
        });

        // Drag and drop handling
        uploadSection.addEventListener('dragover', function(e) {
            e.preventDefault();
            this.classList.add('dragover');
        });

        uploadSection.addEventListener('dragleave', function(e) {
            e.preventDefault();
            this.classList.remove('dragover');
        });

        uploadSection.addEventListener('drop', function(e) {
            e.preventDefault();
            this.classList.remove('dragover');
            fileInput.files = e.dataTransfer.files;
            updateFileList();
        });

        fileInput.addEventListener('change', updateFileList);

        // Show the currently selected files. Only #selectedFiles is rewritten,
        // so the file input element and its listeners stay attached.
        function updateFileList() {
            const files = fileInput.files;
            let fileList = '';
            if (files.length > 0) {
                fileList = '<h4>Selected Files:</h4>';
                for (let file of files) {
                    fileList += `<div class="file-item">
                        <div class="file-name">${escapeHtml(file.name)}</div>
                        <div class="file-info">Size: ${(file.size / 1024 / 1024).toFixed(2)} MB, Type: ${escapeHtml(file.type)}</div>
                    </div>`;
                }
            }
            selectedFiles.innerHTML = fileList;
        }

        // Form submission
        document.getElementById('uploadForm').addEventListener('submit', async function(e) {
            e.preventDefault();

            const formData = new FormData();
            const files = fileInput.files;
            const text = textInput.value.trim();

            if (files.length === 0 && text === '') {
                alert('Please select files or enter text content');
                return;
            }

            // Add files
            for (let file of files) {
                formData.append('files', file);
            }

            // Add text
            if (text) {
                formData.append('text', text);
            }

            submitBtn.disabled = true;
            progress.style.display = 'block';
            progressBar.style.width = '0%';

            // Simulate progress (capped at 90% until the response arrives)
            let progressValue = 0;
            const progressInterval = setInterval(() => {
                progressValue += Math.random() * 15;
                if (progressValue > 90) progressValue = 90;
                progressBar.style.width = progressValue + '%';
            }, 200);

            try {
                const response = await fetch('/upload', {
                    method: 'POST',
                    body: formData
                });

                // Stop the simulation before showing 100% so a late tick
                // cannot pull the bar back down.
                clearInterval(progressInterval);
                progressBar.style.width = '100%';

                const result = await response.json();

                if (result.success) {
                    uploads = result.uploads || [];
                    showResults(result);
                } else {
                    showError(result.error);
                }
            } catch (error) {
                showError('Upload failed: ' + error.message);
            } finally {
                // Also reached when fetch() rejects; clearing twice is harmless.
                clearInterval(progressInterval);
                submitBtn.disabled = false;
                setTimeout(() => {
                    progress.style.display = 'none';
                    progressBar.style.width = '0%';
                }, 1000);
            }
        });

        async function generateTrainingData() {
            if (uploads.length === 0) {
                alert('No uploads available. Please upload files or enter text first.');
                return;
            }

            try {
                const response = await fetch('/generate_training_data', {
                    method: 'POST',
                    headers: {
                        'Content-Type': 'application/json',
                    },
                    body: JSON.stringify({ file_ids: uploads.map(u => u.file_id) })
                });

                const result = await response.json();

                if (result.success) {
                    showSuccess(`Training data generated: ${result.training_data_file}`);
                    // Add download link
                    const downloadLink = document.createElement('a');
                    downloadLink.href = '/download/' + encodeURIComponent(result.training_data_file);
                    downloadLink.textContent = 'Download Training Data';
                    downloadLink.className = 'download-btn';
                    resultsContent.appendChild(downloadLink);
                } else {
                    showError(result.error);
                }
            } catch (error) {
                showError('Training data generation failed: ' + error.message);
            }
        }

        function showResults(data) {
            const stats = data.stats || {};
            let html = '<div class="success">Processing completed successfully!</div>';

            // Statistics
            html += '<div class="stats">';
            html += `<div class="stat-card">
                <div class="stat-number">${escapeHtml(stats.total_files)}</div>
                <div class="stat-label">Files Processed</div>
            </div>`;
            html += `<div class="stat-card">
                <div class="stat-number">${escapeHtml(stats.total_chunks)}</div>
                <div class="stat-label">Chunks Created</div>
            </div>`;
            html += `<div class="stat-card">
                <div class="stat-number">${Number(stats.total_size_mb || 0).toFixed(2)}</div>
                <div class="stat-label">Total Size (MB)</div>
            </div>`;
            html += '</div>';

            // File list
            if (data.uploads && data.uploads.length > 0) {
                html += '<div class="file-list">';
                html += '<h4>Processed Files:</h4>';
                for (let upload of data.uploads) {
                    // The server sends a chunk count; tolerate an array too.
                    const chunkCount = Array.isArray(upload.chunks) ? upload.chunks.length : upload.chunks;
                    html += `<div class="file-item">
                        <div class="file-name">${escapeHtml(upload.filename)}</div>
                        <div class="file-info">Size: ${(upload.file_size / 1024 / 1024).toFixed(2)} MB, Type: ${escapeHtml(upload.mime_type)}</div>
                        <div class="chunk-info">Chunks: ${escapeHtml(chunkCount)}</div>
                    </div>`;
                }
                html += '</div>';
            }

            resultsContent.innerHTML = html;
            results.style.display = 'block';
        }

        function showError(message) {
            resultsContent.innerHTML = `<div class="error">Error: ${escapeHtml(message)}</div>`;
            results.style.display = 'block';
        }

        function showSuccess(message) {
            resultsContent.innerHTML = `<div class="success">${escapeHtml(message)}</div>`;
            results.style.display = 'block';
        }

        function clearAll() {
            fileInput.value = '';
            selectedFiles.innerHTML = '';
            textInput.value = '';
            charCount.textContent = '0';
            uploads = [];
            results.style.display = 'none';
        }
    </script>
</body>
</html>
"""
|
|
|
|
|
@app.route('/')
def index():
    """Serve the upload page rendered from ``UPLOAD_TEMPLATE``."""
    page = render_template_string(UPLOAD_TEMPLATE)
    return page
|
|
|
|
|
@app.route('/upload', methods=['POST'])
def upload_files():
    """Handle file uploads and text input.

    Saves every multipart part named ``files`` into ``processor.upload_dir``
    and passes it to ``processor.process_file_upload``; an optional ``text``
    form field is passed to ``processor.process_high_capacity_input``.

    Returns:
        ``{'success': True, 'uploads': [...], 'stats': ...}`` as JSON, where
        each upload entry carries ``file_id``, ``filename``, ``file_size``,
        ``mime_type`` and ``chunks`` (a count). If anything raises, the
        response is ``{'success': False, 'error': <message>}`` with HTTP 400.

    NOTE(review): the ``text_input_<timestamp>`` id is invented here; whether
    the processor records text input under any id is not visible from this
    file, so confirm that /generate_training_data can actually select it.
    """
    try:
        uploads = []

        # getlist() yields an empty list when the field is absent.
        for file in request.files.getlist('files'):
            if not file.filename:
                # Part without a filename (nothing selected) - skip it.
                continue

            filename = secure_filename(file.filename)
            if not filename:
                # secure_filename() can strip a name down to '' (e.g. '..');
                # joining that would target the upload directory itself.
                raise ValueError(f"Invalid filename: {file.filename!r}")

            temp_path = Path(processor.upload_dir) / filename
            file.save(str(temp_path))

            file_upload = processor.process_file_upload(temp_path)
            uploads.append({
                'file_id': file_upload.file_id,
                'filename': file_upload.filename,
                'file_size': file_upload.file_size,
                'mime_type': file_upload.mime_type,
                'chunks': len(file_upload.chunks),
            })

        text_content = request.form.get('text', '').strip()
        if text_content:
            chunks = processor.process_high_capacity_input(text_content)
            uploads.append({
                'file_id': 'text_input_' + str(int(time.time())),
                'filename': 'text_input.txt',
                # Size in bytes (UTF-8), so it is comparable with the file
                # sizes above; len(str) would count characters instead.
                'file_size': len(text_content.encode('utf-8')),
                'mime_type': 'text/plain',
                'chunks': len(chunks),
            })

        stats = processor.get_processing_stats()

        return jsonify({
            'success': True,
            'uploads': uploads,
            'stats': stats,
        })

    except Exception as e:
        # Route-level boundary: keep the JSON error contract the page relies
        # on, but record the traceback instead of discarding it.
        app.logger.exception("Upload processing failed")
        return jsonify({
            'success': False,
            'error': str(e),
        }), 400
|
|
|
|
|
@app.route('/generate_training_data', methods=['POST'])
def generate_training_data():
    """Generate training data from processed uploads.

    Expects a JSON object body with an optional ``file_ids`` list. When the
    list is non-empty only uploads whose ``file_id`` is listed are used;
    otherwise every upload returned by ``processor.get_all_uploads()`` is.

    Returns:
        ``{'success': True, 'training_data_file': <basename>,
        'file_path': <path>, 'total_examples': <chunk count>}`` as JSON, or
        ``{'success': False, 'error': <message>}`` with HTTP 400 when the
        body is invalid or generation raises.
    """
    try:
        # silent=True turns a missing/invalid JSON body into None, which is
        # then reported explicitly instead of as an AttributeError message.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            raise ValueError('Request body must be a JSON object')

        file_ids = data.get('file_ids') or []
        if not isinstance(file_ids, list):
            raise ValueError("'file_ids' must be a list")

        all_uploads = processor.get_all_uploads()

        if file_ids:
            wanted = set(file_ids)  # built once for O(1) membership tests
            filtered_uploads = [
                upload for upload in all_uploads if upload.file_id in wanted
            ]
        else:
            filtered_uploads = all_uploads

        training_data_file = processor.create_training_data_from_chunks(
            filtered_uploads,
            output_format='jsonl',
            include_metadata=True,
        )

        return jsonify({
            'success': True,
            'training_data_file': Path(training_data_file).name,
            # str() keeps the payload serialisable if a Path is returned.
            'file_path': str(training_data_file),
            'total_examples': sum(
                len(upload.chunks) for upload in filtered_uploads
            ),
        })

    except Exception as e:
        # Route-level boundary: preserve the JSON error contract, log the cause.
        app.logger.exception("Training data generation failed")
        return jsonify({
            'success': False,
            'error': str(e),
        }), 400
|
|
|
|
|
@app.route('/download/<filename>')
def download_file(filename):
    """Download generated training data file.

    Args:
        filename: Name of a file inside ``processor.training_data_dir``.

    Returns:
        The file as an attachment, or ``("File not found", 404)`` when the
        name does not refer to a regular file inside that directory.
    """
    base_dir = Path(processor.training_data_dir).resolve()
    file_path = (base_dir / filename).resolve()

    # Containment check: names such as '..' must not escape the data
    # directory, and directories are not downloadable.
    if base_dir not in file_path.parents or not file_path.is_file():
        return "File not found", 404

    return send_file(str(file_path), as_attachment=True)
|
|
|
|
|
@app.route('/stats')
def get_stats():
    """Return the processor's current processing statistics as JSON."""
    return jsonify(processor.get_processing_stats())
|
|
|
|
|
@app.route('/uploads')
def list_uploads():
    """Return a JSON array with one summary object per known upload."""
    summaries = []
    for record in processor.get_all_uploads():
        summaries.append({
            'file_id': record.file_id,
            'filename': record.filename,
            'file_size': record.file_size,
            'mime_type': record.mime_type,
            'upload_timestamp': record.upload_timestamp,
            # Only the number of chunks is exposed, not their content.
            'chunks': len(record.chunks),
        })
    return jsonify(summaries)
|
|
|
|
|
if __name__ == '__main__':
    # Startup banner. The original emoji prefixes were mis-encoded beyond
    # recovery and have been removed.
    print("Starting LiMp High Capacity Input Processor")
    print("Upload directory:", processor.upload_dir)
    print("Chunk directory:", processor.chunk_dir)
    print("Training data directory:", processor.training_data_dir)
    print("Web interface: http://localhost:5000")

    # The server listens on all interfaces, so the interactive debugger must
    # not be on by default: it allows arbitrary code execution for anyone
    # who can reach the port. Opt in explicitly with FLASK_DEBUG=1.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in (
        '1', 'true', 'yes', 'on',
    )
    app.run(debug=debug_enabled, host='0.0.0.0', port=5000)
|
|
|