Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,14 +5,21 @@ import subprocess
|
|
| 5 |
import shutil
|
| 6 |
import time
|
| 7 |
import sys
|
|
|
|
| 8 |
from typing import Dict, List, Optional, Any
|
| 9 |
from huggingface_hub import HfApi, hf_hub_url
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
# Fix Unicode encoding for Windows
|
| 12 |
if sys.platform == 'win32':
|
| 13 |
import io
|
| 14 |
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
|
| 15 |
|
|
|
|
|
|
|
|
|
|
| 16 |
# ==== CONFIGURATION ====
|
| 17 |
HF_TOKEN = ""
|
| 18 |
SOURCE_REPO_ID = "Fred808/BG3" # Fetch audio files from here
|
|
@@ -532,3 +539,139 @@ def main_processing_loop():
|
|
| 532 |
|
| 533 |
if __name__ == "__main__":
|
| 534 |
main_processing_loop()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
import shutil
|
| 6 |
import time
|
| 7 |
import sys
|
| 8 |
+
import threading
|
| 9 |
from typing import Dict, List, Optional, Any
|
| 10 |
from huggingface_hub import HfApi, hf_hub_url
|
| 11 |
+
from fastapi import FastAPI, HTTPException
|
| 12 |
+
from fastapi.responses import JSONResponse
|
| 13 |
+
import uvicorn
|
| 14 |
|
| 15 |
# Fix Unicode encoding for Windows
if sys.platform == 'win32':
    import io
    # Prefer reconfigure(): it changes the encoding of the existing stream in
    # place, so its buffering settings and any other references to
    # sys.stdout stay valid.
    if hasattr(sys.stdout, "reconfigure"):
        sys.stdout.reconfigure(encoding='utf-8')
    elif hasattr(sys.stdout, "buffer"):
        # Fallback for stream objects that expose a binary buffer but no
        # reconfigure(). Streams with neither (or sys.stdout being None) are
        # left untouched instead of raising AttributeError at import time.
        sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
|
| 19 |
|
| 20 |
+
# Create the FastAPI application object that the route decorators attach to
app = FastAPI(
    title="Audio Transcriber",
    description="Audio transcription and upload service",
)
|
| 22 |
+
|
| 23 |
# ==== CONFIGURATION ====
# Hugging Face access token. Taken from the HF_TOKEN environment variable so
# the secret does not have to be committed to the repository; when the
# variable is unset the value is "" exactly as before.
HF_TOKEN = os.environ.get("HF_TOKEN", "")
SOURCE_REPO_ID = "Fred808/BG3"  # Fetch audio files from here
|
|
|
|
| 539 |
|
| 540 |
# NOTE: main_processing_loop() is deliberately not invoked at this point.
# Running it here under `python app.py` would execute the loop before the
# route handlers defined below are registered and before the server is
# started. The script entry point is the `if __name__ == "__main__":` guard
# at the end of this file, which calls run_api().
|
| 542 |
+
|
| 543 |
+
# ===== FASTAPI ENDPOINTS =====
|
| 544 |
+
|
| 545 |
+
@app.get("/")
async def root():
    """Describe the service and list the routes it advertises."""
    available_routes = {
        "status": "/status",
        "start": "/start",
        "stop": "/stop",
        "process": "/process/{filename}",
        "logs": "/logs",
    }
    service_info = {
        "service": "Audio Transcriber",
        "status": "running",
        "version": "1.0.0",
        "endpoints": available_routes,
    }
    return service_info
|
| 560 |
+
|
| 561 |
+
@app.get("/status")
async def get_status():
    """Report the processing_status counters plus the last ten log entries."""
    # Keys copied verbatim from processing_status, in response order
    mirrored_keys = (
        "is_running",
        "current_file",
        "total_files",
        "processed_files",
        "transcribed_files",
        "failed_files",
        "last_update",
    )
    snapshot = {key: processing_status[key] for key in mirrored_keys}
    snapshot["recent_logs"] = processing_status["logs"][-10:]
    return snapshot
|
| 574 |
+
|
| 575 |
+
@app.post("/start")
async def start_processing():
    """Launch main_processing_loop on a background daemon thread.

    Raises:
        HTTPException: 400 when processing_status["is_running"] is truthy.
    """
    already_running = processing_status["is_running"]
    if already_running:
        raise HTTPException(status_code=400, detail="Processing already running")

    # The request returns immediately; the loop runs on its own thread
    worker = threading.Thread(target=main_processing_loop, daemon=True)
    worker.start()

    return dict(message="Processing started", status="started")
|
| 589 |
+
|
| 590 |
+
@app.post("/stop")
async def stop_processing():
    """Clear the is_running flag in processing_status.

    Raises:
        HTTPException: 400 when the flag is already falsy.
    """
    if processing_status["is_running"]:
        # Only the flag is flipped here; presumably the processing loop
        # checks it to wind down -- confirm against main_processing_loop.
        processing_status["is_running"] = False
        return dict(message="Processing stopped", status="stopped")

    raise HTTPException(status_code=400, detail="Processing not running")
|
| 602 |
+
|
| 603 |
+
@app.get("/logs")
async def get_logs(limit: int = 50):
    """Return the tail of the in-memory log list.

    Args:
        limit: Maximum number of entries to return, taken from the end of
            processing_status["logs"]. Zero or a negative value yields an
            empty list.

    Returns:
        A dict with "total_logs" (length of the full log list) and
        "recent_logs" (the selected tail).
    """
    all_logs = processing_status["logs"]
    # Guard the slice: all_logs[-0:] is the whole list, and a negative limit
    # would become a positive start index, so both would return the wrong
    # window instead of "at most `limit` entries".
    recent = all_logs[-limit:] if limit > 0 else []
    return {
        "total_logs": len(all_logs),
        "recent_logs": recent,
    }
|
| 611 |
+
|
| 612 |
+
@app.post("/process/{filename}")
async def process_single_file(filename: str):
    """Download, match and process one audio file on demand.

    Args:
        filename: Name of the file inside SOURCE_REPO_ID to process.

    Returns:
        A dict with "status", "file", "matched" and "message" on success.

    Raises:
        HTTPException: 404 when no reference filename matches; 500 when the
            reference files cannot be fetched, the download fails, processing
            reports failure, or an unexpected error occurs.
    """
    # NOTE(review): the helpers called below look like blocking calls inside
    # an async handler -- confirm whether they should run in a worker thread.
    try:
        log_message(f"🎯 Manual processing requested for: {filename}", "INFO")

        # Download and process the file
        reference_map = fetch_reference_files(REFERENCE_REPO_ID)
        if not reference_map:
            raise HTTPException(status_code=500, detail="Could not fetch reference files")

        # Get file URL
        audio_url = hf_hub_url(repo_id=SOURCE_REPO_ID, filename=filename, repo_type="dataset", subfolder=None)
        base_filename = os.path.basename(filename)
        local_wav_path = os.path.join(DOWNLOAD_FOLDER, base_filename)

        # Download
        if not download_with_retry(audio_url, local_wav_path):
            raise HTTPException(status_code=500, detail="Failed to download file")

        try:
            # Find match
            matched_filename = find_matching_filename(base_filename, reference_map)
            if not matched_filename:
                raise HTTPException(status_code=404, detail="No matching filename found")

            # Process
            if not process_audio_file(local_wav_path, reference_map, matched_filename):
                raise HTTPException(status_code=500, detail="Processing failed")

            processing_status["transcribed_files"] += 1
        finally:
            # Single cleanup point: the downloaded file is removed on
            # success, on every error response, and on unexpected exceptions.
            if os.path.exists(local_wav_path):
                os.remove(local_wav_path)

        return {
            "status": "success",
            "file": filename,
            "matched": matched_filename,
            "message": "Audio transcribed and uploaded successfully"
        }

    except HTTPException as http_err:
        # Deliberate error responses keep their own status code (e.g. the
        # 404 above) instead of being re-wrapped as a generic 500.
        log_message(f"❌ Manual processing error: {http_err.detail}", "ERROR")
        raise
    except Exception as e:
        log_message(f"❌ Manual processing error: {str(e)}", "ERROR")
        raise HTTPException(status_code=500, detail=str(e)) from e
|
| 660 |
+
|
| 661 |
+
@app.on_event("startup")
async def startup_event():
    """Kick off the processing loop in the background when the server starts."""
    log_message("🚀 Server startup: Auto-starting processing loop", "INFO")

    # Run the loop on a daemon thread so the startup hook returns right away
    threading.Thread(target=main_processing_loop, daemon=True).start()
|
| 669 |
+
|
| 670 |
+
def run_api(host: str = "0.0.0.0", port: int = 8000):
    """Log the bind address, then serve `app` with uvicorn.

    Args:
        host: Interface passed to uvicorn.run.
        port: TCP port passed to uvicorn.run.
    """
    startup_notice = f"🚀 Starting FastAPI server on {host}:{port}"
    log_message(startup_notice, "INFO")
    uvicorn.run(app, port=port, host=host)
|
| 674 |
+
|
| 675 |
+
if __name__ == "__main__":
    # Script entry point: serve the API. The processing loop is launched by
    # the "startup" event handler once the server comes up.
    run_api()
|