Spaces:
Sleeping
Sleeping
Upload 5 files
Browse files
- app/_init_.py +0 -0
- app/clients.py +62 -0
- app/config.py +6 -0
- app/main.py +141 -0
- requirements.txt +5 -0
app/_init_.py
ADDED
|
File without changes
|
app/clients.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
from typing import Any, Dict, List, Optional
|
| 3 |
+
import httpx
|
| 4 |
+
|
| 5 |
+
from .config import MEDIA_URL, TRANSCRIPTION_URL, INGESTION_URL
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class ServiceError(Exception):
    """Error raised by the client helpers when a downstream service call fails."""
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def upload_to_media(file_bytes: bytes, filename: str, content_type: str) -> Dict[str, Any]:
    """Upload one file to the media service and return its JSON response.

    Args:
        file_bytes: Raw content of the file to upload.
        filename: Name sent with the multipart ``file`` part.
        content_type: MIME type of the part; an empty value falls back to
            ``application/octet-stream``.

    Returns:
        The decoded JSON body of the ``/media/upload`` response.

    Raises:
        ServiceError: If the request cannot be completed, the service answers
            with an error status, or the body is not valid JSON.
    """
    url = f"{MEDIA_URL.rstrip('/')}/media/upload"
    files = {"file": (filename, file_bytes, content_type or "application/octet-stream")}
    form_fields = {"tag": "course_builder", "source": "orchestrator"}
    try:
        with httpx.Client(timeout=180.0) as client:
            response = client.post(url, files=files, data=form_fields)
            response.raise_for_status()
            return response.json()
    except (httpx.HTTPError, ValueError) as exc:
        # Surface every failure mode as the module's own error type so callers
        # need a single ``except ServiceError``; the cause stays chained.
        raise ServiceError(f"Media upload failed: {exc}") from exc
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def run_transcription(media_id: str, action: str, opts: Dict[str, Any]) -> Dict[str, Any]:
    """Ask the transcription engine to transcribe a previously uploaded media item.

    Args:
        media_id: Identifier of the media item, sent under ``input.refs``.
        action: Engine action, ``transcribe.audio`` or ``transcribe.video``.
        opts: Optional settings read with ``.get``: ``user_id``, ``session_id``,
            ``lang``, ``level`` and ``asr_model``.

    Returns:
        The decoded JSON response of the engine's ``/run`` endpoint.

    Raises:
        ServiceError: If the request cannot be completed, the engine answers
            with an error status or invalid JSON, or the response's ``ok``
            field is falsy.
    """
    url = f"{TRANSCRIPTION_URL.rstrip('/')}/run"
    payload = {
        "request_id": f"req_trans_{media_id}",
        "engine": "transcription",
        "action": action,  # transcribe.audio | transcribe.video
        "actor": {"user_id": opts.get("user_id", "u1"), "session_id": opts.get("session_id", "s1")},
        "input": {"refs": {"media_id": media_id}, "items": [], "text": ""},
        "context": {},
        "options": {
            "lang": opts.get("lang", "en"),
            "level": opts.get("level", "beginner"),
            # Sent as null when the caller does not pick a model.
            "asr_model": opts.get("asr_model"),
        },
    }
    try:
        with httpx.Client(timeout=600.0) as client:
            response = client.post(url, json=payload)
            response.raise_for_status()
            out = response.json()
    except (httpx.HTTPError, ValueError) as exc:
        # Transport/status/decoding failures use the same error type as a
        # logical failure reported by the engine.
        raise ServiceError(f"Transcription request failed: {exc}") from exc
    if not out.get("ok"):
        raise ServiceError(f"Transcription failed: {out}")
    return out
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def run_ingestion(items: List[Dict[str, Any]], ctx: Dict[str, Any], opts: Dict[str, Any]) -> Dict[str, Any]:
    """Send the collected items to the ingestion engine to generate a course.

    Args:
        items: Content items placed under ``input.items``.
        ctx: Request context; a falsy value is sent as an empty object.
        opts: Optional settings read with ``.get``: ``user_id``, ``session_id``
            and ``level``.

    Returns:
        The decoded JSON response of the engine's ``/run`` endpoint.

    Raises:
        ServiceError: If the request cannot be completed, the engine answers
            with an error status or invalid JSON, or the response's ``ok``
            field is falsy.
    """
    import uuid

    url = f"{INGESTION_URL.rstrip('/')}/run"
    payload = {
        # A fresh id per call; a fixed literal would make every ingestion
        # request indistinguishable from the others.
        "request_id": f"req_ing_{uuid.uuid4().hex}",
        "engine": "ingestion",
        "action": "course.generate",
        "actor": {"user_id": opts.get("user_id", "u1"), "session_id": opts.get("session_id", "s1")},
        "input": {"text": "", "items": items, "refs": {}},
        "context": ctx or {},
        # NOTE(review): only ``level`` is forwarded even when ``opts`` carries
        # ``lang`` -- confirm whether the ingestion engine expects it.
        "options": {"level": opts.get("level", "beginner")},
    }
    try:
        with httpx.Client(timeout=600.0) as client:
            response = client.post(url, json=payload)
            response.raise_for_status()
            out = response.json()
    except (httpx.HTTPError, ValueError) as exc:
        # Transport/status/decoding failures use the same error type as a
        # logical failure reported by the engine.
        raise ServiceError(f"Ingestion request failed: {exc}") from exc
    if not out.get("ok"):
        raise ServiceError(f"Ingestion failed: {out}")
    return out
|
app/config.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
import os
|
| 3 |
+
|
| 4 |
+
# Base URLs of the downstream services. Each can be overridden through the
# named environment variable; the defaults point at local instances.
MEDIA_URL = os.getenv("MEDIA_SERVICE_URL", "http://127.0.0.1:8020")
TRANSCRIPTION_URL = os.getenv("TRANSCRIPTION_ENGINE_URL", "http://127.0.0.1:8030")
INGESTION_URL = os.getenv("INGESTION_ENGINE_URL", "http://127.0.0.1:8010")
|
app/main.py
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from typing import List, Optional, Dict, Any
|
| 4 |
+
|
| 5 |
+
from fastapi import FastAPI, UploadFile, File, Form, HTTPException
|
| 6 |
+
from starlette.middleware.cors import CORSMiddleware
|
| 7 |
+
|
| 8 |
+
from .clients import upload_to_media, run_transcription, run_ingestion, ServiceError
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
# ASGI application object; the route decorators in this module attach to it.
app = FastAPI(title="Learning OS Course Builder (Orchestrator)", version="0.2.0")

# CORS is left fully open: any origin, method and header is accepted.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def is_audio_or_video(content_type: Optional[str], filename: Optional[str]) -> str:
    """Classify an upload as ``"audio"``, ``"video"`` or ``""`` (neither).

    An explicit ``audio/*`` or ``video/*`` MIME type decides the result. The
    file extension is only a fallback for a missing or generic MIME type, so
    a ``video/ogg`` upload named ``clip.ogg`` is not mistaken for audio.

    Args:
        content_type: MIME type of the upload; ``None`` or empty is allowed.
        filename: Name of the upload; ``None`` or empty is allowed.

    Returns:
        ``"audio"``, ``"video"``, or ``""`` when the upload is neither.
    """
    ct = (content_type or "").lower()
    fn = (filename or "").lower()

    # The declared MIME type is more specific than the extension.
    if ct.startswith("audio/"):
        return "audio"
    if ct.startswith("video/"):
        return "video"

    # Fall back to the extension when the MIME type is not conclusive.
    if fn.endswith((".mp3", ".wav", ".m4a", ".flac", ".ogg")):
        return "audio"
    if fn.endswith((".mp4", ".mkv", ".webm", ".mov", ".avi")):
        return "video"
    return ""
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
@app.get("/")
def root():
    """Return the service name and the paths of the docs and health endpoints."""
    return {"message": "Course Builder Orchestrator", "docs_url": "/docs", "health_url": "/health"}
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
@app.get("/health")
def health():
    """Liveness probe: always answers ``{"ok": true}``."""
    return {"ok": True}
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
async def _call_service(func, **kwargs):
    """Run a blocking client helper in the threadpool, mapping failures to HTTP 502."""
    # Local imports keep this block self-contained within the module.
    import httpx
    from fastapi.concurrency import run_in_threadpool

    try:
        # The client helpers are synchronous; calling them directly inside an
        # ``async def`` endpoint would stall the event loop while they wait.
        return await run_in_threadpool(func, **kwargs)
    except (ServiceError, httpx.HTTPError) as exc:
        raise HTTPException(status_code=502, detail=str(exc)) from exc


@app.post("/create_course")
async def create_course(
    # Optional text input
    text: str = Form(default=""),
    # Optional multiple files
    files: Optional[List[UploadFile]] = File(default=None),
    course_title: str = Form(default="Auto Course"),
    level: str = Form(default="beginner"),
    lang: str = Form(default="en"),
):
    """
    End-to-end:
    - User can send text, or files, or both.
    - Audio/video files are transcribed automatically.
    - Combined items are sent to ingestion engine to generate a course.

    Responds with HTTP 400 when neither text nor a transcribable file yields
    an item, and with HTTP 502 when a downstream service call fails.
    """
    items: List[Dict[str, Any]] = []
    uploads_meta: List[Dict[str, Any]] = []
    transcriptions: List[Dict[str, Any]] = []
    unsupported_files: List[Dict[str, Any]] = []
    opts = {"level": level, "lang": lang}

    # 1) Add text as an item if present
    cleaned_text = text.strip() if text else ""
    if cleaned_text:
        items.append({
            "type": "text",
            "text": cleaned_text,
            "ref": "user_text",
            "lang": lang,
        })

    # 2) Handle files if provided
    for f in files or []:
        raw = await f.read()
        if not raw:
            # Empty uploads are skipped entirely.
            continue

        media = await _call_service(
            upload_to_media,
            file_bytes=raw,
            filename=f.filename or "upload.bin",
            content_type=f.content_type or "application/octet-stream",
        )
        uploads_meta.append(media)
        media_id = media["media_id"]

        # Classification uses the metadata echoed back by the media service.
        kind = is_audio_or_video(media.get("content_type"), media.get("filename"))

        if kind not in ("audio", "video"):
            # Not supported yet in MVP (image/doc). We still store it and return the media_id.
            # Next: vision engine + doc parser.
            unsupported_files.append(media)
            continue

        # 3) Transcribe audio/video into items
        action = "transcribe.video" if kind == "video" else "transcribe.audio"
        trans = await _call_service(
            run_transcription,
            media_id=media_id,
            action=action,
            opts=opts,
        )
        transcriptions.append(trans["result"])
        items.extend(trans["result"]["items"])

    # 4) Must have at least one usable item
    if not items:
        raise HTTPException(
            status_code=400,
            detail="Provide text and/or at least one audio/video file to generate a course.",
        )

    # 5) Ingest -> course
    ing = await _call_service(
        run_ingestion,
        items=items,
        ctx={"course_title": course_title},
        opts=opts,
    )

    return {
        "ok": True,
        "inputs_summary": {
            "text_provided": bool(cleaned_text),
            "files_uploaded": len(uploads_meta),
            "items_sent_to_ingestion": len(items),
            "unsupported_files": unsupported_files,
        },
        "uploads": uploads_meta,
        "transcriptions": transcriptions,
        "course_result": ing["result"],
        "messages": [
            "Uploaded files to media service",
            "Transcribed audio/video (video supported)",
            "Generated course from combined inputs (text + transcripts)",
        ],
    }
|
requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi==0.115.6
|
| 2 |
+
uvicorn[standard]==0.30.6
|
| 3 |
+
httpx==0.27.2
|
| 4 |
+
python-multipart==0.0.9
|
| 5 |
+
pydantic==2.9.2
|