Spaces:

Godswill-IoT
/

cb-engine

Sleeping

App Files Files Community

Godswill-IoT committed on Jan 30

Commit

3038768

verified ·

1 Parent(s): 3a3f3f9

Upload 5 files

Browse files

Files changed (5) hide show

app/_init_.py +0 -0
app/clients.py +62 -0
app/config.py +6 -0
app/main.py +141 -0
requirements.txt +5 -0

app/_init_.py ADDED Viewed

File without changes

app/clients.py ADDED Viewed

	@@ -0,0 +1,62 @@

+from __future__ import annotations
+from typing import Any, Dict, List, Optional
+import httpx
+from .config import MEDIA_URL, TRANSCRIPTION_URL, INGESTION_URL
+class ServiceError(Exception):
+    """Raised when a downstream engine answers with a payload whose ``ok`` flag is falsy."""
+def upload_to_media(file_bytes: bytes, filename: str, content_type: str) -> Dict[str, Any]:
+    """Send one file to the media service as a multipart upload and return its JSON reply.
+
+    Args:
+        file_bytes: Raw content of the file.
+        filename: Name sent in the multipart ``file`` part.
+        content_type: MIME type of the part; an empty value is replaced by
+            ``application/octet-stream``.
+
+    Returns:
+        The decoded JSON body of the media service response.
+
+    Raises:
+        httpx.HTTPStatusError: via ``raise_for_status()`` on a 4xx/5xx reply.
+    """
+    endpoint = MEDIA_URL.rstrip("/") + "/media/upload"
+    mime = content_type if content_type else "application/octet-stream"
+    multipart = {"file": (filename, file_bytes, mime)}
+    form_fields = {"tag": "course_builder", "source": "orchestrator"}
+    with httpx.Client(timeout=180.0) as session:
+        response = session.post(endpoint, files=multipart, data=form_fields)
+        response.raise_for_status()
+        body = response.json()
+    return body
+def run_transcription(media_id: str, action: str, opts: Dict[str, Any]) -> Dict[str, Any]:
+    """Ask the transcription engine's ``/run`` endpoint to transcribe a stored media item.
+
+    Args:
+        media_id: Identifier of the media item; also embedded in the request id.
+        action: Engine action name (``transcribe.audio`` or ``transcribe.video``).
+        opts: Optional settings. Keys read here are ``user_id``, ``session_id``,
+            ``lang``, ``level`` and ``asr_model``.
+
+    Returns:
+        The decoded JSON reply when its ``ok`` field is truthy.
+
+    Raises:
+        ServiceError: if the reply's ``ok`` field is missing or falsy.
+        httpx.HTTPStatusError: via ``raise_for_status()`` on a 4xx/5xx reply.
+    """
+    endpoint = TRANSCRIPTION_URL.rstrip("/") + "/run"
+    actor = {
+        "user_id": opts.get("user_id", "u1"),
+        "session_id": opts.get("session_id", "s1"),
+    }
+    engine_options = {
+        "lang": opts.get("lang", "en"),
+        "level": opts.get("level", "beginner"),
+        "asr_model": opts.get("asr_model"),
+    }
+    body = {
+        "request_id": f"req_trans_{media_id}",
+        "engine": "transcription",
+        # transcribe.audio | transcribe.video
+        "action": action,
+        "actor": actor,
+        "input": {"refs": {"media_id": media_id}, "items": [], "text": ""},
+        "context": {},
+        "options": engine_options,
+    }
+    with httpx.Client(timeout=600.0) as session:
+        response = session.post(endpoint, json=body)
+        response.raise_for_status()
+        reply = response.json()
+    if reply.get("ok"):
+        return reply
+    raise ServiceError(f"Transcription failed: {reply}")
+def run_ingestion(items: List[Dict[str, Any]], ctx: Dict[str, Any], opts: Dict[str, Any]) -> Dict[str, Any]:
+    """Submit content items to the ingestion engine's ``/run`` endpoint as a ``course.generate`` job.
+
+    Args:
+        items: Content items forwarded unchanged as ``input.items``.
+        ctx: Context object for the job; a falsy value is sent as ``{}``.
+        opts: Optional settings. Only ``user_id``, ``session_id`` and ``level``
+            are read here; other keys are not forwarded.
+
+    Returns:
+        The decoded JSON reply when its ``ok`` field is truthy.
+
+    Raises:
+        ServiceError: if the reply's ``ok`` field is missing or falsy.
+        httpx.HTTPStatusError: via ``raise_for_status()`` on a 4xx/5xx reply.
+    """
+    # Local import keeps this block self-contained; uuid is standard library.
+    import uuid
+
+    url = f"{INGESTION_URL.rstrip('/')}/run"
+    payload = {
+        # A fresh id per call: a fixed literal would make every job share
+        # one request id and be indistinguishable downstream.
+        "request_id": f"req_ing_{uuid.uuid4().hex}",
+        "engine": "ingestion",
+        "action": "course.generate",
+        "actor": {"user_id": opts.get("user_id", "u1"), "session_id": opts.get("session_id", "s1")},
+        "input": {"text": "", "items": items, "refs": {}},
+        "context": ctx or {},
+        "options": {"level": opts.get("level", "beginner")},
+    }
+    with httpx.Client(timeout=600.0) as client:
+        r = client.post(url, json=payload)
+        r.raise_for_status()
+        out = r.json()
+    if not out.get("ok"):
+        raise ServiceError(f"Ingestion failed: {out}")
+    return out

app/config.py ADDED Viewed

	@@ -0,0 +1,6 @@

+from __future__ import annotations
+import os
+# Base URLs of the three downstream services. Each can be overridden through
+# the named environment variable; otherwise a localhost default is used.
+MEDIA_URL = os.environ.get("MEDIA_SERVICE_URL", "http://127.0.0.1:8020")
+TRANSCRIPTION_URL = os.environ.get("TRANSCRIPTION_ENGINE_URL", "http://127.0.0.1:8030")
+INGESTION_URL = os.environ.get("INGESTION_ENGINE_URL", "http://127.0.0.1:8010")

app/main.py ADDED Viewed

	@@ -0,0 +1,141 @@

+from __future__ import annotations
+from typing import List, Optional, Dict, Any
+from fastapi import FastAPI, UploadFile, File, Form, HTTPException
+from starlette.middleware.cors import CORSMiddleware
+from .clients import upload_to_media, run_transcription, run_ingestion, ServiceError
+# ASGI application object; the route handlers below register themselves on it.
+app = FastAPI(title="Learning OS Course Builder (Orchestrator)", version="0.2.0")
+# CORS is left wide open: the "*" wildcards accept any origin, method and header.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+def is_audio_or_video(content_type: Optional[str], filename: Optional[str]) -> str:
+    """Classify an upload as ``"audio"``, ``"video"`` or neither.
+
+    The declared content type is consulted first; the filename extension is
+    only a fallback. This keeps containers whose extension is shared by both
+    kinds (for example ``video/ogg`` stored as ``clip.ogg``) from being
+    labelled audio when the MIME type explicitly says video.
+
+    Args:
+        content_type: MIME type of the upload; ``None`` or empty is allowed.
+        filename: Name of the upload; ``None`` or empty is allowed.
+
+    Returns:
+        ``"audio"``, ``"video"``, or ``""`` when neither input identifies the
+        file as audio or video.
+    """
+    ct = (content_type or "").strip().lower()
+    fn = (filename or "").strip().lower()
+    # 1) An explicit audio/video MIME type is authoritative.
+    if ct.startswith("audio/"):
+        return "audio"
+    if ct.startswith("video/"):
+        return "video"
+    # 2) Otherwise (missing or generic type) fall back to the extension.
+    if fn.endswith((".mp3", ".wav", ".m4a", ".flac", ".ogg")):
+        return "audio"
+    if fn.endswith((".mp4", ".mkv", ".webm", ".mov", ".avi")):
+        return "video"
+    return ""
+@app.get("/")
+def root():
+    """Landing endpoint: names the service and points at the docs and health routes."""
+    landing = dict(
+        message="Course Builder Orchestrator",
+        docs_url="/docs",
+        health_url="/health",
+    )
+    return landing
+@app.get("/health")
+def health():
+    """Health endpoint; unconditionally reports ``ok`` as true."""
+    status = {"ok": True}
+    return status
+async def _call_service(fn, **kwargs):
+    """Run a blocking client call in the threadpool and map its failures to HTTP 502."""
+    # Local imports keep this block self-contained within the module.
+    import httpx
+    from starlette.concurrency import run_in_threadpool
+
+    try:
+        # The client functions use synchronous httpx with timeouts of up to
+        # 600 s; calling them inline would stall the event loop for every
+        # other request (including /health) until they return.
+        return await run_in_threadpool(fn, **kwargs)
+    except (ServiceError, httpx.HTTPError, ValueError) as exc:
+        # ValueError covers a non-JSON body raised while decoding the reply.
+        raise HTTPException(status_code=502, detail=str(exc)) from exc
+def _require_key(payload: Any, key: str, service: str) -> Any:
+    """Return ``payload[key]``, or raise HTTP 502 when the service reply lacks it."""
+    try:
+        return payload[key]
+    except (KeyError, TypeError, IndexError) as exc:
+        raise HTTPException(
+            status_code=502,
+            detail=f"{service} response is missing '{key}'",
+        ) from exc
+@app.post("/create_course")
+async def create_course(
+    # Optional text input
+    text: str = Form(default=""),
+    # Optional multiple files
+    files: Optional[List[UploadFile]] = File(default=None),
+    course_title: str = Form(default="Auto Course"),
+    level: str = Form(default="beginner"),
+    lang: str = Form(default="en"),
+):
+    """
+    End-to-end:
+    - User can send text, or files, or both.
+    - Audio/video files are transcribed automatically.
+    - Combined items are sent to ingestion engine to generate a course.
+
+    Every non-empty file is uploaded to the media service; files that are
+    neither audio nor video are stored but contribute no items.
+
+    Raises:
+        HTTPException(400): no text and no transcribable file produced an item.
+        HTTPException(502): a downstream service failed, or its reply lacked
+            an expected field.
+    """
+    items: List[Dict[str, Any]] = []
+    uploads_meta: List[Dict[str, Any]] = []
+    transcriptions: List[Dict[str, Any]] = []
+    unsupported: List[Dict[str, Any]] = []
+    clean_text = (text or "").strip()
+    # 1) Add text as an item if present
+    if clean_text:
+        items.append({
+            "type": "text",
+            "text": clean_text,
+            "ref": "user_text",
+            "lang": lang,
+        })
+    # 2) Handle files if provided
+    for f in files or []:
+        raw = await f.read()
+        if not raw:
+            # Empty uploads are skipped without contacting the media service.
+            continue
+        media = await _call_service(
+            upload_to_media,
+            file_bytes=raw,
+            filename=f.filename or "upload.bin",
+            content_type=f.content_type or "application/octet-stream",
+        )
+        media_id = _require_key(media, "media_id", "Media service")
+        uploads_meta.append(media)
+        # Prefer what the media service reports, but fall back to the
+        # upload's own metadata so a reply that omits these fields does not
+        # make an audio/video file look unsupported.
+        kind = is_audio_or_video(
+            media.get("content_type") or f.content_type,
+            media.get("filename") or f.filename,
+        )
+        # 3) Transcribe audio/video into items
+        if kind in ("audio", "video"):
+            action = "transcribe.video" if kind == "video" else "transcribe.audio"
+            trans = await _call_service(
+                run_transcription,
+                media_id=media_id,
+                action=action,
+                opts={"level": level, "lang": lang},
+            )
+            result = _require_key(trans, "result", "Transcription engine")
+            transcriptions.append(result)
+            items.extend(_require_key(result, "items", "Transcription engine"))
+        else:
+            # Not supported yet in MVP (image/doc). We still store it and return the media_id.
+            # Next: vision engine + doc parser.
+            unsupported.append(media)
+    # 4) Must have at least one usable item
+    if not items:
+        raise HTTPException(
+            status_code=400,
+            detail="Provide text and/or at least one audio/video file to generate a course."
+        )
+    # 5) Ingest -> course
+    ing = await _call_service(
+        run_ingestion,
+        items=items,
+        ctx={"course_title": course_title},
+        opts={"level": level, "lang": lang},
+    )
+    course_result = _require_key(ing, "result", "Ingestion engine")
+    return {
+        "ok": True,
+        "inputs_summary": {
+            "text_provided": bool(clean_text),
+            "files_uploaded": len(uploads_meta),
+            "items_sent_to_ingestion": len(items),
+            "unsupported_files": unsupported,
+        },
+        "uploads": uploads_meta,
+        "transcriptions": transcriptions,
+        "course_result": course_result,
+        "messages": [
+            "Uploaded files to media service",
+            "Transcribed audio/video (video supported)",
+            "Generated course from combined inputs (text + transcripts)",
+        ],
+    }

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+fastapi==0.115.6
+uvicorn[standard]==0.30.6
+httpx==0.27.2
+python-multipart==0.0.9
+pydantic==2.9.2