Godswill-IoT committed on
Commit
3038768
·
verified ·
1 Parent(s): 3a3f3f9

Upload 5 files

Browse files
Files changed (5) hide show
  1. app/__init__.py +0 -0
  2. app/clients.py +62 -0
  3. app/config.py +6 -0
  4. app/main.py +141 -0
  5. requirements.txt +5 -0
app/__init__.py ADDED
File without changes
app/clients.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ from typing import Any, Dict, List, Optional
3
+ import httpx
4
+
5
+ from .config import MEDIA_URL, TRANSCRIPTION_URL, INGESTION_URL
6
+
7
+
8
+ class ServiceError(Exception):
9
+ pass
10
+
11
+
12
+ def upload_to_media(file_bytes: bytes, filename: str, content_type: str) -> Dict[str, Any]:
13
+ url = f"{MEDIA_URL.rstrip('/')}/media/upload"
14
+ files = {"file": (filename, file_bytes, content_type or "application/octet-stream")}
15
+ with httpx.Client(timeout=180.0) as client:
16
+ r = client.post(url, files=files, data={"tag": "course_builder", "source": "orchestrator"})
17
+ r.raise_for_status()
18
+ return r.json()
19
+
20
+
21
+ def run_transcription(media_id: str, action: str, opts: Dict[str, Any]) -> Dict[str, Any]:
22
+ url = f"{TRANSCRIPTION_URL.rstrip('/')}/run"
23
+ payload = {
24
+ "request_id": f"req_trans_{media_id}",
25
+ "engine": "transcription",
26
+ "action": action, # transcribe.audio | transcribe.video
27
+ "actor": {"user_id": opts.get("user_id", "u1"), "session_id": opts.get("session_id", "s1")},
28
+ "input": {"refs": {"media_id": media_id}, "items": [], "text": ""},
29
+ "context": {},
30
+ "options": {
31
+ "lang": opts.get("lang", "en"),
32
+ "level": opts.get("level", "beginner"),
33
+ "asr_model": opts.get("asr_model"),
34
+ },
35
+ }
36
+ with httpx.Client(timeout=600.0) as client:
37
+ r = client.post(url, json=payload)
38
+ r.raise_for_status()
39
+ out = r.json()
40
+ if not out.get("ok"):
41
+ raise ServiceError(f"Transcription failed: {out}")
42
+ return out
43
+
44
+
45
+ def run_ingestion(items: List[Dict[str, Any]], ctx: Dict[str, Any], opts: Dict[str, Any]) -> Dict[str, Any]:
46
+ url = f"{INGESTION_URL.rstrip('/')}/run"
47
+ payload = {
48
+ "request_id": "req_ing_001",
49
+ "engine": "ingestion",
50
+ "action": "course.generate",
51
+ "actor": {"user_id": opts.get("user_id", "u1"), "session_id": opts.get("session_id", "s1")},
52
+ "input": {"text": "", "items": items, "refs": {}},
53
+ "context": ctx or {},
54
+ "options": {"level": opts.get("level", "beginner")},
55
+ }
56
+ with httpx.Client(timeout=600.0) as client:
57
+ r = client.post(url, json=payload)
58
+ r.raise_for_status()
59
+ out = r.json()
60
+ if not out.get("ok"):
61
+ raise ServiceError(f"Ingestion failed: {out}")
62
+ return out
app/config.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+ import os
3
+
4
+ MEDIA_URL = os.getenv("MEDIA_SERVICE_URL", "http://127.0.0.1:8020")
5
+ TRANSCRIPTION_URL = os.getenv("TRANSCRIPTION_ENGINE_URL", "http://127.0.0.1:8030")
6
+ INGESTION_URL = os.getenv("INGESTION_ENGINE_URL", "http://127.0.0.1:8010")
app/main.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from typing import List, Optional, Dict, Any
4
+
5
+ from fastapi import FastAPI, UploadFile, File, Form, HTTPException
6
+ from starlette.middleware.cors import CORSMiddleware
7
+
8
+ from .clients import upload_to_media, run_transcription, run_ingestion, ServiceError
9
+
10
+
11
+ app = FastAPI(title="Learning OS Course Builder (Orchestrator)", version="0.2.0")
12
+
13
+ app.add_middleware(
14
+ CORSMiddleware,
15
+ allow_origins=["*"],
16
+ allow_methods=["*"],
17
+ allow_headers=["*"],
18
+ )
19
+
20
+
21
+ def is_audio_or_video(content_type: str, filename: str) -> str:
22
+ ct = (content_type or "").lower()
23
+ fn = (filename or "").lower()
24
+
25
+ if ct.startswith("audio/") or fn.endswith((".mp3", ".wav", ".m4a", ".flac", ".ogg")):
26
+ return "audio"
27
+ if ct.startswith("video/") or fn.endswith((".mp4", ".mkv", ".webm", ".mov", ".avi")):
28
+ return "video"
29
+ return ""
30
+
31
+
32
+ @app.get("/")
33
+ def root():
34
+ return {"message": "Course Builder Orchestrator", "docs_url": "/docs", "health_url": "/health"}
35
+
36
+
37
+ @app.get("/health")
38
+ def health():
39
+ return {"ok": True}
40
+
41
+
42
+ @app.post("/create_course")
43
+ async def create_course(
44
+ # Optional text input
45
+ text: str = Form(default=""),
46
+ # Optional multiple files
47
+ files: Optional[List[UploadFile]] = File(default=None),
48
+
49
+ course_title: str = Form(default="Auto Course"),
50
+ level: str = Form(default="beginner"),
51
+ lang: str = Form(default="en"),
52
+ ):
53
+ """
54
+ End-to-end:
55
+ - User can send text, or files, or both.
56
+ - Audio/video files are transcribed automatically.
57
+ - Combined items are sent to ingestion engine to generate a course.
58
+ """
59
+ items: List[Dict[str, Any]] = []
60
+ uploads_meta: List[Dict[str, Any]] = []
61
+ transcriptions: List[Dict[str, Any]] = []
62
+
63
+ # 1) Add text as an item if present
64
+ if text and text.strip():
65
+ items.append({
66
+ "type": "text",
67
+ "text": text.strip(),
68
+ "ref": "user_text",
69
+ "lang": lang
70
+ })
71
+
72
+ # 2) Handle files if provided
73
+ if files:
74
+ for f in files:
75
+ raw = await f.read()
76
+ if not raw:
77
+ continue
78
+
79
+ media = upload_to_media(
80
+ file_bytes=raw,
81
+ filename=f.filename or "upload.bin",
82
+ content_type=f.content_type or "application/octet-stream"
83
+ )
84
+ uploads_meta.append(media)
85
+ media_id = media["media_id"]
86
+
87
+ kind = is_audio_or_video(media.get("content_type"), media.get("filename"))
88
+
89
+ # 3) Transcribe audio/video into items
90
+ if kind in ("audio", "video"):
91
+ action = "transcribe.video" if kind == "video" else "transcribe.audio"
92
+ trans = run_transcription(
93
+ media_id=media_id,
94
+ action=action,
95
+ opts={"level": level, "lang": lang}
96
+ )
97
+ transcriptions.append(trans["result"])
98
+ items.extend(trans["result"]["items"])
99
+
100
+ else:
101
+ # Not supported yet in MVP (image/doc). We still store it and return the media_id.
102
+ # Next: vision engine + doc parser.
103
+ pass
104
+
105
+ # 4) Must have at least one usable item
106
+ if not items:
107
+ raise HTTPException(
108
+ status_code=400,
109
+ detail="Provide text and/or at least one audio/video file to generate a course."
110
+ )
111
+
112
+ # 5) Ingest -> course
113
+ try:
114
+ ing = run_ingestion(
115
+ items=items,
116
+ ctx={"course_title": course_title},
117
+ opts={"level": level, "lang": lang}
118
+ )
119
+ except ServiceError as e:
120
+ raise HTTPException(status_code=502, detail=str(e))
121
+
122
+ return {
123
+ "ok": True,
124
+ "inputs_summary": {
125
+ "text_provided": bool(text and text.strip()),
126
+ "files_uploaded": len(uploads_meta),
127
+ "items_sent_to_ingestion": len(items),
128
+ "unsupported_files": [
129
+ m for m in uploads_meta
130
+ if is_audio_or_video(m.get("content_type"), m.get("filename")) == ""
131
+ ],
132
+ },
133
+ "uploads": uploads_meta,
134
+ "transcriptions": transcriptions,
135
+ "course_result": ing["result"],
136
+ "messages": [
137
+ "Uploaded files to media service",
138
+ "Transcribed audio/video (video supported)",
139
+ "Generated course from combined inputs (text + transcripts)",
140
+ ],
141
+ }
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ fastapi==0.115.6
2
+ uvicorn[standard]==0.30.6
3
+ httpx==0.27.2
4
+ python-multipart==0.0.9
5
+ pydantic==2.9.2