github-actions[bot] committed on
Commit
099df87
·
0 Parent(s):

Deploy backend with correct structure

Browse files
Files changed (9) hide show
  1. Dockerfile +45 -0
  2. README.md +4 -0
  3. __init__.py +0 -0
  4. api/crew.py +184 -0
  5. api/db.py +73 -0
  6. api/models.py +9 -0
  7. api/routes.py +53 -0
  8. main.py +23 -0
  9. requirements.txt +8 -0
Dockerfile ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# === STAGE 1: The Builder (Downloads the model) ===
# Use the same Python base as the runtime stage so the two stages stay
# consistent (original mixed 3.10 here with 3.11 below).
FROM python:3.11-slim AS builder
RUN pip install --no-cache-dir sentence-transformers
ENV MODEL_NAME=sentence-transformers/all-MiniLM-L6-v2
ENV SAVE_PATH=/opt/models/all-MiniLM-L6-v2
RUN python -c "import os; from sentence_transformers import SentenceTransformer; SentenceTransformer(os.getenv('MODEL_NAME')).save(os.getenv('SAVE_PATH'))"

# === STAGE 2: The Final Application Image ===
FROM python:3.11-slim

# Set environment variables for writable cache directories
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    NLTK_DATA=/tmp/nltk_data \
    TRANSFORMERS_CACHE=/tmp/transformers_cache \
    HF_HOME=/tmp/huggingface \
    PYTHONPATH=/app

# Set work directory
WORKDIR /app

# Install system dependencies (build-essential for packages with native extensions)
RUN apt-get update && apt-get install -y \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Create writable cache directories
RUN mkdir -p /tmp/nltk_data /tmp/transformers_cache /tmp/huggingface

# Install Python dependencies first so this layer is cached across code-only changes
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application code
COPY main.py .
COPY api/ ./api/

# Copy the pre-downloaded model from the builder stage (db.py loads ./models/...)
COPY --from=builder /opt/models/all-MiniLM-L6-v2 ./models/all-MiniLM-L6-v2

# Expose the port
EXPOSE 7860

# Run the application
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ ---
2
+ title: Research Flowstream (Backend)
3
+ sdk: docker
4
+ ---
__init__.py ADDED
File without changes
api/crew.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import uuid
import asyncio
import json
import requests
from typing import AsyncGenerator
from dotenv import load_dotenv

# Load .env so env vars are available when starting Uvicorn directly
load_dotenv()

# Groq configuration
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "").strip()
GROQ_MODEL = os.getenv("GROQ_MODEL", "llama-3.1-8b-instant").strip()
GROQ_URL = "https://api.groq.com/openai/v1/chat/completions"

# Optional switch to simulate local behavior (no external calls)
GROQ_DISABLED = os.getenv("GROQ_DISABLED", "").lower() in {"1", "true", "yes"}

# Reasonable (connect, read) timeouts for generation/streaming
DEFAULT_TIMEOUT = (10, 120)

# Base headers for Groq API.
# Only attach Authorization when a key is configured: the original sent a
# literal empty "Authorization" header when the key was missing, which some
# servers reject differently than an absent header.
HEADERS = {"Content-Type": "application/json"}
if GROQ_API_KEY:
    HEADERS["Authorization"] = f"Bearer {GROQ_API_KEY}"
28
+
29
+
30
def generate_report_id() -> str:
    """Return a fresh UUID4 string used as a report's unique ID."""
    return f"{uuid.uuid4()}"
33
+
34
+
35
def stream_event(kind: str, data):
    """
    Serialize one SSE event payload as JSON.

    The FastAPI route wraps the result as `data: <json>\n\n`, and the
    frontend can safely recover it with json.loads(payload).
    """
    event = {"kind": kind, "data": data}
    return json.dumps(event, ensure_ascii=False)
42
+
43
+
44
+ def _chunk(text: str, n: int):
45
+ """Split text into small pieces to render a smoother streaming experience."""
46
+ for i in range(0, len(text), n):
47
+ yield text[i : i + n]
48
+
49
+
50
async def run_researcher_async(topic: str) -> str:
    """
    Researcher step: produce compact factual bullets.

    Args:
        topic: User-supplied research topic.

    Returns:
        Bullet-point notes from the model, or fallback text if GROQ is
        disabled, unconfigured, or unavailable.
    """
    if GROQ_DISABLED or not GROQ_API_KEY:
        return (
            f"- What is '{topic}'?\n"
            f"- 3–5 key facts\n"
            f"- Common use cases\n"
            f"- Simple examples\n"
        )

    payload = {
        "model": GROQ_MODEL,
        "messages": [
            {"role": "system", "content": "You are a concise researcher."},
            {
                "role": "user",
                "content": f"Provide compact, factual bullet points about '{topic}'. "
                f"Max 8 bullets. Avoid filler text.",
            },
        ],
        "temperature": 0.5,
    }
    try:
        # requests.post is blocking; run it in a worker thread so the
        # asyncio event loop (and concurrent SSE streams) are not stalled
        # for the duration of the HTTP call.
        r = await asyncio.to_thread(
            requests.post, GROQ_URL, headers=HEADERS, json=payload, timeout=DEFAULT_TIMEOUT
        )
        r.raise_for_status()
        return r.json()["choices"][0]["message"]["content"]
    except Exception as e:
        # Fallback on any network/API error
        return f"[fallback researcher due to error: {e}]\n- Background\n- Key points\n- Examples"
82
+
83
+
84
async def run_analyst_async(researcher_notes: str) -> str:
    """
    Analyst step: extract key insights and implications from researcher notes.

    Args:
        researcher_notes: Output of run_researcher_async.

    Returns:
        Insight/implication bullets, or fallback text if GROQ is disabled,
        unconfigured, or unavailable.
    """
    if GROQ_DISABLED or not GROQ_API_KEY:
        return "- 3 key insights\n- 2 implications\n- 1 trade-off\n"

    payload = {
        "model": GROQ_MODEL,
        "messages": [
            {"role": "system", "content": "You extract insights cleanly."},
            {
                "role": "user",
                "content": f"From these notes, produce exactly 3 insights and 2 implications:\n{researcher_notes}",
            },
        ],
        "temperature": 0.5,
    }
    try:
        # Offload the blocking HTTP call to a thread so this coroutine does
        # not freeze the event loop while waiting on the network.
        r = await asyncio.to_thread(
            requests.post, GROQ_URL, headers=HEADERS, json=payload, timeout=DEFAULT_TIMEOUT
        )
        r.raise_for_status()
        return r.json()["choices"][0]["message"]["content"]
    except Exception as e:
        return f"[fallback analyst due to error: {e}]\n- Insight 1\n- Insight 2\n- Insight 3\n- Implication A\n- Implication B"
109
+
110
+
111
async def run_writer_token_stream(
    topic: str,
    researcher_notes: str,
    analyst_notes: str,
) -> AsyncGenerator[str, None]:
    """
    Writer step: stream the final report as small token-like chunks for smooth UI updates.

    Args:
        topic: User-supplied research topic, embedded in the prompt.
        researcher_notes: Output of run_researcher_async.
        analyst_notes: Output of run_analyst_async.

    Yields strings (small chunks). Caller accumulates or forwards as SSE tokens.
    """
    writer_prompt = (
        "Write a clear, beginner-friendly report with markdown headings:\n"
        "Sections: Introduction, Key Concepts, Insights, Practical Tips, Conclusion.\n"
        "Use concise language and bullets where helpful.\n\n"
        f"Topic: {topic}\n\n"
        f"Researcher Notes:\n{researcher_notes}\n\n"
        f"Analyst Notes:\n{analyst_notes}\n"
    )

    # Local simulated streaming if GROQ is disabled or key missing
    if GROQ_DISABLED or not GROQ_API_KEY:
        simulated = [
            f"## {topic}\n\n",
            "### Introduction\n",
            "This response is streaming locally to simulate real-time typing.\n\n",
            "### Key Concepts\n",
            "- Concept A\n- Concept B\n\n",
            "### Insights\n",
            "- Insight 1\n- Insight 2\n\n",
            "### Practical Tips\n",
            "- Tip 1\n- Tip 2\n\n",
            "### Conclusion\n",
            "Short summary.\n",
        ]
        for piece in simulated:
            for small in _chunk(piece, 20):
                yield small
                # Small delay per chunk produces the "typing" effect locally
                await asyncio.sleep(0.015)
        return

    # Real streaming via Groq's OpenAI-compatible API
    payload = {
        "model": GROQ_MODEL,
        "messages": [
            {"role": "system", "content": "You are a clear, helpful technical writer."},
            {"role": "user", "content": writer_prompt},
        ],
        "temperature": 0.6,
        "stream": True,
    }

    # Using requests stream; iterate server-sent "data: ..." lines.
    # NOTE(review): requests is synchronous, so each iter_lines() step blocks
    # the event loop until the next chunk arrives — acceptable for a single
    # stream, but consider an async HTTP client under concurrency. TODO confirm.
    with requests.post(
        GROQ_URL, headers=HEADERS, json=payload, stream=True, timeout=DEFAULT_TIMEOUT
    ) as resp:
        resp.raise_for_status()
        for line in resp.iter_lines(decode_unicode=True):
            if not line:
                continue
            if not line.startswith("data: "):
                continue
            # Strip the 6-character "data: " SSE prefix
            data = line[6:].strip()
            if data == "[DONE]":
                break
            try:
                obj = json.loads(data)
                delta = obj["choices"][0]["delta"].get("content", "")
                if not delta:
                    continue
                # Yield tiny chunks to update UI frequently
                for small in _chunk(delta, 20):
                    yield small
            except Exception:
                # Skip malformed lines gracefully
                continue
api/db.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from typing import List, Dict, Any
from dotenv import load_dotenv
from qdrant_client import QdrantClient
from qdrant_client.models import PointStruct, VectorParams, Distance
from sentence_transformers import SentenceTransformer

# Load environment variables from a .env file if it exists
load_dotenv()

# --- Qdrant Configuration ---
QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6333").strip()
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY", "").strip()
COLLECTION = "reports"

# --- Model Loading (from local files within the Docker image) ---
# This path corresponds to where the Dockerfile copies the model.
MODEL_PATH = "./models/all-MiniLM-L6-v2"

# Initialize the embedding model from the local path. This runs at import
# time, so the app fails fast if the model files are missing from the image.
try:
    print(f"Loading sentence-transformer model from local path: {MODEL_PATH}")
    embedding_model = SentenceTransformer(MODEL_PATH)
    print("✅ Model loaded successfully!")
except Exception:
    print(f"❌ FATAL: Could not load the embedding model from {MODEL_PATH}.")
    print("This indicates an issue with the Docker build or the file path in db.py.")
    # Bare `raise` re-raises the active exception with its original traceback
    # (the previous `raise e` was redundant).
    raise
29
+
30
+ # --- Qdrant Client and Collection Setup ---
31
def _make_client() -> QdrantClient:
    """Creates a Qdrant client based on environment variables."""
    client_kwargs = {"url": QDRANT_URL, "timeout": 30.0, "check_compatibility": False}
    if QDRANT_API_KEY:
        client_kwargs["api_key"] = QDRANT_API_KEY
    return QdrantClient(**client_kwargs)
37
+
38
qdrant = _make_client()

def _ensure_collection():
    """Ensures the Qdrant collection exists, creating it if necessary."""
    try:
        qdrant.get_collection(collection_name=COLLECTION)
    except Exception:
        # Any lookup failure is treated as "missing": create the collection
        # with the 384-dim cosine config matching all-MiniLM-L6-v2 embeddings.
        print(f"🔧 Collection '{COLLECTION}' not found. Creating it...")
        qdrant.create_collection(
            collection_name=COLLECTION,
            vectors_config=VectorParams(size=384, distance=Distance.COSINE),
        )
        print("✅ Collection created.")
    else:
        print(f"✅ Collection '{COLLECTION}' already exists.")

_ensure_collection()
54
+
55
+ # --- Database Functions ---
56
def save_report(report_id: str, text: str, title: str):
    """Encodes and saves a report to Qdrant."""
    point = PointStruct(
        id=report_id,
        vector=embedding_model.encode(text).tolist(),
        payload={"text": text, "title": title},
    )
    qdrant.upsert(collection_name=COLLECTION, points=[point])
63
+
64
def list_reports() -> List[Dict[str, Any]]:
    """Lists recent reports, including their titles."""
    points, _ = qdrant.scroll(collection_name=COLLECTION, limit=50)
    reports = []
    for point in points:
        reports.append(
            {
                "id": point.id,
                "title": point.payload.get("title", "(untitled)"),
                "text": point.payload.get("text", ""),
            }
        )
    return reports
68
+
69
def search_reports(query: str) -> List[Dict[str, Any]]:
    """Performs semantic search and returns reports with titles."""
    query_vector = embedding_model.encode(query).tolist()
    matches = qdrant.search(collection_name=COLLECTION, query_vector=query_vector, limit=5)
    return [
        {
            "id": match.id,
            "score": float(match.score),
            "title": match.payload.get("title", "(untitled)"),
            "text": match.payload.get("text", ""),
        }
        for match in matches
    ]
api/models.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
# Pydantic schemas
from pydantic import BaseModel

class ResearchRequest(BaseModel):
    """Request body for POST /start-job-stream (see api/routes.py)."""
    topic: str

class SearchRequest(BaseModel):
    """Request body for POST /search-reports (see api/routes.py)."""
    query: str
9
+
api/routes.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter
2
+ from fastapi.responses import StreamingResponse
3
+ from .models import ResearchRequest, SearchRequest # relative import
4
+ from . import db, crew # relative import
5
+
6
# Single router holding all endpoints below; main.py mounts it on the app.
router = APIRouter()
7
+
8
@router.post("/start-job-stream")
async def start_research_job_stream(request: ResearchRequest):
    """Run the researcher → analyst → writer pipeline, streamed over SSE.

    Each SSE `data:` line is a JSON payload from crew.stream_event: stage
    markers, then writer tokens, then a final event with the saved report's
    id and title, followed by an explicit close event.
    """
    async def event_generator():
        # Researcher
        yield f"data: {crew.stream_event('stage', 'researcher:start')}\n\n"
        researcher_notes = await crew.run_researcher_async(request.topic)
        yield f"data: {crew.stream_event('stage', 'researcher:done')}\n\n"

        # Analyst
        yield f"data: {crew.stream_event('stage', 'analyst:start')}\n\n"
        analyst_notes = await crew.run_analyst_async(researcher_notes)
        yield f"data: {crew.stream_event('stage', 'analyst:done')}\n\n"

        # Writer: token stream
        yield f"data: {crew.stream_event('stage', 'writer:start')}\n\n"
        final_accum = []
        async for token in crew.run_writer_token_stream(
            topic=request.topic,
            researcher_notes=researcher_notes,
            analyst_notes=analyst_notes,
        ):
            final_accum.append(token)
            yield f"data: {crew.stream_event('token', token)}\n\n"

        full_text = "".join(final_accum)
        yield f"data: {crew.stream_event('stage', 'writer:done')}\n\n"

        # Save exactly what was streamed.
        # NOTE(review): save_report is a blocking embed+upsert call inside
        # this async generator; brief, but consider offloading to a thread.
        report_id = crew.generate_report_id()
        db.save_report(report_id, full_text, title=request.topic)

        # Final event (build dict first to avoid f-string brace issues)
        meta = {"report_id": report_id, "title": request.topic}
        yield f"data: {crew.stream_event('final', meta)}\n\n"
        yield "event: close\ndata: done\n\n"

    # SSE must not be buffered: advertise no-cache and tell nginx-style
    # proxies (X-Accel-Buffering) to pass events through immediately.
    return StreamingResponse(
        event_generator(),
        media_type="text/event-stream",
        headers={"Cache-Control": "no-cache", "X-Accel-Buffering": "no"},
    )
46
+
47
@router.get("/list-reports")
def list_reports():
    """Return up to 50 recent reports with id/title/text.

    Declared as a sync endpoint on purpose: db.list_reports() performs a
    blocking Qdrant call, so FastAPI runs it in its threadpool instead of
    blocking the event loop (as the previous `async def` did).
    """
    return db.list_reports()
50
+
51
@router.post("/search-reports")
def search_reports(request: SearchRequest):
    """Semantic search over saved reports (top 5 matches with scores).

    Sync on purpose: db.search_reports() does a blocking embedding + Qdrant
    query, so FastAPI dispatches it to the threadpool rather than stalling
    the event loop (as the previous `async def` did).
    """
    return db.search_reports(request.query)
main.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

# Updated import - since 'api' folder will be directly under /app in the container
from api.routes import router

app = FastAPI(title="Multi-Agent Research Assistant Backend")

# Register routes
app.include_router(router)

# CORS middleware
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True
# effectively accepts credentialed requests from any origin — replace the
# wildcard with the real frontend origin(s) before production. TODO confirm
# whether the frontend actually sends credentials; if not, drop
# allow_credentials instead.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # tighten in prod
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
20
+
21
@app.get("/")
async def root():
    """Health-check endpoint that points callers at the interactive docs."""
    payload = {"message": "Backend is running. Check /docs for API details."}
    return payload
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # Backend dependencies
2
+ fastapi
3
+ uvicorn
4
+ qdrant-client
5
+ sentence-transformers
6
+ requests
7
+ python-dotenv
8
+