vishalkatheriya committed on
Commit
24773d4
Β·
verified Β·
1 Parent(s): 0bd8166

Upload 14 files

Browse files
src/README.md ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Handbook Generator (AI Engineering Assignment)
2
+
3
+ Same technology as the **policy** project: **Google ADK**, **LiteLLM** (OpenAI), and **Streamlit**. The UI talks to the ADK agent **directly** (no API). The agent uses a **RAG tool** (ChromaDB) to answer from uploaded PDFs.
4
+
5
+ ## Features
6
+
7
+ - **PDF upload** β€” Upload PDFs; text is extracted, chunked, embedded (OpenAI), stored in **ChromaDB** (local).
8
+ - **Chat** β€” ADK agent runs in the same process; agent calls **RAG tool** `query_uploaded_documents` and answers with the LLM (OpenAI via LiteLLM).
9
+ - **Handbook generation** β€” Request a 20,000+ word handbook; generation uses RAG and runs section-by-section.
10
+ - **Export** β€” Download the handbook as Markdown.
11
+
12
+ ## Architecture
13
+
14
+ - **Streamlit** (`streamlit_app.py`) β†’ UI; imports **runner_app** to run the agent directly.
15
+ - **runner_app.py** β†’ ADK Runner + session; `run_chat(message)` runs the agent (sync wrapper around `runner.run_async`).
16
+ - **agent.py** β†’ ADK Agent (LiteLLM/OpenAI), tools = `[query_uploaded_documents]`.
17
+ - **RAG** (`rag.py` + `rag_tools.py`) β†’ ChromaDB + OpenAI embeddings.
18
+
19
+ ## Setup
20
+
21
+ ### 1. Python
22
+
23
+ Use Python 3.10+.
24
+
25
+ ### 2. Install dependencies
26
+
27
+ ```bash
28
+ cd ass2
29
+ pip install -r requirements.txt
30
+ ```
31
+
32
+ ### 3. Environment
33
+
34
+ Create a `.env` file in `ass2` (see `.env.example`):
35
+
36
+ ```
37
+ OPENAI_API_KEY=sk-your-openai-api-key-here
38
+ MODEL=gpt-4o
39
+ ```
40
+
41
+ ## Run (single command)
42
+
43
+ ```bash
44
+ cd ass2
45
+ streamlit run streamlit_app.py
46
+ ```
47
+
48
+ Open **http://localhost:8501**. No separate API server.
49
+
50
+ ## How to use
51
+
52
+ 1. **Upload PDFs** β€” In "Upload PDFs", select PDFs and click **Index PDFs**.
53
+ 2. **Chat** β€” In "Chat", ask questions; the ADK agent uses the RAG tool and answers from your documents.
54
+ 3. **Generate handbook** β€” In "Generate Handbook", enter a topic and click **Generate handbook**, then download as Markdown.
55
+
56
+ ## Project structure (all in `ass2`)
57
+
58
+ | File | Purpose |
59
+ |------|--------|
60
+ | `streamlit_app.py` | Streamlit UI (upload, chat, handbook) |
61
+ | `runner_app.py` | ADK Runner + session; `run_chat(message)` for Streamlit |
62
+ | `agent.py` | ADK agent (LiteLLM/OpenAI) + RAG tool |
63
+ | `prompt.py` | Agent name, description, instruction |
64
+ | `rag_tools.py` | ADK tool: `query_uploaded_documents` |
65
+ | `callback.py` | ADK callbacks |
66
+ | `rag.py` | ChromaDB + OpenAI embeddings |
67
+ | `pdf_processor.py` | PDF text extraction and chunking |
68
+ | `handbook_generator.py` | 20k-word handbook generation |
69
+ | `config.py` | Settings and paths |
70
+
71
+ ## Tech stack
72
+
73
+ - **Agent:** Google ADK, LiteLLM (OpenAI)
74
+ - **RAG:** OpenAI embeddings, ChromaDB (local)
75
+ - **UI:** Streamlit (agent runs in-process, no FastAPI)
src/agent.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ADK agent for the Handbook Generator.
3
+ Uses Google ADK + LiteLLM (OpenAI) + RAG tools.
4
+ """
5
+ import os
6
+ from dotenv import load_dotenv
7
+ from google.adk.agents.llm_agent import Agent
8
+ from google.adk.models.lite_llm import LiteLlm
9
+
10
+ import prompt as prmpt
11
+ import callback as cb
12
+ import rag_tools as tls
13
+
14
+ # πŸ”‘ Load .env file
15
+ load_dotenv()
16
+
17
+ openai_api_key = os.getenv("OPENAI_API_KEY")
18
+ model = os.getenv("MODEL", "openai/gpt-4o")
19
+
20
+ if not openai_api_key:
21
+ raise ValueError("OPENAI_API_KEY is not set in .env")
22
+
23
+ os.environ["OPENAI_API_KEY"] = openai_api_key
24
+
25
+ # Create the agent (same pattern as Policy)
26
+ root_agent = Agent(
27
+ model=LiteLlm(
28
+ model=model,
29
+ ),
30
+ name=prmpt.AGENT_CONFIG["name"],
31
+ description=prmpt.AGENT_CONFIG["description"],
32
+ instruction=prmpt.AGENT_CONFIG["instruction"],
33
+ tools=[tls.query_uploaded_documents],
34
+ before_agent_callback=cb.before_agent_callback,
35
+ after_agent_callback=cb.after_agent_callback,
36
+ before_model_callback=cb.before_model_callback,
37
+ after_model_callback=cb.after_model_callback,
38
+ before_tool_callback=cb.before_tool_callback,
39
+ after_tool_callback=cb.after_tool_callback,
40
+ on_model_error_callback=cb.on_model_error_callback,
41
+ on_tool_error_callback=cb.on_tool_error_callback,
42
+ )
src/app.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Handbook Generator β€” Gradio UI (legacy fallback).
3
+ Primary UI is streamlit_app.py.
4
+ Run: python app.py
5
+ """
6
+ import asyncio
7
+ import shutil
8
+ from pathlib import Path
9
+
10
+ import gradio as gr
11
+
12
+ from config import GROK_API_KEY, UPLOADS_DIR, BASE_DIR
13
+ from handbook_generator import build_handbook
14
+ from rag import get_context_for_query, index_pdf, reset_index
15
+
16
+ HANDBOOK_EXPORT_PATH = BASE_DIR / "handbook_export.md"
17
+
18
+
19
+ def _run_async(coro):
20
+ """Run an async coroutine from sync Gradio code."""
21
+ try:
22
+ return asyncio.run(coro)
23
+ except RuntimeError:
24
+ import concurrent.futures
25
+ with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
26
+ return pool.submit(asyncio.run, coro).result(timeout=300)
27
+
28
+
29
def ensure_api_key():
    """Abort the Gradio handler with a user-facing error if no key is set.

    NOTE(review): this file imports GROK_API_KEY from config, but the
    visible config.py defines OPENAI_API_KEY and no GROK_API_KEY — the
    import at the top of this module likely fails; confirm which key name
    config.py actually exports.
    """
    if not GROK_API_KEY:
        raise gr.Error(
            "GROK_API_KEY is not set. Create a .env file in the ass2 folder with: GROK_API_KEY=your-key"
        )
34
+
35
+
36
+ def _file_path(f):
37
+ """Get path from Gradio file input (path string or object with .name)."""
38
+ if f is None:
39
+ return None
40
+ if isinstance(f, (str, Path)):
41
+ return Path(f)
42
+ return Path(getattr(f, "name", str(f)))
43
+
44
+
45
def upload_and_index(files):
    """Save uploaded PDF(s) into UPLOADS_DIR and index them for RAG.

    Resets the existing index first (every upload replaces the previous
    corpus), then indexes each file and returns a per-file status report,
    one line per file.
    NOTE(review): earlier docstring said "LightRAG" while the README
    describes ChromaDB — confirm which backend rag.index_pdf uses.
    """
    if not files:
        return "No files selected."
    ensure_api_key()
    # Start from a clean index so results reflect only this upload.
    reset_index()
    saved = []
    for f in (files if isinstance(files, list) else [files]):
        path = _file_path(f)
        if path is None or not path.exists():
            continue
        dest = UPLOADS_DIR / path.name
        try:
            shutil.copy(str(path), str(dest))
        except Exception:
            # Copy failed (permissions, same file, ...): index in place.
            dest = path
        try:
            # index_pdf returns a count (presumably indexed chunks) — the
            # original discarded it; surface it in the status line.
            n = _run_async(index_pdf(dest, source_name=path.name))
            saved.append(f"{path.name}: indexed ({n} chunks)")
        except Exception as e:
            saved.append(f"{path.name}: Error - {e}")
    return "\n".join(saved) if saved else "No PDFs processed."
67
+
68
+
69
def chat(message, history):
    """Answer *message* with RAG: retrieve context, then ask the model.

    *history* is supplied by gr.ChatInterface but unused — each answer is
    grounded solely in the retrieved document context.
    """
    ensure_api_key()
    from litellm import completion
    from config import CHAT_MODEL

    context = _run_async(get_context_for_query(message))
    if not (context and context.strip()):
        context = "No documents have been uploaded yet. Ask the user to upload PDFs first."

    system = (
        "You are a helpful assistant. Answer based ONLY on the following context "
        "from the user's uploaded documents. If the answer is not in the context, say so clearly."
    )
    user_content = (
        f"Context from uploaded documents:\n\n{context}\n\n---\n\nUser question: {message}"
    )

    response = completion(
        model=CHAT_MODEL,
        messages=[
            {"role": "system", "content": system},
            {"role": "user", "content": user_content},
        ],
        api_key=GROK_API_KEY,
        max_tokens=1500,
        temperature=0.3,
    )
    answer = response.choices[0].message.content or ""
    return answer.strip()
96
+
97
+
98
def run_handbook_simple(topic):
    """Generate a handbook for *topic*; return (status_text, markdown).

    Progress messages from build_handbook are collected and joined into
    the status string; on failure the error is returned as the status and
    the markdown is empty.
    """
    ensure_api_key()
    if not (topic and topic.strip()):
        return "Enter a topic first.", ""
    progress_log = []
    try:
        markdown = _run_async(build_handbook(topic.strip(), on_progress=progress_log.append))
    except Exception as e:
        return f"Error: {e}", ""
    status = "\n".join(progress_log) if progress_log else "Done."
    return status, markdown
110
+
111
+
112
# ── Gradio UI wiring ─────────────────────────────────────────────────
# Three tabs: upload/index, RAG chat, and handbook generation with a
# Markdown download. When run as a script, the app is served locally.
with gr.Blocks(title="Handbook Generator", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Handbook Generator\nUpload PDFs, chat about them, and generate a 20,000+ word handbook.")

    with gr.Tab("Upload PDFs"):
        file_input = gr.File(
            file_count="multiple",
            file_types=[".pdf"],
            label="Upload one or more PDFs",
        )
        index_btn = gr.Button("Index PDFs")
        index_out = gr.Textbox(label="Index result", lines=4)

    with gr.Tab("Chat"):
        # ChatInterface drives chat(message, history) defined above.
        chatbot = gr.ChatInterface(
            fn=chat,
            type="messages",
            title="Ask questions about your uploaded documents",
        )

    with gr.Tab("Generate Handbook"):
        gr.Markdown(
            "Enter a topic (e.g. *Create a handbook on Retrieval-Augmented Generation*). "
            "Generation may take several minutes."
        )
        topic_in = gr.Textbox(
            label="Handbook topic",
            placeholder="e.g. Retrieval-Augmented Generation",
            lines=1,
        )
        gen_btn = gr.Button("Generate 20k-word handbook")
        status_out = gr.Textbox(label="Status", lines=4, interactive=False)
        handbook_out = gr.Markdown(label="Handbook (Markdown)")
        # Hidden until a handbook exists; do_handbook toggles visibility.
        export_btn = gr.DownloadButton("Export as Markdown", visible=False)

    index_btn.click(
        fn=lambda files: upload_and_index(files) if files else "No files selected.",
        inputs=[file_input],
        outputs=[index_out],
    )

    def do_handbook(topic):
        # Generate, persist to HANDBOOK_EXPORT_PATH for the download
        # button, and reveal the export button only when output exists.
        status, md = run_handbook_simple(topic)
        if md:
            HANDBOOK_EXPORT_PATH.write_text(md, encoding="utf-8")
        return (
            status,
            md,
            gr.update(visible=bool(md), value=str(HANDBOOK_EXPORT_PATH) if md else None),
        )

    gen_btn.click(
        fn=do_handbook,
        inputs=[topic_in],
        outputs=[status_out, handbook_out, export_btn],
    )

if __name__ == "__main__":
    # Local-only bind; no separate API server (agent runs in-process).
    demo.launch(server_name="127.0.0.1", server_port=7860)
src/callback.py ADDED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ADK callbacks for logging and optional audit (same as policy/callback.py).
3
+ """
4
+
5
+ import logging
6
+ from typing import Any, Optional
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+ _audit_sink: Optional[Any] = None
11
+
12
+
13
def set_audit_sink(sink: Any) -> None:
    """Register *sink* as the module-wide audit destination.

    The sink only needs a ``store(event: dict)`` method (checked via
    hasattr in _emit); pass None to disable audit persistence.
    """
    global _audit_sink
    _audit_sink = sink
16
+
17
+
18
+ def _get_session_id(context: Any) -> Optional[str]:
19
+ try:
20
+ if hasattr(context, "session") and context.session is not None:
21
+ return getattr(context.session, "id", None) or getattr(
22
+ context.session, "session_id", None
23
+ )
24
+ except Exception:
25
+ pass
26
+ return None
27
+
28
+
29
+ def _get_message_preview(content: Any, max_len: int = 500) -> Optional[str]:
30
+ if content is None:
31
+ return None
32
+ try:
33
+ if hasattr(content, "parts") and content.parts:
34
+ text = getattr(content.parts[0], "text", None) or str(content.parts[0])[:max_len]
35
+ return (text or "")[:max_len] if text else None
36
+ if isinstance(content, str):
37
+ return content[:max_len]
38
+ return str(content)[:max_len]
39
+ except Exception:
40
+ return None
41
+
42
+
43
def _emit(event: dict) -> None:
    """Log *event* and forward it to the audit sink when one is installed."""
    logger.debug("[ADK callback] %s", event.get("event_type"), extra=event)
    sink = _audit_sink
    if sink is None or not hasattr(sink, "store"):
        return
    try:
        sink.store(event)
    except Exception as e:
        # A broken sink must never break the agent run — log and move on.
        logger.warning("[ADK callback] audit sink store failed: %s", e)
50
+
51
+
52
+ def _context_from_args(*args: Any, **kwargs: Any) -> Any:
53
+ return kwargs.get("callback_context") or (args[0] if args else None)
54
+
55
+
56
def _report(
    event_type: str,
    context: Any,
    *,
    tool_name: Optional[str] = None,
    preview_fn: Optional[Any] = None,
    has_error: bool = False,
    details_fn: Optional[Any] = None,
) -> None:
    """Build the standard callback event dict and emit it.

    Shared by all eight ADK callbacks below — they previously duplicated
    this block and differed only in event_type, tool_name, preview, and
    error details. *preview_fn* / *details_fn* are zero-arg callables
    evaluated inside the try so a failing preview is logged, never raised
    (matching the original per-callback behavior and warning message).
    """
    try:
        event = {
            "event_type": event_type,
            "agent_name": getattr(context, "agent_name", None),
            "invocation_id": getattr(context, "invocation_id", None),
            "user_id": getattr(context, "user_id", None),
            "session_id": _get_session_id(context),
            "tool_name": tool_name,
            "message_preview": preview_fn() if preview_fn is not None else None,
            "has_error": has_error,
            "details": details_fn() if details_fn is not None else {},
        }
        _emit(event)
    except Exception as e:
        logger.warning("[ADK callback] %s failed: %s", event_type, e)


def before_agent_callback(*args: Any, **kwargs: Any) -> Optional[Any]:
    """Log the start of an agent invocation."""
    context = _context_from_args(*args, **kwargs)
    if context is None:
        return None
    _report(
        "before_agent",
        context,
        preview_fn=lambda: _get_message_preview(getattr(context, "user_content", None)),
    )
    return None


def after_agent_callback(*args: Any, **kwargs: Any) -> Optional[Any]:
    """Log the end of an agent invocation."""
    context = _context_from_args(*args, **kwargs)
    if context is None:
        return None
    _report(
        "after_agent",
        context,
        preview_fn=lambda: _get_message_preview(getattr(context, "user_content", None)),
    )
    return None


def before_model_callback(*args: Any, **kwargs: Any) -> Optional[Any]:
    """Log an outgoing model request (previews the last content item)."""
    context = _context_from_args(*args, **kwargs)
    llm_request = kwargs.get("llm_request")
    if context is None:
        return None

    def _preview():
        # Preview the most recent message in the request, if any.
        if llm_request is not None and getattr(llm_request, "contents", None):
            return _get_message_preview(llm_request.contents[-1])
        return None

    _report("before_model", context, preview_fn=_preview)
    return None


def after_model_callback(*args: Any, **kwargs: Any) -> Optional[Any]:
    """Log a completed model call."""
    context = _context_from_args(*args, **kwargs)
    if context is None:
        return None
    _report(
        "after_model",
        context,
        preview_fn=lambda: _get_message_preview(getattr(context, "user_content", None)),
    )
    return None


def before_tool_callback(*args: Any, **kwargs: Any) -> Optional[Any]:
    """Log an imminent tool invocation with its (truncated) arguments."""
    context = _context_from_args(*args, **kwargs)
    tool_name = kwargs.get("tool_name")
    tool_args = kwargs.get("tool_input") or kwargs.get("tool_args")
    if context is None:
        return None
    _report(
        "before_tool",
        context,
        tool_name=tool_name,
        preview_fn=lambda: str(tool_args)[:500] if tool_args is not None else None,
        details_fn=lambda: {"tool_args": tool_args} if tool_args is not None else {},
    )
    return None


def after_tool_callback(*args: Any, **kwargs: Any) -> Optional[Any]:
    """Log a completed tool invocation with its (truncated) result."""
    context = _context_from_args(*args, **kwargs)
    tool_name = kwargs.get("tool_name")
    tool_result = kwargs.get("tool_result") or kwargs.get("result")
    if context is None:
        return None
    _report(
        "after_tool",
        context,
        tool_name=tool_name,
        preview_fn=lambda: str(tool_result)[:500] if tool_result is not None else None,
        details_fn=lambda: {"tool_result": tool_result} if tool_result is not None else {},
    )
    return None


def on_model_error_callback(*args: Any, **kwargs: Any) -> Optional[Any]:
    """Log a model-call failure (has_error=True, error string in details)."""
    context = _context_from_args(*args, **kwargs)
    error = kwargs.get("error")
    if context is None:
        return None
    _report(
        "on_model_error",
        context,
        has_error=True,
        preview_fn=lambda: str(error)[:500] if error is not None else None,
        details_fn=lambda: {"error": str(error)},
    )
    return None


def on_tool_error_callback(*args: Any, **kwargs: Any) -> Optional[Any]:
    """Log a tool-call failure (has_error=True, error string in details)."""
    context = _context_from_args(*args, **kwargs)
    tool_name = kwargs.get("tool_name")
    error = kwargs.get("error")
    if context is None:
        return None
    _report(
        "on_tool_error",
        context,
        tool_name=tool_name,
        has_error=True,
        preview_fn=lambda: str(error)[:500] if error is not None else None,
        details_fn=lambda: {"error": str(error)},
    )
    return None
src/config.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Configuration for the Handbook Generator app."""
2
+ import os
3
+ from pathlib import Path
4
+
5
+ # Load from .env if present
6
+ from dotenv import load_dotenv
7
+ load_dotenv()
8
+
9
+ # ── API Keys ──────────────────────────────────────────────────────────
10
+ OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
11
+ CHAT_MODEL = os.getenv("MODEL", "openai/gpt-4o").strip() or "openai/gpt-4o"
12
+ EMBEDDING_MODEL = "text-embedding-3-small"
13
+
14
+ # Supabase
15
+ SUPABASE_URL = os.getenv("SUPABASE_URL", "")
16
+ SUPABASE_KEY = os.getenv("SUPABASE_KEY", "")
17
+
18
+ # ── Paths ─────────────────────────────────────────────────────────────
19
+ BASE_DIR = Path(__file__).resolve().parent
20
+ UPLOADS_DIR = BASE_DIR / "uploads"
21
+ UPLOADS_DIR.mkdir(exist_ok=True)
22
+
23
+ # LightRAG working directory
24
+ WORKING_DIR = BASE_DIR / "lightrag_working"
25
+ WORKING_DIR.mkdir(exist_ok=True)
26
+
27
+ # ── Chunking (used by pdf_processor) ─────────────────────────────────
28
+ CHUNK_SIZE = 1000
29
+ CHUNK_OVERLAP = 200
30
+
31
+ # ── Handbook generation ───────────────────────────────────────────────
32
+ TARGET_WORD_COUNT = 20000
33
+ SECTION_WORD_TARGET = 1200
34
+ MAX_SECTIONS = 25
src/extract_pdf.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Extract assignment PDF text to a file for reading."""
2
+ from pypdf import PdfReader
3
+
4
+ reader = PdfReader("AI-Engineering-Assignment.pdf")
5
+ with open("assignment_text.txt", "w", encoding="utf-8") as f:
6
+ for i, page in enumerate(reader.pages):
7
+ text = page.extract_text()
8
+ if text:
9
+ f.write(f"--- Page {i+1} ---\n")
10
+ f.write(text)
11
+ f.write("\n\n")
12
+ print("Done. Written to assignment_text.txt")
src/handbook_export.md ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # jungle book
2
+
3
+ ## Introduction
4
+
5
+ "The Jungle Book," a timeless classic penned by Rudyard Kipling, is an exemplary piece of literature that continues to captivate readers of all ages. Originally published in 1894, this collection of stories has been celebrated for its vibrant depiction of the natural world and its profound exploration of human nature through the lens of animal allegories. The tales unfold in the lush, mysterious jungles of India, where anthropomorphized animals and human characters coexist, revealing insights into society, morality, and the intrinsic link between man and nature.
6
+
7
+ ### Overview
8
+
9
+ At its core, "The Jungle Book" is a compilation of stories and poems, with the most renowned being the tales of Mowgli, a young boy raised by wolves in the Indian jungle. These narratives are not merely stories of adventure and wildlife; they are rich tapestries woven with themes of identity, belonging, and the struggle between civilization and the wild. Kipling's masterful storytelling is complemented by his intricate descriptions of the jungle environment, which becomes a character in its own right, embodying both beauty and peril.
10
+
11
+ The structure of "The Jungle Book" is unique, as it oscillates between the adventures of Mowgli and other standalone tales featuring a diverse array of characters, such as Rikki-Tikki-Tavi, the valiant mongoose, and Kotick, the white seal. This collection is a mosaic of narratives that, while distinct, collectively paint a vivid picture of Kipling's imagined world. Each story serves as a parable, imparting lessons about courage, loyalty, and the consequences of human actions on the natural world.
12
+
13
+ ### Importance
14
+
15
+ "The Jungle Book" holds a significant place in both literary and cultural history. Its importance is multifaceted, spanning from its contributions to children's literature to its impact on popular culture. The book is celebrated for its ability to transcend the boundaries of age and time, offering insights that resonate with both young readers and adults.
16
+
17
+ One of the key reasons for its enduring importance is Kipling's ability to convey complex themes through the guise of seemingly simple stories. The character of Mowgli, for instance, is a powerful allegory for the journey of self-discovery and the quest for identity. Through Mowgli's interactions with the jungle's inhabitants, Kipling explores the idea of what it means to be human and the innate tension between nature and nurture.
18
+
19
+ Furthermore, "The Jungle Book" is an important work in the context of colonial literature. Written during a time when the British Empire was at its zenith, the book reflects the complexities of colonial attitudes towards India. Kipling, an Anglo-Indian, infuses his stories with a nuanced understanding of Indian culture, even as he grapples with the ideological biases of his time. This duality provides a rich ground for analysis and discussion, making "The Jungle Book" a valuable resource for examining the interplay between literature and historical context.
20
+
21
+ ### Scope
22
+
23
+ The scope of "The Jungle Book" extends beyond its pages, influencing a wide range of adaptations and interpretations across different media. The stories have been adapted into numerous films, television series, stage productions, and even operas, each bringing a new perspective to Kipling's work. These adaptations have contributed to the book's lasting legacy, ensuring its relevance in contemporary culture.
24
+
25
+ In literature, "The Jungle Book" has inspired countless authors and storytellers, influencing the way animals and nature are depicted in fiction. Kipling's anthropomorphic portrayal of animals set a precedent for future works, encouraging writers to explore the moral and philosophical implications of the animal kingdom as a reflection of human society.
26
+
27
+ Academically, "The Jungle Book" is a rich subject for study, offering insights into narrative structure, character development, and thematic exploration. It serves as a foundational text in the study of children's literature, colonial literature, and ecological narratives. Scholars analyze its themes of belonging and identity, the ethical dilemmas posed by its characters, and its portrayal of the natural world, which continues to resonate in today's discussions about environmental conservation and human impact on nature.
28
+
29
+ In conclusion, "The Jungle Book" is more than just a collection of stories; it is a cultural artifact that has shaped the literary landscape and continues to inspire and provoke thought. Its blend of adventure, moral lessons, and richly drawn characters ensures its place as a beloved classic, while its themes of identity, nature, and society provide fertile ground for continued exploration and appreciation.
30
+
src/handbook_generator.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Generate 20,000+ word handbooks using the LongWriter technique and OpenAI."""
2
+ from typing import Callable, List, Optional
3
+
4
+ from openai import OpenAI
5
+ from config import (
6
+ OPENAI_API_KEY,
7
+ CHAT_MODEL,
8
+ SECTION_WORD_TARGET,
9
+ TARGET_WORD_COUNT,
10
+ )
11
+ from rag import get_context_for_query
12
+
13
+ _client: Optional[OpenAI] = None
14
+
15
+
16
def _get_client() -> OpenAI:
    """Return a module-cached OpenAI client, creating it on first use.

    Lazy construction avoids building a client (and needing a key) at
    import time.
    """
    global _client
    if _client is None:
        _client = OpenAI(api_key=OPENAI_API_KEY)
    return _client
21
+
22
+
23
def word_count(text: str) -> int:
    """Count whitespace-separated tokens in *text*."""
    words = text.split()
    return len(words)
25
+
26
+
27
def llm_call(prompt: str, system_prompt: str = "You are a professional writer.") -> str:
    """Send one system+user exchange to OpenAI and return the reply text.

    CHAT_MODEL may carry a LiteLLM-style provider prefix ("openai/gpt-4o");
    the OpenAI SDK wants the bare model name, so the prefix is stripped.
    Returns "" when the model produced no content.
    """
    bare_model = CHAT_MODEL.rsplit("/", 1)[-1]
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    resp = _get_client().chat.completions.create(
        model=bare_model,
        messages=messages,
        temperature=0.7,
    )
    return resp.choices[0].message.content or ""
41
+
42
+
43
+ # ─── LongWriter Phase 1: Outline ─────────────────────────────────────
44
def generate_outline(topic: str, context: str) -> List[dict]:
    """Generate a detailed structure for the handbook.

    Returns a list of ``{"title": ..., "points": ...}`` dicts parsed from
    the model's "Title | point, point" one-line-per-section format. Lines
    without a "|" are silently dropped, so a malformed response can yield
    an empty list (build_handbook supplies a fallback section then).
    """
    prompt = f"""You are planning a 20,000-word handbook on "{topic}".
Based on the reference material, create a highly detailed outline.
For each major section, provide a title and 3-5 sub-points (keywords) to cover.
Aim for 15-20 sections to ensure we can hit the 20k word target.

Reference: {context[:10000] if context else 'No reference material available.'}

Output as a list of sections (one per line):
Section Title | point 1, point 2, point 3
..."""

    content = llm_call(prompt)
    sections = []
    for line in content.splitlines():
        if "|" in line:
            # Split only on the first "|": the points part may contain more.
            title, points = line.split("|", 1)
            sections.append({"title": title.strip(), "points": points.strip()})
    return sections
64
+
65
+
66
+ # ─── LongWriter Phase 2: Generate long sections ──────────────────────
67
def generate_long_section(
    section_title: str,
    points: str,
    topic: str,
    context: str,
    previous_context: str,
) -> str:
    """Generate a single section following LongWriter length instructions.

    Only the tail of *previous_context* (last 2,000 chars) and the head of
    *context* (first 5,000 chars) are sent, keeping the prompt bounded.
    NOTE(review): the prompt hard-codes a 1,500-word minimum while
    config.SECTION_WORD_TARGET (1200) is imported by this module but never
    used — confirm which target is intended.
    """
    prev_snippet = previous_context[-2000:] if previous_context else "This is the first section."
    ctx_snippet = context[:5000] if context else "No reference material."

    prompt = f"""Write a comprehensive section for a handbook on "{topic}".
Section Title: {section_title}
Key points to cover: {points}

Reference Material: {ctx_snippet}

LongWriter Instructions:
1. This section MUST be at least 1,500 words long.
2. Be extremely descriptive. Explain concepts in depth.
3. Use examples, analogies, and detailed breakdowns.
4. Maintain continuity with previous sections: {prev_snippet}

Write only the content for "{section_title}". Start with ## {section_title}."""

    return llm_call(prompt)
93
+
94
+
95
+ # ─── Main builder ────────────────────────────────────────────────────
96
def _resolve_context(value):
    """Normalize the result of get_context_for_query to a plain value.

    app.py drives get_context_for_query through an async runner, which
    suggests it is a coroutine function; called synchronously here it
    would return a coroutine object and the later slicing (context[:N])
    would raise TypeError. If we received an awaitable, run it to
    completion; plain strings pass through untouched.
    """
    import asyncio
    import inspect

    if inspect.isawaitable(value):
        return asyncio.run(value)
    return value


def build_handbook(
    topic: str,
    on_progress: Optional[Callable[[str], None]] = None,
) -> str:
    """Build a 20k+ word handbook using the LongWriter technique.

    Phase 1 generates an outline from retrieved context; phase 2 writes
    each section with fresh per-section context, stopping early once
    TARGET_WORD_COUNT words have been produced. *on_progress*, if given,
    receives human-readable status strings as generation proceeds.

    Returns the complete handbook as a single Markdown string.
    """

    def progress(msg: str):
        if on_progress:
            on_progress(msg)

    progress("Gathering context for outline...")
    context = _resolve_context(get_context_for_query(topic))

    progress("Generating detailed outline (LongWriter phase 1)...")
    sections = generate_outline(topic, context)
    if not sections:
        # Malformed outline response: fall back to a minimal single section.
        sections = [{"title": "Introduction", "points": "Overview, Importance, Scope"}]

    progress(f"Outline created with {len(sections)} sections. Starting generation...")

    full_handbook = [f"# {topic}\n\n"]
    total_words = 0
    previous_content = ""

    for i, sec in enumerate(sections):
        title = sec["title"]
        points = sec["points"]
        progress(f"Writing Section {i+1}/{len(sections)}: {title} (Target: 1500+ words)...")

        sec_context = _resolve_context(get_context_for_query(f"{topic}: {title} {points}"))

        section_text = generate_long_section(
            title, points, topic, sec_context, previous_content
        )

        full_handbook.append(section_text + "\n\n")
        # generate_long_section only ever consumes the last 2,000 chars of
        # this value, so keep just that tail instead of accumulating the
        # whole document (avoids quadratic string growth).
        previous_content = (previous_content + section_text)[-2000:]
        total_words += word_count(section_text)

        progress(f"Section completed. Current word count: {total_words}")

        if total_words >= TARGET_WORD_COUNT:
            progress("Target length reached.")
            break

    final_doc = "".join(full_handbook)
    progress(f"Handbook generated. Final word count: {word_count(final_doc)}")
    return final_doc
src/pdf_processor.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """PDF text extraction and chunking for RAG."""
2
+ import re
3
+ from pathlib import Path
4
+ from typing import List
5
+
6
+ import pdfplumber
7
+ from pypdf import PdfReader
8
+
9
+ from config import CHUNK_OVERLAP, CHUNK_SIZE
10
+
11
+
12
def extract_text_from_pdf(pdf_path: str | Path) -> str:
    """Extract all text from a PDF.

    Uses pdfplumber first (better table handling) and falls back to pypdf
    if pdfplumber raises. Whitespace runs are collapsed to single spaces.

    Args:
        pdf_path: Path to the PDF file.

    Returns:
        The extracted text as one normalized string ("" if nothing found).

    Raises:
        FileNotFoundError: If the path does not exist.
    """
    path = Path(pdf_path)
    if not path.exists():
        raise FileNotFoundError(f"PDF not found: {path}")

    collected: List[str] = []
    try:
        with pdfplumber.open(path) as pdf:
            for pg in pdf.pages:
                extracted = pg.extract_text()
                if extracted:
                    collected.append(extracted)
    except Exception:
        # Fallback to pypdf; any pages already collected by pdfplumber
        # before the failure are kept, matching best-effort extraction.
        for pg in PdfReader(path).pages:
            extracted = pg.extract_text()
            if extracted:
                collected.append(extracted)

    joined = "\n\n".join(collected)
    # Collapse newlines/tabs/multiple spaces into single spaces.
    return re.sub(r"\s+", " ", joined).strip()
36
+
37
+
38
def chunk_text(
    text: str,
    chunk_size: int = CHUNK_SIZE,
    overlap: int = CHUNK_OVERLAP,
) -> List[dict]:
    """
    Split text into overlapping chunks for embedding.

    Args:
        text: Source text (stripped before chunking).
        chunk_size: Maximum characters per chunk.
        overlap: Characters shared between consecutive chunks.

    Returns:
        List of dicts with 'text' and 'metadata' ({'chunk_index': int});
        the caller attaches the 'source' key (see process_pdf).
    """
    if not text or not text.strip():
        return []

    chunks: List[dict] = []
    start = 0
    index = 0
    text = text.strip()

    while start < len(text):
        end = start + chunk_size
        chunk = text[start:end]

        # Prefer ending at a sentence or line boundary, but only when that
        # keeps at least half the chunk (avoids tiny fragments).
        if end < len(text):
            last_period = chunk.rfind(". ")
            last_newline = chunk.rfind("\n")
            break_at = max(last_period, last_newline)
            if break_at > chunk_size // 2:
                chunk = chunk[: break_at + 1]
                end = start + break_at + 1

        chunk = chunk.strip()
        if chunk:
            chunks.append({
                "text": chunk,
                "metadata": {"chunk_index": index},
            })
            index += 1

        if end >= len(text):
            break
        # Step back by `overlap` for continuity, but always advance by at
        # least one character: with `start = end - overlap` alone, an
        # overlap >= the effective chunk length (possible after boundary
        # trimming, e.g. overlap >= chunk_size // 2) would loop forever.
        start = max(end - overlap, start + 1)

    return chunks
79
+
80
+
81
def process_pdf(pdf_path: str | Path, source_name: str | None = None) -> List[dict]:
    """
    Extract text from a PDF and return embedding-ready chunks, each tagged
    with a 'source' metadata label.

    Args:
        pdf_path: Path to the PDF file.
        source_name: Optional label (e.g. filename); defaults to the file's name.

    Returns:
        Chunk dicts from chunk_text() with metadata['source'] filled in.
    """
    path = Path(pdf_path)
    label = source_name or path.name
    pieces = chunk_text(extract_text_from_pdf(path))
    for piece in pieces:
        piece["metadata"]["source"] = label
    return pieces
src/prompt.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Agent prompt configuration for the Handbook Generator."""
2
+
3
+ AGENT_CONFIG = {
4
+ "name": "handbook_assistant",
5
+ "description": "An AI assistant that answers questions from uploaded PDFs and helps generate handbooks. Uses RAG (retrieval from uploaded documents) via tools.",
6
+ "instruction": """
7
+ You are the Handbook Assistant, an intelligent assistant that helps users understand their uploaded PDF documents and generate long-form handbooks.
8
+
9
+ CRITICAL RULES:
10
+ 1. ALWAYS call the query_uploaded_documents tool FIRST before answering ANY question about documents, assignments, PDFs, or their content. NEVER assume documents are not uploaded β€” always check by calling the tool.
11
+ 2. When the user says they uploaded a PDF or asks about "my document", "my assignment", "my PDF", etc., you MUST call query_uploaded_documents with their question.
12
+ 3. Only say "no documents uploaded" if the tool explicitly returns that message.
13
+ 4. After getting tool results, summarize the retrieved content in your answer. Do not dump raw text.
14
+
15
+ How to behave:
16
+ - Use the query_uploaded_documents tool for ANY question that could relate to uploaded content.
17
+ - Be professional and helpful.
18
+ - For handbook generation, tell the user to use the "Generate Handbook" tab.
19
+ - Maintain context across messages.
20
+ """
21
+ }
src/rag.py ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ RAG System β€” Hybrid approach:
3
+ 1. Simple vector search (OpenAI embeddings + local storage) for reliable chat retrieval
4
+ 2. LightRAG knowledge graph for enriched context (optional, non-blocking)
5
+
6
+ This avoids LightRAG's internal async worker issues with Streamlit.
7
+ """
8
+ import json
9
+ import os
10
+ from pathlib import Path
11
+ from typing import List, Optional
12
+
13
+ import numpy as np
14
+ from openai import OpenAI
15
+
16
+ from config import (
17
+ OPENAI_API_KEY,
18
+ CHAT_MODEL,
19
+ WORKING_DIR,
20
+ EMBEDDING_MODEL,
21
+ CHUNK_SIZE,
22
+ CHUNK_OVERLAP,
23
+ )
24
+ from pdf_processor import extract_text_from_pdf, chunk_text
25
+
26
# ─── Vector store file ───────────────────────────────────────────────
# Flat JSON file holding every chunk record persisted by _save_db.
VECTORS_FILE = WORKING_DIR / "vectors.json"

# Lazily-created OpenAI client; always access via _get_client().
_client: Optional[OpenAI] = None
# In-memory vector store, mirrored to VECTORS_FILE by _save_db/_load_db.
_chunks_db: list[dict] = []  # {"text": ..., "source": ..., "embedding": [...]}
31
+
32
+
33
def _get_client() -> OpenAI:
    """Return the shared OpenAI client, constructing it lazily on first call."""
    global _client
    if _client is not None:
        return _client
    _client = OpenAI(api_key=OPENAI_API_KEY)
    return _client
38
+
39
+
40
def _embed_texts(texts: list[str]) -> list[list[float]]:
    """Embed a batch of strings with OpenAI (synchronous call).

    Returns one embedding vector per input string, in input order.
    """
    result = _get_client().embeddings.create(
        model=EMBEDDING_MODEL,
        input=texts,
    )
    return [row.embedding for row in result.data]
48
+
49
+
50
+ def _cosine_similarity(a: list[float], b: list[float]) -> float:
51
+ a_np = np.array(a)
52
+ b_np = np.array(b)
53
+ dot = np.dot(a_np, b_np)
54
+ norm = np.linalg.norm(a_np) * np.linalg.norm(b_np)
55
+ return float(dot / norm) if norm > 0 else 0.0
56
+
57
+
58
def _load_db():
    """Populate the in-memory store from VECTORS_FILE (empty if absent)."""
    global _chunks_db
    if not VECTORS_FILE.exists():
        _chunks_db = []
        return
    with open(VECTORS_FILE, "r", encoding="utf-8") as fh:
        _chunks_db = json.load(fh)
66
+
67
+
68
def _save_db():
    """Persist the in-memory store to VECTORS_FILE, creating dirs as needed."""
    WORKING_DIR.mkdir(parents=True, exist_ok=True)
    with open(VECTORS_FILE, "w", encoding="utf-8") as out:
        out.write(json.dumps(_chunks_db))
73
+
74
+
75
+ # ─── Public API (all synchronous β€” no event loop issues) ─────────────
76
+
77
def index_pdf(pdf_path: str | Path, source_name: str | None = None) -> int:
    """Extract text from PDF, chunk, embed, and store.

    Args:
        pdf_path: Path to the PDF file.
        source_name: Optional label for the 'source' metadata; defaults to
            the file name.

    Returns:
        Number of chunks indexed (0 when no text or no chunks).
    """
    global _chunks_db

    text = extract_text_from_pdf(pdf_path)
    if not text:
        return 0

    source = source_name or Path(pdf_path).name
    chunks = chunk_text(text)

    if not chunks:
        return 0

    # Sync with what is already persisted before appending: previously the
    # append went to a possibly-unloaded in-memory list and _save_db then
    # overwrote VECTORS_FILE, silently dropping vectors indexed by an
    # earlier process run.
    _load_db()

    texts = [c["text"] for c in chunks]

    # Embed in batches of 20 to stay under per-request token limits.
    all_embeddings: list[list[float]] = []
    for i in range(0, len(texts), 20):
        all_embeddings.extend(_embed_texts(texts[i:i + 20]))

    # Store one record per chunk alongside its embedding vector.
    for chunk, embedding in zip(chunks, all_embeddings):
        _chunks_db.append({
            "text": chunk["text"],
            "source": source,
            "embedding": embedding,
        })

    _save_db()
    return len(chunks)
111
+
112
+
113
def index_pdfs(pdf_paths: List[str | Path]) -> int:
    """Index multiple PDFs; returns the total number of chunks indexed."""
    return sum(index_pdf(path) for path in pdf_paths)
119
+
120
+
121
def get_context_for_query(query: str, top_k: int = 5) -> str:
    """Return the top_k most relevant chunks, formatted as one context string.

    Reloads the vector store from disk on every call so PDFs indexed since
    the last call are visible. Returns "" when nothing is indexed or no
    chunk matches.
    """
    _load_db()

    if not _chunks_db:
        return ""

    # Embed the query once, then rank every stored chunk against it.
    query_vec = _embed_texts([query])[0]

    ranked = sorted(
        (
            (
                _cosine_similarity(query_vec, record["embedding"]),
                record["text"],
                record.get("source", "unknown"),
            )
            for record in _chunks_db
        ),
        key=lambda item: item[0],
        reverse=True,
    )[:top_k]

    if not ranked:
        return ""

    # Tag each passage with its provenance and similarity score.
    blocks = [
        f"[Source: {src} | Relevance: {score:.2f}]\n{body}"
        for score, body, src in ranked
    ]
    return "\n\n---\n\n".join(blocks)
152
+
153
+
154
def reset_index():
    """Clear all indexed data: in-memory store and the working directory."""
    import shutil

    global _chunks_db
    _chunks_db = []
    # Removing WORKING_DIR also deletes VECTORS_FILE; recreate it empty so
    # subsequent _save_db calls succeed.
    if WORKING_DIR.exists():
        shutil.rmtree(WORKING_DIR)
    WORKING_DIR.mkdir(parents=True, exist_ok=True)
src/rag_tools.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ RAG tools for the ADK agent.
3
+ The agent calls these tools to get context from uploaded PDFs.
4
+ """
5
+ from rag import get_context_for_query
6
+
7
+ RAG_TOP_K = 8
8
+
9
+
10
def query_uploaded_documents(query: str) -> str:
    """
    Retrieve relevant passages from the user's uploaded PDF documents
    for a given question or topic.
    Call this whenever the user asks about the content of their documents.
    """
    # NOTE: the docstring above doubles as the tool description the agent
    # framework shows to the LLM β€” keep it accurate.
    if not query or not str(query).strip():
        return "Please provide a non-empty question or topic to search the documents."
    context = get_context_for_query(query.strip(), top_k=RAG_TOP_K)
    if context and context.strip():
        return context
    return (
        "No documents have been indexed yet, or no relevant passages were found. "
        "Ask the user to upload and index PDFs first, or try a different query."
    )
src/runner_app.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ADK Runner setup β€” used by Streamlit to run the agent directly (no API).
3
+ """
4
+ import asyncio
5
+ import logging
6
+ from google.adk.sessions import InMemorySessionService
7
+ from google.adk.runners import Runner
8
+ from google.genai import types
9
+
10
+ from agent import root_agent
11
+
12
logger = logging.getLogger(__name__)

# One app-wide ADK runner. Sessions are held in memory only, so chat
# history is lost when the process restarts.
APP_NAME = "handbook_app"
session_service = InMemorySessionService()

runner = Runner(
    agent=root_agent,
    app_name=APP_NAME,
    session_service=session_service,
)
22
+
23
+
24
+ def _text_from_content(content) -> str | None:
25
+ if content is None:
26
+ return None
27
+ try:
28
+ parts = getattr(content, "parts", None)
29
+ if not parts:
30
+ return None
31
+ for part in parts:
32
+ text = getattr(part, "text", None)
33
+ if text is not None and str(text).strip():
34
+ return str(text).strip()
35
+ except (AttributeError, TypeError, IndexError):
36
+ pass
37
+ return None
38
+
39
+
40
async def _run_chat_async(user_id: str, session_id: str, message: str) -> str:
    """Send one user message through the ADK runner and return the reply text.

    Creates the (app_name, user_id, session_id) session on first use so the
    agent accumulates chat history across calls within this process.
    """
    session = await session_service.get_session(
        app_name=APP_NAME,
        user_id=user_id,
        session_id=session_id,
    )
    if not session:
        await session_service.create_session(
            app_name=APP_NAME,
            user_id=user_id,
            session_id=session_id,
        )

    user_content = types.Content(
        role="user",
        parts=[types.Part(text=message)],
    )

    final_response = None
    async for event in runner.run_async(
        user_id=user_id,
        session_id=session_id,
        new_message=user_content,
    ):
        try:
            # Prefer the final response and stop streaming once we have it.
            # getattr with a callable default keeps this safe for event
            # objects that lack is_final_response.
            if getattr(event, "is_final_response", lambda: False)() and getattr(event, "content", None):
                text = _text_from_content(event.content)
                if text:
                    final_response = text
                    break
            # Otherwise remember the latest intermediate text as a fallback
            # answer in case no final response ever arrives.
            if getattr(event, "content", None):
                text = _text_from_content(event.content)
                if text:
                    final_response = text
        except (AttributeError, TypeError, KeyError):
            # Malformed event β€” skip it rather than abort the stream.
            continue

    return final_response or "No response from agent. Please try again."
78
+
79
+
80
def run_chat(message: str, user_id: str = "default_user") -> str:
    """Run the ADK agent with the given message. Sync wrapper for Streamlit.

    Returns the agent's reply, or an "Error: ..." string on failure (the UI
    renders it instead of crashing).
    """
    session_id = f"{user_id}_session"
    try:
        try:
            return asyncio.run(_run_chat_async(user_id, session_id, message))
        except RuntimeError as err:
            reason = str(err).lower()
            if "event loop" not in reason and "already running" not in reason:
                raise
            # This thread already owns a running event loop (common under
            # Streamlit) β€” run the coroutine on a dedicated worker thread.
            import concurrent.futures
            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
                task = pool.submit(
                    asyncio.run, _run_chat_async(user_id, session_id, message)
                )
                return task.result(timeout=120)
    except Exception as exc:
        logger.exception("Chat failed: %s", exc)
        return f"Error: {exc}"
src/streamlit_app.py CHANGED
@@ -1,40 +1,172 @@
1
- import altair as alt
2
- import numpy as np
3
- import pandas as pd
 
 
 
 
 
4
  import streamlit as st
 
 
 
5
 
6
- """
7
- # Welcome to Streamlit!
 
 
8
 
9
- Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
10
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
11
- forums](https://discuss.streamlit.io).
12
 
13
- In the meantime, below is an example of what you can do with just a few lines of code:
14
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
- num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
17
- num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
18
-
19
- indices = np.linspace(0, 1, num_points)
20
- theta = 2 * np.pi * num_turns * indices
21
- radius = indices
22
-
23
- x = radius * np.cos(theta)
24
- y = radius * np.sin(theta)
25
-
26
- df = pd.DataFrame({
27
- "x": x,
28
- "y": y,
29
- "idx": indices,
30
- "rand": np.random.randn(num_points),
31
- })
32
-
33
- st.altair_chart(alt.Chart(df, height=700, width=700)
34
- .mark_point(filled=True)
35
- .encode(
36
- x=alt.X("x", axis=None),
37
- y=alt.Y("y", axis=None),
38
- color=alt.Color("idx", legend=None, scale=alt.Scale()),
39
- size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
40
- ))
 
1
+ """
2
+ Handbook Generator β€” Streamlit UI with ADK agent.
3
+ All RAG operations are synchronous β€” no event loop issues.
4
+ """
5
+ import asyncio
6
+ import time
7
+ from pathlib import Path
8
+
9
  import streamlit as st
10
+ from google.adk.sessions import InMemorySessionService
11
+ from google.adk.runners import Runner
12
+ from google.genai import types
13
 
14
+ from config import OPENAI_API_KEY, UPLOADS_DIR, BASE_DIR
15
+ from rag import index_pdf, reset_index
16
+ from handbook_generator import build_handbook
17
+ from agent import root_agent
18
 
19
+ HANDBOOK_EXPORT_PATH = BASE_DIR / "handbook_export.md"
 
 
20
 
21
# ────────────────────────────────────────────────
# App-wide ADK wiring: one runner plus an in-memory session service per
# Streamlit process (chat history does not survive a restart).
APP_NAME = "handbook_app"
session_service = InMemorySessionService()
runner = Runner(
    agent=root_agent,
    app_name=APP_NAME,
    session_service=session_service,
)

# ────────────────────────────────────────────────
# Page chrome; set_page_config must be the first Streamlit call.
st.set_page_config(page_title="Handbook Generator", page_icon="πŸ“–", layout="wide")

# Cosmetic CSS for the chat bubbles and input field.
st.markdown("""
<style>
.stChatMessage { margin-bottom: 1.1rem !important; border-radius: 16px !important; }
.stChatInput > div > div { border-radius: 24px !important; padding: 0.5rem 1rem; }
</style>
""", unsafe_allow_html=True)
39
+
40
+
41
def ensure_api_key() -> bool:
    """Return True when OPENAI_API_KEY is configured; otherwise show an
    error banner and return False so the caller can halt the app."""
    if OPENAI_API_KEY:
        return True
    st.error("OPENAI_API_KEY is not set. Create a .env with OPENAI_API_KEY=sk-...")
    return False
46
+
47
+
48
# Refuse to render the app at all without an API key.
if not ensure_api_key():
    st.stop()

st.title("πŸ“– Handbook Generator")
st.caption("Upload PDFs β†’ Chat with ADK agent (RAG) β†’ Generate 20k-word handbook")

# Initialise per-browser-session state once; later reruns keep the values.
for key, value in {"messages": [], "user_id": "default_user"}.items():
    if key not in st.session_state:
        st.session_state[key] = value

tab1, tab2, tab3 = st.tabs(["Upload PDFs", "Chat", "Generate Handbook"])
59
+
60
# ── Tab 1: Upload (synchronous β€” no async needed) ────────────────
with tab1:
    st.subheader("Upload and index PDFs")
    files = st.file_uploader("Choose PDF files", type=["pdf"], accept_multiple_files=True)
    if st.button("Index PDFs"):
        if not files:
            st.warning("Select at least one PDF.")
        else:
            # Each indexing run starts from a clean slate: all previously
            # stored vectors are wiped before the new PDFs are ingested.
            reset_index()
            results = []
            for f in files:
                # Persist the upload to disk so the PDF parsers can open it.
                dest = UPLOADS_DIR / f.name
                dest.write_bytes(f.getvalue())
                try:
                    n = index_pdf(dest, source_name=f.name)
                    results.append(f"βœ… {f.name}: {n} chunks indexed")
                except Exception as e:
                    # Keep indexing the remaining files even if one fails.
                    results.append(f"❌ {f.name}: Error β€” {e}")
            st.success("\n".join(results))
79
+
80
# ── Tab 2: Chat ───────────────────────────────────────────────────
with tab2:
    st.subheader("Chat (ADK agent + RAG tool)")

    # Replay the stored transcript on every Streamlit rerun.
    for msg in st.session_state.messages:
        role = "user" if msg["role"] == "user" else "assistant"
        avatar = "πŸ‘€" if role == "user" else "πŸ€–"
        with st.chat_message(role, avatar=avatar):
            st.markdown(msg["content"])

    user_input = st.chat_input("Ask about your uploaded documents...")

    if user_input:
        st.session_state.messages.append({"role": "user", "content": user_input})
        with st.chat_message("user", avatar="πŸ‘€"):
            st.markdown(user_input)

        with st.chat_message("assistant", avatar="πŸ€–"):
            placeholder = st.empty()
            placeholder.markdown("β–‹ Thinking…")

            user_id = st.session_state.user_id
            session_id = f"{user_id}_session"

            # ADK agent is async, run it properly
            async def run_agent():
                try:
                    # Sessions are keyed by (app, user, session); create lazily
                    # on the first message of this browser session.
                    session = await session_service.get_session(
                        app_name=APP_NAME, user_id=user_id, session_id=session_id,
                    )
                    if not session:
                        await session_service.create_session(
                            app_name=APP_NAME, user_id=user_id, session_id=session_id,
                        )

                    user_content = types.Content(
                        role="user",
                        parts=[types.Part.from_text(text=user_input)],
                    )

                    response_text = ""
                    async for event in runner.run_async(
                        user_id=user_id, session_id=session_id, new_message=user_content,
                    ):
                        # Only the final response is rendered; intermediate
                        # tool-call events are ignored.
                        if event.is_final_response():
                            if event.content and event.content.parts:
                                # NOTE(review): takes parts[0] only β€” assumes
                                # the final response has a single text part.
                                response_text = event.content.parts[0].text
                            break

                    return response_text or "(No response generated)"
                except Exception as exc:
                    # Surface the failure inside the chat bubble instead of
                    # crashing the whole app.
                    return f"**Error occurred:** {str(exc)}"

            try:
                response = asyncio.run(run_agent())
            except RuntimeError:
                # Fallback if event loop already running
                import concurrent.futures
                with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
                    response = pool.submit(asyncio.run, run_agent()).result(timeout=120)

            placeholder.markdown(response)
            st.session_state.messages.append({"role": "assistant", "content": response})
            # Rerun so the transcript loop above renders the new exchange.
            st.rerun()
144
+
145
# ── Tab 3: Handbook (synchronous) ─────────────────────────────────
with tab3:
    st.subheader("Generate 20k-word handbook")
    topic = st.text_input(
        "Handbook topic", placeholder="e.g. Retrieval-Augmented Generation",
    )
    if st.button("Generate handbook"):
        if not (topic and topic.strip()):
            st.warning("Enter a topic.")
        else:
            status_placeholder = st.empty()
            progress_msgs: list[str] = []

            # Stream section-by-section progress lines into one placeholder.
            def on_progress(msg):
                progress_msgs.append(msg)
                status_placeholder.text("\n".join(progress_msgs))

            try:
                full_md = build_handbook(topic.strip(), on_progress=on_progress)
                status_placeholder.success("Generation complete.")
                st.markdown(full_md)
                # Persist a copy next to the app, then offer a download.
                HANDBOOK_EXPORT_PATH.write_text(full_md, encoding="utf-8")
                st.download_button(
                    "Download as Markdown", data=full_md,
                    file_name="handbook.md", mime="text/markdown",
                )
            except Exception as e:
                status_placeholder.error(str(e))