Anshul Prasad committed on
Commit ·
779be37
1
Parent(s): aa24937
end basic web app support.
Browse files- .gitignore +3 -1
- Dockerfile +38 -9
- app.py +0 -66
- config.py +5 -4
- frontend/index.html +0 -173
- {frontend/assets → guru_app/static/guru}/images/image1.webp +0 -0
- requirements.txt +9 -9
.gitignore
CHANGED
|
@@ -3,4 +3,6 @@ data/subtitles_vtt
|
|
| 3 |
data/transcripts_txt
|
| 4 |
.vscode/settings.json
|
| 5 |
.venv
|
| 6 |
-
.idea
|
|
|
|
|
|
|
|
|
| 3 |
data/transcripts_txt
|
| 4 |
.vscode/settings.json
|
| 5 |
.venv
|
| 6 |
+
.idea
|
| 7 |
+
uv.lock
|
| 8 |
+
pyproject.toml
|
Dockerfile
CHANGED
|
@@ -1,17 +1,46 @@
|
|
| 1 |
FROM python:3.10-slim
|
| 2 |
|
| 3 |
-
|
|
|
|
| 4 |
|
| 5 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
-
|
| 8 |
-
&& pip install --no-cache-dir \
|
| 9 |
-
torch torchvision torchaudio \
|
| 10 |
-
--extra-index-url https://download.pytorch.org/whl/cpu \
|
| 11 |
-
&& pip install --no-cache-dir -r requirements.txt
|
| 12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
-
|
| 16 |
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
FROM python:3.10-slim
|
| 2 |
|
| 3 |
+
ENV DEBIAN_FRONTEND=noninteractive \
|
| 4 |
+
PYTHONUNBUFFERED=1
|
| 5 |
|
| 6 |
+
RUN apt-get update && apt-get install -y \
|
| 7 |
+
python3-pip \
|
| 8 |
+
build-essential \
|
| 9 |
+
git \
|
| 10 |
+
curl \
|
| 11 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 12 |
|
| 13 |
+
WORKDIR /app
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
+
COPY requirements.txt /tmp/requirements.txt
|
| 16 |
+
RUN python3 -m pip install --upgrade pip setuptools wheel \
|
| 17 |
+
&& python3 -m pip install --no-cache-dir -r /tmp/requirements.txt \
|
| 18 |
+
&& python3 -c "import django; print('DJANGO_VER=', django.__version__)" || true
|
| 19 |
|
| 20 |
+
RUN useradd -m -u 1000 appuser \
|
| 21 |
+
&& mkdir -p /app \
|
| 22 |
+
&& chown -R appuser:appuser /app
|
| 23 |
|
| 24 |
+
COPY --chown=appuser:appuser . /app
|
| 25 |
|
| 26 |
+
USER appuser
|
| 27 |
+
ENV HOME=/home/appuser
|
| 28 |
+
|
| 29 |
+
WORKDIR /app
|
| 30 |
+
|
| 31 |
+
# HF Spaces requires port 7860
|
| 32 |
+
EXPOSE 7860
|
| 33 |
+
|
| 34 |
+
CMD ["sh", "-c", "\
|
| 35 |
+
python manage.py migrate --run-syncdb && \
|
| 36 |
+
python manage.py shell -c \"\
|
| 37 |
+
import os; \
|
| 38 |
+
from django.contrib.auth import get_user_model; \
|
| 39 |
+
User = get_user_model(); \
|
| 40 |
+
email = os.environ.get('ADMIN_EMAIL','admin@example.com'); \
|
| 41 |
+
password = os.environ.get('ADMIN_PASSWORD','changeme123'); \
|
| 42 |
+
User.objects.filter(username='admin').exists() or User.objects.create_superuser('admin', email, password); \
|
| 43 |
+
print('Superuser ready') \
|
| 44 |
+
\" && \
|
| 45 |
+
gunicorn guru_project.wsgi:application --bind 0.0.0.0:7860 --workers 2 --timeout 120 \
|
| 46 |
+
"]
|
app.py
DELETED
|
@@ -1,66 +0,0 @@
|
|
| 1 |
-
import logging
|
| 2 |
-
import os
|
| 3 |
-
import pickle
|
| 4 |
-
import sys
|
| 5 |
-
from fastapi import FastAPI, Request
|
| 6 |
-
from fastapi.responses import JSONResponse
|
| 7 |
-
from fastapi.middleware.cors import CORSMiddleware
|
| 8 |
-
from fastapi.staticfiles import StaticFiles
|
| 9 |
-
|
| 10 |
-
from api.generate_response import generate_response
|
| 11 |
-
from api.retrieve_context import retrieve_transcripts
|
| 12 |
-
from utils.token import count_tokens, trim_to_token_limit
|
| 13 |
-
from config import FILE_PATHS, TRANSCRIPTS, MAX_CONTEXT_TOKENS
|
| 14 |
-
|
| 15 |
-
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s [%(name)s]: %(message)s", handlers=[logging.StreamHandler(sys.stdout)])
|
| 16 |
-
logger = logging.getLogger(__name__)
|
| 17 |
-
|
| 18 |
-
app = FastAPI()
|
| 19 |
-
app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["POST"], allow_headers=["*"])
|
| 20 |
-
|
| 21 |
-
file_paths = []
|
| 22 |
-
transcripts = []
|
| 23 |
-
|
| 24 |
-
@app.on_event("startup")
|
| 25 |
-
def load_data():
|
| 26 |
-
global file_paths, transcripts
|
| 27 |
-
logger.info("Loading transcripts")
|
| 28 |
-
|
| 29 |
-
with open(FILE_PATHS, "rb") as f:
|
| 30 |
-
file_paths = pickle.load(f)
|
| 31 |
-
with open(TRANSCRIPTS, "rb") as f:
|
| 32 |
-
transcripts = pickle.load(f)
|
| 33 |
-
|
| 34 |
-
logger.info("Loaded %d transcripts", len(transcripts))
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
@app.post("/ask")
|
| 38 |
-
async def ask_question(request: Request):
|
| 39 |
-
try:
|
| 40 |
-
data = await request.json()
|
| 41 |
-
|
| 42 |
-
query = data.get("query")
|
| 43 |
-
if not query:
|
| 44 |
-
return JSONResponse({"error": "Query cannot be empty"}, status_code=400)
|
| 45 |
-
|
| 46 |
-
retrieved_transcripts = retrieve_transcripts(query, file_paths, transcripts, 15)
|
| 47 |
-
if not retrieved_transcripts:
|
| 48 |
-
return JSONResponse({"error": "No relevant transcripts found"}, status_code=404)
|
| 49 |
-
|
| 50 |
-
full_context = " ".join(retrieved_transcripts)
|
| 51 |
-
limit_context = trim_to_token_limit(full_context, MAX_CONTEXT_TOKENS)
|
| 52 |
-
context_str = " ".join(limit_context.split("\n"))
|
| 53 |
-
response = generate_response(query, limit_context)
|
| 54 |
-
|
| 55 |
-
logger.info("Full_context: Tokens=%d, Words=%d", count_tokens(full_context), len(full_context.split(" ")))
|
| 56 |
-
logger.info("Limit_context: Tokens=%d, Words=%d", count_tokens(limit_context), len(limit_context.split(" ")))
|
| 57 |
-
|
| 58 |
-
return JSONResponse({"answer": response})
|
| 59 |
-
|
| 60 |
-
except Exception as e:
|
| 61 |
-
logger.exception("Internal error: %s",e)
|
| 62 |
-
return JSONResponse({"error": "Internal server error. Please try again later."}, status_code=500)
|
| 63 |
-
|
| 64 |
-
# Serve frontend from 'frontend/' directory
|
| 65 |
-
frontend_path = os.path.join(os.path.dirname(__file__), "frontend")
|
| 66 |
-
app.mount("/", StaticFiles(directory=frontend_path, html=True), name="frontend")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
config.py
CHANGED
|
@@ -14,9 +14,11 @@ TRANSCRIPT_INDEX = "data/transcript_index.faiss"
|
|
| 14 |
RETRIEVED_TRANSCRIPTS_FILE = Path("outputs/retrieved_transcripts.txt")
|
| 15 |
RESPONSE_FILE = Path("outputs/generated_response.txt")
|
| 16 |
COOKIES_FILE = Path("utils/youtube_cookies.txt")
|
|
|
|
| 17 |
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
|
| 18 |
MODEL = "llama-3.1-8b-instant"
|
| 19 |
MAX_CONTEXT_TOKENS = 4500
|
|
|
|
| 20 |
SYSTEM_PROMPT = """
|
| 21 |
You are speaking as Spiritual Guru.
|
| 22 |
|
|
@@ -35,12 +37,11 @@ Guidelines:
|
|
| 35 |
- Do not reference yourself as an AI or model.
|
| 36 |
- Do not mention that you are imitating someone.
|
| 37 |
- If the context is insufficient, say so plainly instead of guessing.
|
| 38 |
-
-
|
| 39 |
-
-
|
| 40 |
|
| 41 |
Structure:
|
| 42 |
- Begin by addressing the core misunderstanding.
|
| 43 |
- Then explain the principle.
|
| 44 |
- End with a reflective or probing statement rather than advice.
|
| 45 |
-
|
| 46 |
-
"""
|
|
|
|
| 14 |
RETRIEVED_TRANSCRIPTS_FILE = Path("outputs/retrieved_transcripts.txt")
|
| 15 |
RESPONSE_FILE = Path("outputs/generated_response.txt")
|
| 16 |
COOKIES_FILE = Path("utils/youtube_cookies.txt")
|
| 17 |
+
|
| 18 |
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
|
| 19 |
MODEL = "llama-3.1-8b-instant"
|
| 20 |
MAX_CONTEXT_TOKENS = 4500
|
| 21 |
+
|
| 22 |
SYSTEM_PROMPT = """
|
| 23 |
You are speaking as Spiritual Guru.
|
| 24 |
|
|
|
|
| 37 |
- Do not reference yourself as an AI or model.
|
| 38 |
- Do not mention that you are imitating someone.
|
| 39 |
- If the context is insufficient, say so plainly instead of guessing.
|
| 40 |
+
- Answer questions strictly using the provided context.
|
| 41 |
+
- Do not add external knowledge.
|
| 42 |
|
| 43 |
Structure:
|
| 44 |
- Begin by addressing the core misunderstanding.
|
| 45 |
- Then explain the principle.
|
| 46 |
- End with a reflective or probing statement rather than advice.
|
| 47 |
+
"""
|
|
|
frontend/index.html
DELETED
|
@@ -1,173 +0,0 @@
|
|
| 1 |
-
<!DOCTYPE html>
|
| 2 |
-
<html lang="en">
|
| 3 |
-
<head>
|
| 4 |
-
<meta charset="UTF-8">
|
| 5 |
-
<title>Ask Assistant</title>
|
| 6 |
-
<meta name="viewport" content="width=device-width, initial-scale=1">
|
| 7 |
-
<!-- Markdown renderer -->
|
| 8 |
-
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
|
| 9 |
-
<style>
|
| 10 |
-
:root {
|
| 11 |
-
--primary: #2d3a4a;
|
| 12 |
-
--accent: #ffb400;
|
| 13 |
-
--bg-light: #f4f6fa;
|
| 14 |
-
--card-bg: #ffffff;
|
| 15 |
-
--fg: #2d3a4a;
|
| 16 |
-
--fg-light: #f9f9fc;
|
| 17 |
-
--radius: 12px; /* corner radius */
|
| 18 |
-
--shadow: 0 8px 24px rgba(0,0,0,0.08);
|
| 19 |
-
}
|
| 20 |
-
*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
|
| 21 |
-
html { scroll-behavior: smooth; }
|
| 22 |
-
body {
|
| 23 |
-
font-family: 'Segoe UI', Arial, sans-serif;
|
| 24 |
-
background: var(--bg-light);
|
| 25 |
-
color: var(--fg);
|
| 26 |
-
line-height: 1.5;
|
| 27 |
-
}
|
| 28 |
-
|
| 29 |
-
/* HERO / BANNER */
|
| 30 |
-
.hero {
|
| 31 |
-
position: relative;
|
| 32 |
-
display: flex;
|
| 33 |
-
flex-direction: column;
|
| 34 |
-
align-items: center;
|
| 35 |
-
justify-content: center;
|
| 36 |
-
text-align: center;
|
| 37 |
-
height: clamp(200px, 35vh, 350px);
|
| 38 |
-
background:
|
| 39 |
-
linear-gradient(rgba(45,58,74,0.6), rgba(45,58,74,0.3)),
|
| 40 |
-
url("assets/images/hero-background.jpg") center/cover no-repeat;
|
| 41 |
-
}
|
| 42 |
-
.hero img {
|
| 43 |
-
width: clamp(100px, 15%, 180px);
|
| 44 |
-
height: auto;
|
| 45 |
-
border-radius: var(--radius); /* rounded rectangle */
|
| 46 |
-
object-fit: cover;
|
| 47 |
-
margin-bottom: 15px;
|
| 48 |
-
/* border removed */
|
| 49 |
-
}
|
| 50 |
-
.hero h1 {
|
| 51 |
-
font-size: clamp(1.8rem, 5vw, 2.8rem);
|
| 52 |
-
color: #fff;
|
| 53 |
-
z-index: 1;
|
| 54 |
-
}
|
| 55 |
-
|
| 56 |
-
/* Q&A CARD */
|
| 57 |
-
.container {
|
| 58 |
-
position: relative;
|
| 59 |
-
max-width: 520px;
|
| 60 |
-
margin: -80px auto 40px;
|
| 61 |
-
background: var(--card-bg);
|
| 62 |
-
border-radius: var(--radius);
|
| 63 |
-
box-shadow: var(--shadow);
|
| 64 |
-
overflow: hidden;
|
| 65 |
-
z-index: 2;
|
| 66 |
-
}
|
| 67 |
-
.input-group {
|
| 68 |
-
display: flex;
|
| 69 |
-
flex-wrap: wrap;
|
| 70 |
-
gap: 10px;
|
| 71 |
-
padding: 24px;
|
| 72 |
-
}
|
| 73 |
-
.input-group input {
|
| 74 |
-
flex: 1 1 200px;
|
| 75 |
-
padding: 14px;
|
| 76 |
-
font-size: 1rem;
|
| 77 |
-
border: 1px solid #cbd2db;
|
| 78 |
-
border-radius: 6px;
|
| 79 |
-
transition: border-color .2s, box-shadow .2s;
|
| 80 |
-
}
|
| 81 |
-
.input-group input:focus {
|
| 82 |
-
outline: none;
|
| 83 |
-
border-color: var(--primary);
|
| 84 |
-
box-shadow: 0 0 0 3px rgba(45,58,74,0.15);
|
| 85 |
-
}
|
| 86 |
-
.input-group button {
|
| 87 |
-
flex: 0 0 auto;
|
| 88 |
-
padding: 14px 24px;
|
| 89 |
-
font-size: 1rem;
|
| 90 |
-
background: var(--accent);
|
| 91 |
-
color: var(--primary);
|
| 92 |
-
font-weight: bold;
|
| 93 |
-
border: none;
|
| 94 |
-
border-radius: 6px;
|
| 95 |
-
cursor: pointer;
|
| 96 |
-
transition: background .2s, transform .1s;
|
| 97 |
-
}
|
| 98 |
-
.input-group button:hover {
|
| 99 |
-
background: #e0a200;
|
| 100 |
-
transform: translateY(-1px);
|
| 101 |
-
}
|
| 102 |
-
|
| 103 |
-
#answer-box {
|
| 104 |
-
padding: 20px 24px;
|
| 105 |
-
background: var(--fg-light);
|
| 106 |
-
border-top: 1px solid #e1e5eb;
|
| 107 |
-
min-height: 100px;
|
| 108 |
-
font-size: 1.05em;
|
| 109 |
-
}
|
| 110 |
-
.loading { color: #888; }
|
| 111 |
-
|
| 112 |
-
/* RESPONSIVE */
|
| 113 |
-
@media (max-width: 480px) {
|
| 114 |
-
.hero { height: clamp(180px, 30vh, 260px); }
|
| 115 |
-
.hero img { width: clamp(80px, 25%, 120px); }
|
| 116 |
-
.container { margin: -60px 16px 30px; }
|
| 117 |
-
.input-group { padding: 16px; }
|
| 118 |
-
.input-group input, .input-group button { flex: 1 1 100%; }
|
| 119 |
-
.input-group button { margin-top: 8px; }
|
| 120 |
-
#answer-box { padding: 16px; }
|
| 121 |
-
}
|
| 122 |
-
</style>
|
| 123 |
-
</head>
|
| 124 |
-
<body>
|
| 125 |
-
|
| 126 |
-
<!-- HERO / BANNER -->
|
| 127 |
-
<header class="hero">
|
| 128 |
-
<img src="assets/images/image1.webp" alt="Assistant">
|
| 129 |
-
<h1>Ask Assistant</h1>
|
| 130 |
-
</header>
|
| 131 |
-
|
| 132 |
-
<!-- Q&A CARD -->
|
| 133 |
-
<div class="container">
|
| 134 |
-
<div class="input-group">
|
| 135 |
-
<input
|
| 136 |
-
id="question"
|
| 137 |
-
type="text"
|
| 138 |
-
placeholder="Type your question…"
|
| 139 |
-
onkeydown="if(event.key==='Enter'){ask();}">
|
| 140 |
-
<button onclick="ask()">Ask</button>
|
| 141 |
-
</div>
|
| 142 |
-
<div id="answer-box"></div>
|
| 143 |
-
</div>
|
| 144 |
-
|
| 145 |
-
<!-- Q&A Script -->
|
| 146 |
-
<script>
|
| 147 |
-
async function ask() {
|
| 148 |
-
const input = document.getElementById("question");
|
| 149 |
-
const q = input.value.trim();
|
| 150 |
-
const box = document.getElementById("answer-box");
|
| 151 |
-
if (!q) {
|
| 152 |
-
box.innerHTML = "<span style='color:red;'>Please enter a question.</span>";
|
| 153 |
-
return;
|
| 154 |
-
}
|
| 155 |
-
box.innerHTML = "<span class='loading'>Loading…</span>";
|
| 156 |
-
try {
|
| 157 |
-
const res = await fetch("/ask", {
|
| 158 |
-
method: "POST",
|
| 159 |
-
headers: {"Content-Type":"application/json"},
|
| 160 |
-
body: JSON.stringify({ query: q })
|
| 161 |
-
});
|
| 162 |
-
const data = await res.json();
|
| 163 |
-
box.innerHTML = data.answer
|
| 164 |
-
? marked.parse(data.answer)
|
| 165 |
-
: `<span style='color:red;'>${data.error||"No answer returned."}</span>`;
|
| 166 |
-
} catch {
|
| 167 |
-
box.innerHTML = "<span style='color:red;'>Error contacting server.</span>";
|
| 168 |
-
}
|
| 169 |
-
}
|
| 170 |
-
</script>
|
| 171 |
-
|
| 172 |
-
</body>
|
| 173 |
-
</html>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
{frontend/assets → guru_app/static/guru}/images/image1.webp
RENAMED
|
File without changes
|
requirements.txt
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
|
|
|
| 1 |
+
django>=4.2,<5.0
|
| 2 |
+
gunicorn>=21.0
|
| 3 |
+
faiss-cpu==1.9.0
|
| 4 |
+
sentence-transformers==3.0.1
|
| 5 |
+
tiktoken==0.12.0
|
| 6 |
+
transformers==4.57.1
|
| 7 |
+
groq>=1.0.0
|
| 8 |
+
requests==2.32.5
|
| 9 |
+
pytz==2025.2
|