Anshul Prasad committed on
Commit
779be37
·
1 Parent(s): aa24937

End basic web app support.

Browse files
.gitignore CHANGED
@@ -3,4 +3,6 @@ data/subtitles_vtt
3
  data/transcripts_txt
4
  .vscode/settings.json
5
  .venv
6
- .idea
 
 
 
3
  data/transcripts_txt
4
  .vscode/settings.json
5
  .venv
6
+ .idea
7
+ uv.lock
8
+ pyproject.toml
Dockerfile CHANGED
@@ -1,17 +1,46 @@
1
  FROM python:3.10-slim
2
 
3
- WORKDIR /code
 
4
 
5
- COPY . .
 
 
 
 
 
6
 
7
- RUN pip install --upgrade pip \
8
- && pip install --no-cache-dir \
9
- torch torchvision torchaudio \
10
- --extra-index-url https://download.pytorch.org/whl/cpu \
11
- && pip install --no-cache-dir -r requirements.txt
12
 
 
 
 
 
13
 
 
 
 
14
 
15
- ENV HF_HOME=/tmp/.cache
16
 
17
- CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  FROM python:3.10-slim
2
 
3
+ ENV DEBIAN_FRONTEND=noninteractive \
4
+ PYTHONUNBUFFERED=1
5
 
6
+ RUN apt-get update && apt-get install -y \
7
+ python3-pip \
8
+ build-essential \
9
+ git \
10
+ curl \
11
+ && rm -rf /var/lib/apt/lists/*
12
 
13
+ WORKDIR /app
 
 
 
 
14
 
15
+ COPY requirements.txt /tmp/requirements.txt
16
+ RUN python3 -m pip install --upgrade pip setuptools wheel \
17
+ && python3 -m pip install --no-cache-dir -r /tmp/requirements.txt \
18
+ && python3 -c "import django; print('DJANGO_VER=', django.__version__)" || true
19
 
20
+ RUN useradd -m -u 1000 appuser \
21
+ && mkdir -p /app \
22
+ && chown -R appuser:appuser /app
23
 
24
+ COPY --chown=appuser:appuser . /app
25
 
26
+ USER appuser
27
+ ENV HOME=/home/appuser
28
+
29
+ WORKDIR /app
30
+
31
+ # HF Spaces requires port 7860
32
+ EXPOSE 7860
33
+
34
+ CMD ["sh", "-c", "\
35
+ python manage.py migrate --run-syncdb && \
36
+ python manage.py shell -c \"\
37
+ import os; \
38
+ from django.contrib.auth import get_user_model; \
39
+ User = get_user_model(); \
40
+ email = os.environ.get('ADMIN_EMAIL','admin@example.com'); \
41
+ password = os.environ.get('ADMIN_PASSWORD','changeme123'); \
42
+ User.objects.filter(username='admin').exists() or User.objects.create_superuser('admin', email, password); \
43
+ print('Superuser ready') \
44
+ \" && \
45
+ gunicorn guru_project.wsgi:application --bind 0.0.0.0:7860 --workers 2 --timeout 120 \
46
+ "]
app.py DELETED
@@ -1,66 +0,0 @@
1
- import logging
2
- import os
3
- import pickle
4
- import sys
5
- from fastapi import FastAPI, Request
6
- from fastapi.responses import JSONResponse
7
- from fastapi.middleware.cors import CORSMiddleware
8
- from fastapi.staticfiles import StaticFiles
9
-
10
- from api.generate_response import generate_response
11
- from api.retrieve_context import retrieve_transcripts
12
- from utils.token import count_tokens, trim_to_token_limit
13
- from config import FILE_PATHS, TRANSCRIPTS, MAX_CONTEXT_TOKENS
14
-
15
- logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s [%(name)s]: %(message)s", handlers=[logging.StreamHandler(sys.stdout)])
16
- logger = logging.getLogger(__name__)
17
-
18
- app = FastAPI()
19
- app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["POST"], allow_headers=["*"])
20
-
21
- file_paths = []
22
- transcripts = []
23
-
24
- @app.on_event("startup")
25
- def load_data():
26
- global file_paths, transcripts
27
- logger.info("Loading transcripts")
28
-
29
- with open(FILE_PATHS, "rb") as f:
30
- file_paths = pickle.load(f)
31
- with open(TRANSCRIPTS, "rb") as f:
32
- transcripts = pickle.load(f)
33
-
34
- logger.info("Loaded %d transcripts", len(transcripts))
35
-
36
-
37
- @app.post("/ask")
38
- async def ask_question(request: Request):
39
- try:
40
- data = await request.json()
41
-
42
- query = data.get("query")
43
- if not query:
44
- return JSONResponse({"error": "Query cannot be empty"}, status_code=400)
45
-
46
- retrieved_transcripts = retrieve_transcripts(query, file_paths, transcripts, 15)
47
- if not retrieved_transcripts:
48
- return JSONResponse({"error": "No relevant transcripts found"}, status_code=404)
49
-
50
- full_context = " ".join(retrieved_transcripts)
51
- limit_context = trim_to_token_limit(full_context, MAX_CONTEXT_TOKENS)
52
- context_str = " ".join(limit_context.split("\n"))
53
- response = generate_response(query, limit_context)
54
-
55
- logger.info("Full_context: Tokens=%d, Words=%d", count_tokens(full_context), len(full_context.split(" ")))
56
- logger.info("Limit_context: Tokens=%d, Words=%d", count_tokens(limit_context), len(limit_context.split(" ")))
57
-
58
- return JSONResponse({"answer": response})
59
-
60
- except Exception as e:
61
- logger.exception("Internal error: %s",e)
62
- return JSONResponse({"error": "Internal server error. Please try again later."}, status_code=500)
63
-
64
- # Serve frontend from 'frontend/' directory
65
- frontend_path = os.path.join(os.path.dirname(__file__), "frontend")
66
- app.mount("/", StaticFiles(directory=frontend_path, html=True), name="frontend")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
config.py CHANGED
@@ -14,9 +14,11 @@ TRANSCRIPT_INDEX = "data/transcript_index.faiss"
14
  RETRIEVED_TRANSCRIPTS_FILE = Path("outputs/retrieved_transcripts.txt")
15
  RESPONSE_FILE = Path("outputs/generated_response.txt")
16
  COOKIES_FILE = Path("utils/youtube_cookies.txt")
 
17
  GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
18
  MODEL = "llama-3.1-8b-instant"
19
  MAX_CONTEXT_TOKENS = 4500
 
20
  SYSTEM_PROMPT = """
21
  You are speaking as Spiritual Guru.
22
 
@@ -35,12 +37,11 @@ Guidelines:
35
  - Do not reference yourself as an AI or model.
36
  - Do not mention that you are imitating someone.
37
  - If the context is insufficient, say so plainly instead of guessing.
38
- - "Answer questions strictly using the provided context. "
39
- - "Do not add external knowledge."
40
 
41
  Structure:
42
  - Begin by addressing the core misunderstanding.
43
  - Then explain the principle.
44
  - End with a reflective or probing statement rather than advice.
45
-
46
- """
 
14
  RETRIEVED_TRANSCRIPTS_FILE = Path("outputs/retrieved_transcripts.txt")
15
  RESPONSE_FILE = Path("outputs/generated_response.txt")
16
  COOKIES_FILE = Path("utils/youtube_cookies.txt")
17
+
18
  GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
19
  MODEL = "llama-3.1-8b-instant"
20
  MAX_CONTEXT_TOKENS = 4500
21
+
22
  SYSTEM_PROMPT = """
23
  You are speaking as Spiritual Guru.
24
 
 
37
  - Do not reference yourself as an AI or model.
38
  - Do not mention that you are imitating someone.
39
  - If the context is insufficient, say so plainly instead of guessing.
40
+ - Answer questions strictly using the provided context.
41
+ - Do not add external knowledge.
42
 
43
  Structure:
44
  - Begin by addressing the core misunderstanding.
45
  - Then explain the principle.
46
  - End with a reflective or probing statement rather than advice.
47
+ """
 
frontend/index.html DELETED
@@ -1,173 +0,0 @@
1
- <!DOCTYPE html>
2
- <html lang="en">
3
- <head>
4
- <meta charset="UTF-8">
5
- <title>Ask Assistant</title>
6
- <meta name="viewport" content="width=device-width, initial-scale=1">
7
- <!-- Markdown renderer -->
8
- <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
9
- <style>
10
- :root {
11
- --primary: #2d3a4a;
12
- --accent: #ffb400;
13
- --bg-light: #f4f6fa;
14
- --card-bg: #ffffff;
15
- --fg: #2d3a4a;
16
- --fg-light: #f9f9fc;
17
- --radius: 12px; /* corner radius */
18
- --shadow: 0 8px 24px rgba(0,0,0,0.08);
19
- }
20
- *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
21
- html { scroll-behavior: smooth; }
22
- body {
23
- font-family: 'Segoe UI', Arial, sans-serif;
24
- background: var(--bg-light);
25
- color: var(--fg);
26
- line-height: 1.5;
27
- }
28
-
29
- /* HERO / BANNER */
30
- .hero {
31
- position: relative;
32
- display: flex;
33
- flex-direction: column;
34
- align-items: center;
35
- justify-content: center;
36
- text-align: center;
37
- height: clamp(200px, 35vh, 350px);
38
- background:
39
- linear-gradient(rgba(45,58,74,0.6), rgba(45,58,74,0.3)),
40
- url("assets/images/hero-background.jpg") center/cover no-repeat;
41
- }
42
- .hero img {
43
- width: clamp(100px, 15%, 180px);
44
- height: auto;
45
- border-radius: var(--radius); /* rounded rectangle */
46
- object-fit: cover;
47
- margin-bottom: 15px;
48
- /* border removed */
49
- }
50
- .hero h1 {
51
- font-size: clamp(1.8rem, 5vw, 2.8rem);
52
- color: #fff;
53
- z-index: 1;
54
- }
55
-
56
- /* Q&A CARD */
57
- .container {
58
- position: relative;
59
- max-width: 520px;
60
- margin: -80px auto 40px;
61
- background: var(--card-bg);
62
- border-radius: var(--radius);
63
- box-shadow: var(--shadow);
64
- overflow: hidden;
65
- z-index: 2;
66
- }
67
- .input-group {
68
- display: flex;
69
- flex-wrap: wrap;
70
- gap: 10px;
71
- padding: 24px;
72
- }
73
- .input-group input {
74
- flex: 1 1 200px;
75
- padding: 14px;
76
- font-size: 1rem;
77
- border: 1px solid #cbd2db;
78
- border-radius: 6px;
79
- transition: border-color .2s, box-shadow .2s;
80
- }
81
- .input-group input:focus {
82
- outline: none;
83
- border-color: var(--primary);
84
- box-shadow: 0 0 0 3px rgba(45,58,74,0.15);
85
- }
86
- .input-group button {
87
- flex: 0 0 auto;
88
- padding: 14px 24px;
89
- font-size: 1rem;
90
- background: var(--accent);
91
- color: var(--primary);
92
- font-weight: bold;
93
- border: none;
94
- border-radius: 6px;
95
- cursor: pointer;
96
- transition: background .2s, transform .1s;
97
- }
98
- .input-group button:hover {
99
- background: #e0a200;
100
- transform: translateY(-1px);
101
- }
102
-
103
- #answer-box {
104
- padding: 20px 24px;
105
- background: var(--fg-light);
106
- border-top: 1px solid #e1e5eb;
107
- min-height: 100px;
108
- font-size: 1.05em;
109
- }
110
- .loading { color: #888; }
111
-
112
- /* RESPONSIVE */
113
- @media (max-width: 480px) {
114
- .hero { height: clamp(180px, 30vh, 260px); }
115
- .hero img { width: clamp(80px, 25%, 120px); }
116
- .container { margin: -60px 16px 30px; }
117
- .input-group { padding: 16px; }
118
- .input-group input, .input-group button { flex: 1 1 100%; }
119
- .input-group button { margin-top: 8px; }
120
- #answer-box { padding: 16px; }
121
- }
122
- </style>
123
- </head>
124
- <body>
125
-
126
- <!-- HERO / BANNER -->
127
- <header class="hero">
128
- <img src="assets/images/image1.webp" alt="Assistant">
129
- <h1>Ask Assistant</h1>
130
- </header>
131
-
132
- <!-- Q&A CARD -->
133
- <div class="container">
134
- <div class="input-group">
135
- <input
136
- id="question"
137
- type="text"
138
- placeholder="Type your question…"
139
- onkeydown="if(event.key==='Enter'){ask();}">
140
- <button onclick="ask()">Ask</button>
141
- </div>
142
- <div id="answer-box"></div>
143
- </div>
144
-
145
- <!-- Q&A Script -->
146
- <script>
147
- async function ask() {
148
- const input = document.getElementById("question");
149
- const q = input.value.trim();
150
- const box = document.getElementById("answer-box");
151
- if (!q) {
152
- box.innerHTML = "<span style='color:red;'>Please enter a question.</span>";
153
- return;
154
- }
155
- box.innerHTML = "<span class='loading'>Loading…</span>";
156
- try {
157
- const res = await fetch("/ask", {
158
- method: "POST",
159
- headers: {"Content-Type":"application/json"},
160
- body: JSON.stringify({ query: q })
161
- });
162
- const data = await res.json();
163
- box.innerHTML = data.answer
164
- ? marked.parse(data.answer)
165
- : `<span style='color:red;'>${data.error||"No answer returned."}</span>`;
166
- } catch {
167
- box.innerHTML = "<span style='color:red;'>Error contacting server.</span>";
168
- }
169
- }
170
- </script>
171
-
172
- </body>
173
- </html>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
{frontend/assets → guru_app/static/guru}/images/image1.webp RENAMED
File without changes
requirements.txt CHANGED
@@ -1,9 +1,9 @@
1
- faiss-cpu == 1.9.0
2
- fastapi == 0.116.1
3
- pytz == 2025.2
4
- requests == 2.32.5
5
- sentence-transformers == 3.0.1
6
- tiktoken == 0.12.0
7
- transformers == 4.57.1
8
- uvicorn == 0.38.0
9
- groq >= 1.0.0
 
1
+ django>=4.2,<5.0
2
+ gunicorn>=21.0
3
+ faiss-cpu==1.9.0
4
+ sentence-transformers==3.0.1
5
+ tiktoken==0.12.0
6
+ transformers==4.57.1
7
+ groq>=1.0.0
8
+ requests==2.32.5
9
+ pytz==2025.2