quantumbit commited on
Commit
025f187
·
0 Parent(s):

initial commit

Browse files
Files changed (9) hide show
  1. .github/workflows/deploy-hf-space.yml +34 -0
  2. .gitignore +4 -0
  3. Dockerfile +16 -0
  4. README.md +8 -0
  5. agents.py +193 -0
  6. main.py +137 -0
  7. models.py +11 -0
  8. requirements.txt +7 -0
  9. utils/filtering.py +86 -0
.github/workflows/deploy-hf-space.yml ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Deploys this repository to a Hugging Face Space by force-pushing HEAD to the
# Space's `main` branch.
# NOTE(review): `on: push:` has no branch filter, so a push to ANY branch
# force-pushes that branch's HEAD to the Space — confirm this is intended.
name: Deploy to HF Space

on:
  push:

permissions:
  contents: read

jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          # Full history so the push to the Space is not a shallow clone.
          fetch-depth: 0

      - name: Push to Hugging Face Space
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          # Expected form: "username/space-name"
          HF_SPACE: ${{ secrets.HF_SPACE }}
        run: |
          set -euo pipefail

          # Fail fast with a clear message when the secrets are not configured.
          if [ -z "${HF_TOKEN:-}" ] || [ -z "${HF_SPACE:-}" ]; then
            echo "Missing HF_TOKEN or HF_SPACE secrets."
            exit 1
          fi

          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"

          # Token is embedded in the remote URL; --force overwrites the
          # Space's history with this repository's HEAD.
          git remote add hf "https://user:${HF_TOKEN}@huggingface.co/spaces/${HF_SPACE}.git"
          git push --force hf HEAD:main
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ env
2
+ .env
3
+ __pycache__
4
+ output*
Dockerfile ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
# you will also find guides on how best to write your Dockerfile

FROM python:3.9

# Run as a non-root user with a fixed UID, per HF Spaces Docker guidance.
RUN useradd -m -u 1000 user
USER user
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

# Install dependencies before copying the code so Docker layer caching
# survives code-only edits.
COPY --chown=user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

COPY --chown=user . /app
# HF Spaces expects the app to listen on port 7860.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: CTRS
3
+ emoji: 😻
4
+ colorFrom: green
5
+ colorTo: blue
6
+ sdk: docker
7
+ pinned: false
8
+ ---
agents.py ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import sys
from openai import OpenAI

from dotenv import load_dotenv

# Load REQUESTY_API_KEY (and any other settings) from a local .env file.
load_dotenv()


# Requesty router configuration used by chat() below.
REQUESTY_API_KEY = os.getenv("REQUESTY_API_KEY")
BASE_URL = "https://router.requesty.ai/v1"
MODEL = "openai/gpt-4o"
# Maximum number of validate-then-correct rounds in run_pipeline().
MAX_ITERATIONS = 3

# Shared synchronous client; the extra headers identify this app to the router.
client = OpenAI(
    api_key=REQUESTY_API_KEY,
    base_url=BASE_URL,
    default_headers={
        "HTTP-Referer": "https://yourapp.com",
        "X-Title": "Log Analyzer",
    },
)
23
+
24
+
25
def chat(system: str, user: str, temperature: float = 0.3) -> str:
    """Run one system+user exchange against MODEL and return the stripped reply."""
    conversation = [
        {"role": "system", "content": system},
        {"role": "user", "content": user},
    ]
    completion = client.chat.completions.create(
        model=MODEL,
        temperature=temperature,
        messages=conversation,
    )
    reply = completion.choices[0].message.content
    return reply.strip()
36
+
37
+
38
def load_file(path: str) -> str:
    """Read a UTF-8 text file and return its contents without surrounding whitespace."""
    with open(path, "r", encoding="utf-8") as handle:
        contents = handle.read()
    return contents.strip()
41
+
42
+
43
def save_file(path: str, content: str) -> None:
    """Write *content* to *path* as UTF-8, creating parent directories as needed.

    Fix: the original unconditionally called
    ``os.makedirs(os.path.dirname(path), exist_ok=True)``, which raises
    FileNotFoundError when *path* has no directory component, because
    ``os.path.dirname`` returns ``""`` there.
    """
    parent = os.path.dirname(path)
    if parent:  # only create directories when the path actually has some
        os.makedirs(parent, exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        f.write(content)
47
+
48
+
49
# System prompt for Agent 1 (initial_draft_agent): drafts the first Cognitive
# Task Report (CTR) from raw session logs.
DRAFT_SYSTEM = """
You are a UX researcher and cognitive analyst specializing in mobile app behavior analysis.

Given raw timestamped logs of a user session in a food-ordering app (Zomato / Swiggy),
produce a **Cognitive Task Report (CTR)** that breaks down:

1. **Session Overview** – brief summary (user, app, date, outcome).
2. **Step-by-Step Breakdown** – for every meaningful action, describe:
- Timestamp
- Action taken
- Inferred thought process / intent behind the action
- Any friction, hesitation, or decision-making moment observed
3. **Key Decision Points** – moments where the user made a notable choice.
4. **UX Observations** – patterns, pain-points, or positive flows noticed.
5. **Session Outcome** – result of the session.

Format the report in clean Markdown with headers and numbered lists.
Be thorough but concise. Base everything strictly on the logs provided.
""".strip()

# System prompt for Agent 2 (validator_agent): cross-checks a CTR against the
# logs. The sentinel string NO_ISSUES_FOUND is what run_pipeline() looks for
# to stop iterating — keep it in sync with that check.
VALIDATOR_SYSTEM = """
You are a meticulous QA analyst reviewing a Cognitive Task Report (CTR) against original app session logs.

Your job is to identify **every inconsistency** between the CTR and the logs, including:
- Missing events that appear in the logs but not in the CTR
- Incorrect timestamps cited in the CTR
- Wrong prices, item names, order IDs, ratings, or other factual details
- Misinterpreted user intent that contradicts observable log evidence
- Extra events in the CTR that do not appear in the logs

Output your findings as a **numbered list of issues** in this exact format:

ISSUE 1: <brief title>
- Location in CTR: <section / step reference>
- Problem: <what is wrong>
- Evidence in logs: <exact log line or detail>

If there are NO issues, output exactly:
NO_ISSUES_FOUND

Be exhaustive. Do not skip minor discrepancies.
""".strip()

# System prompt for Agent 3 (correction_agent): rewrites the CTR applying the
# validator's findings.
CORRECTION_SYSTEM = """
You are a precise technical writer. You will be given:
1. The original raw logs
2. A Cognitive Task Report (CTR) that may contain errors
3. A validation report listing specific issues

Your task is to produce a **fully corrected CTR** that:
- Fixes every issue listed in the validation report
- Retains all correct content from the original CTR
- Adds any missing log events with correct timestamps and analysis
- Does NOT introduce new information not present in the logs

Output the complete corrected CTR in clean Markdown. Do not include any preamble
like "Here is the corrected CTR" — output only the report itself.
""".strip()
107
+
108
+
109
def initial_draft_agent(logs: str) -> str:
    """Agent 1: produce the first-draft CTR from the raw session logs."""
    print("\n[Agent 1] Initial Draft Agent running...")
    user_message = f"Here are the session logs:\n\n{logs}"
    draft = chat(DRAFT_SYSTEM, user_message)
    print(" → Draft CTR produced.")
    return draft
115
+
116
+
117
def validator_agent(logs: str, ctr: str) -> str:
    """Agent 2: compare the CTR against the logs and report discrepancies."""
    print("\n[Agent 2] Validator Agent running...")
    sections = [
        "## Original Logs\n\n",
        f"{logs}\n\n",
        "## Current CTR\n\n",
        f"{ctr}",
    ]
    # Low temperature: validation should be as deterministic as possible.
    findings = chat(VALIDATOR_SYSTEM, "".join(sections), temperature=0.1)
    print(" → Validation report produced.")
    return findings
128
+
129
+
130
def correction_agent(logs: str, ctr: str, issues: str) -> str:
    """Agent 3: rewrite the CTR applying the validator's listed issues."""
    print("\n[Agent 3] Correction Agent running...")
    sections = [
        "## Original Logs\n\n",
        f"{logs}\n\n",
        "## Current CTR (may have errors)\n\n",
        f"{ctr}\n\n",
        "## Validation Issues to Fix\n\n",
        f"{issues}",
    ]
    revised = chat(CORRECTION_SYSTEM, "".join(sections))
    print(" → Corrected CTR produced.")
    return revised
143
+
144
+
145
def run_pipeline(logs: str, output_dir: str = "output") -> str:
    """Draft a CTR, then alternate validation and correction until the
    validator emits NO_ISSUES_FOUND or MAX_ITERATIONS is reached.

    Every intermediate artifact is written under *output_dir*; the final CTR
    is saved as ctr_final.md and returned.
    """
    def _persist(filename: str, text: str) -> None:
        # Write one artifact under output_dir and log where it went.
        destination = f"{output_dir}/{filename}"
        save_file(destination, text)
        print(f" → Saved: {destination}")

    ctr = initial_draft_agent(logs)
    _persist("ctr_draft.md", ctr)

    # Iterative validation & correction loop.
    for round_no in range(1, MAX_ITERATIONS + 1):
        banner = "=" * 60
        print("\n" + banner)
        print(f"Iteration {round_no} of {MAX_ITERATIONS}")
        print(banner)

        issues = validator_agent(logs, ctr)
        _persist(f"validation_iter_{round_no}.md", issues)

        if "NO_ISSUES_FOUND" in issues:
            print(f"\n No issues found in iteration {round_no}. Pipeline complete.")
            break

        ctr = correction_agent(logs, ctr, issues)
        _persist(f"ctr_iter_{round_no}.md", ctr)

        if round_no == MAX_ITERATIONS:
            print(f"\n Reached maximum iterations ({MAX_ITERATIONS}). Saving final CTR.")

    # Persist the final CTR regardless of how the loop ended.
    save_file(f"{output_dir}/ctr_final.md", ctr)
    print(f"\n Final CTR saved to: {output_dir}/ctr_final.md")
    return ctr
175
+
176
+
177
+ # ─── ENTRY POINT ───────────────────────────────────────────────────────────────
178
+
179
if __name__ == "__main__":
    # CLI: agents.py [log_file] [output_dir]
    argv = sys.argv
    log_file = argv[1] if len(argv) > 1 else "example-log.txt"
    out_dir = argv[2] if len(argv) > 2 else "output"

    if not os.path.exists(log_file):
        print(f"Error: Log file '{log_file}' not found.")
        sys.exit(1)

    print(" Starting Log Analyzer Pipeline")
    print(f" Log file : {log_file}")
    print(f" Output : {out_dir}/")
    print(f" Model : {MODEL}")

    session_logs = load_file(log_file)
    run_pipeline(session_logs, out_dir)
main.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI, HTTPException
from fastapi import Body
from pydantic import BaseModel
from openai import AsyncOpenAI
import asyncio
import json  # NOTE(review): not referenced in this module — confirm before removing
import os
import uuid
from dotenv import load_dotenv
from models import ClusterRequest

from utils.filtering import get_representatives


# Ensure the default artifact directory exists before any request arrives.
os.makedirs("output", exist_ok=True)


# Import the existing pipeline
from agents import run_pipeline

load_dotenv()

app = FastAPI()

# Configure AsyncOpenAI Client with Requesty settings
client = AsyncOpenAI(
    api_key=os.environ.get("REQUESTY_API_KEY", "missing_key"),
    base_url="https://router.requesty.ai/v1",
    default_headers={
        "HTTP-Referer": "https://yourapp.com",
        "X-Title": "My App",
    }
)
34
+
35
class LogsRequest(BaseModel):
    """Request body for POST /processed-logs."""

    # Raw timestamped session logs as one text blob.
    logs: str
37
+
38
async def get_insights(report: str) -> str:
    """Insights Agent: distill 5-7 numbered behavioral insights from the report."""
    conversation = [
        {"role": "system", "content": "You are an Insights Agent. Analyze the provided user session logs/report. Provide exactly 5-7 short, easy-to-read numbered points containing the key behavioral insights. Ensure these insights are highly specific to the details in the provided story/logs and avoid any generic observations."},
        {"role": "user", "content": f"Here is the report generated from the logs:\n{report}"},
    ]
    completion = await client.chat.completions.create(
        model="openai/gpt-4o",
        messages=conversation,
    )
    return completion.choices[0].message.content
47
+
48
async def get_state_flow(report: str) -> str:
    """State Flow Agent: return raw Mermaid flowchart code for the session."""
    conversation = [
        {"role": "system", "content": "You are a State Flow Agent. Analyze the report/logs and generate a high-level state flow diagram in Mermaid JS format (flowchart TD). Keep it to a maximum of 10 nodes. IMPORTANT: You must ONLY include the REASON on the edges (arrows) for turning point decisions, moments of friction, or loops (e.g., when the user goes back to adjust the cart because of price constraints). DO NOT annotate standard forward steps (like opening the app, or standard progression) with descriptions on the edges. If the user loops back or returns to a previous state, correctly map the arrow back to the previous node and explicitly state the reason on that edge. Return ONLY the raw Mermaid code string without markdown wrappers (e.g., no ```mermaid)."},
        {"role": "user", "content": f"Here is the report generated from the logs:\n{report}"},
    ]
    completion = await client.chat.completions.create(
        model="openai/gpt-4o",
        messages=conversation,
    )
    diagram = completion.choices[0].message.content
    return diagram.strip()
57
+
58
async def get_suggestions(report: str) -> str:
    """Suggestion Agent: 5 conversion-focused recommendations (uses gpt-4o-mini)."""
    conversation = [
        {"role": "system", "content": "You are a Suggestion Agent. Review the user progression and provide ONLY 5 actionable business recommendations to improve conversion. Format your response strictly as a numbered list of short, concise points. Make all suggestions highly specific to the provided story and logs; do not include generic advice."},
        {"role": "user", "content": f"Here is the report generated from the logs:\n{report}"},
    ]
    completion = await client.chat.completions.create(
        model="openai/gpt-4o-mini",
        messages=conversation,
    )
    return completion.choices[0].message.content
67
+
68
+ @app.post("/processed-logs")
69
+ async def process_logs(request: LogsRequest):
70
+ try:
71
+ # 1. Run the existing pipeline to get the report (CTR)
72
+ # Since it is synchronous and does I/O, we offload to a thread
73
+ output_dir = f"output_{uuid.uuid4().hex}"
74
+ report = await asyncio.to_thread(run_pipeline, request.logs, output_dir)
75
+
76
+ # 2. Run all three new agents concurrently using the generated report string
77
+ insights, state_flow, suggestions = await asyncio.gather(
78
+ get_insights(report),
79
+ get_state_flow(report),
80
+ get_suggestions(report)
81
+ )
82
+
83
+ combined = f"""
84
+ # REPORT:
85
+ {report}
86
+
87
+ # STATE FLOW
88
+ ```mermaid
89
+ {state_flow.replace("\n", """
90
+ """)}
91
+ ```
92
+
93
+ # INSIGHTS:
94
+ {insights}
95
+
96
+ # SUGESTIONS:
97
+ {suggestions}
98
+ """
99
+ os.makedirs("output", exist_ok=True)
100
+ x = len(os.listdir("output"))
101
+ file_name = f"output/output{x}.md"
102
+ with open(file_name, 'w', encoding='utf-8') as f:
103
+ f.write(combined)
104
+
105
+ # 3. Return the final output
106
+ return {
107
+ "report": report,
108
+ "insights": insights,
109
+ "state_flow": state_flow,
110
+ "suggestions": suggestions
111
+ }
112
+
113
+ except Exception as e:
114
+ raise HTTPException(status_code=500, detail=str(e))
115
+
116
+
117
+
118
+ @app.post("/get_representatives")
119
+ def cluster_texts(request: ClusterRequest = Body(...)):
120
+ if not request.texts or len(request.texts) == 0:
121
+ raise HTTPException(status_code=400, detail="texts list cannot be empty")
122
+
123
+ try:
124
+ reps = get_representatives(
125
+ request.texts,
126
+ request.eps,
127
+ request.min_samples
128
+ )
129
+
130
+ return {
131
+ "input_size": len(request.texts),
132
+ "output_size": len(reps),
133
+ "representatives": reps
134
+ }
135
+
136
+ except Exception as e:
137
+ raise HTTPException(status_code=500, detail=str(e))
models.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from pydantic import BaseModel
3
+ from typing import List
4
+
5
class ClusterRequest(BaseModel):
    """Request body for POST /get_representatives.

    Fix: the original called ``ClusterRequest.model_rebuild()`` after the class
    body; that call is only needed to resolve forward references, and this
    model has none, so it was a redundant no-op and has been removed.
    """

    # Texts to deduplicate via clustering.
    texts: List[str]
    # DBSCAN neighborhood radius over cosine distance.
    eps: float = 0.4
    # Minimum points required to form a dense cluster.
    min_samples: int = 2
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ python-dotenv
2
+ openai
3
+ fastapi
4
+ uvicorn
5
+ requests
6
+ numpy
7
+ scikit-learn
utils/filtering.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import requests
import numpy as np
from typing import List

from sklearn.metrics.pairwise import cosine_distances
from sklearn.cluster import DBSCAN
import os

from dotenv import load_dotenv


# Pull REQUESTY_API_KEY from a local .env file if present.
load_dotenv()


# Requesty embeddings endpoint configuration used by get_embeddings().
API_KEY = os.getenv("REQUESTY_API_KEY")
API_URL = "https://router.requesty.ai/v1/embeddings"
17
+
18
+
19
+
20
def get_embeddings(texts: List[str]) -> np.ndarray:
    """Embed *texts* via the Requesty embeddings API.

    Returns:
        Array of shape (len(texts), dim) of float embeddings.

    Raises:
        RuntimeError: when the API responds with a non-200 status. (Fix: the
            original raised a bare ``Exception`` without the status code;
            RuntimeError is a subclass, so existing ``except Exception``
            handlers still match.)
    """
    response = requests.post(
        API_URL,
        headers={
            "Authorization": f"Bearer {API_KEY}",
            "Content-Type": "application/json",
        },
        json={
            "input": texts,
            "model": "openai/text-embedding-3-small",
            "encoding_format": "float"
        },
        timeout=30
    )

    if response.status_code != 200:
        raise RuntimeError(
            f"Embedding API error ({response.status_code}): {response.text}"
        )

    data = response.json()
    embeddings = [item["embedding"] for item in data["data"]]
    return np.array(embeddings)
41
+
42
+
43
def batched_embeddings(texts: List[str], batch_size=50):
    """Embed *texts* in chunks of *batch_size* and stack the results.

    Fix: ``np.vstack([])`` raises ValueError, so an empty input previously
    crashed; it now short-circuits to an empty (0, 0) array.
    """
    if not texts:
        return np.empty((0, 0))

    all_embeddings = []
    for start in range(0, len(texts), batch_size):
        chunk = texts[start:start + batch_size]
        all_embeddings.append(get_embeddings(chunk))

    return np.vstack(all_embeddings)
52
+
53
def get_representatives(texts: List[str], eps: float, min_samples: int):
    """Cluster *texts* by embedding similarity and keep one text per cluster.

    DBSCAN runs over a precomputed cosine-distance matrix. Noise points
    (label -1) each become their own singleton cluster, so no input is
    dropped; each cluster contributes the member closest to its centroid.
    """
    embeddings = batched_embeddings(texts)
    pairwise = cosine_distances(embeddings)

    labels = DBSCAN(
        eps=eps,
        min_samples=min_samples,
        metric="precomputed",
    ).fit(pairwise).labels_

    # Group member indices per cluster; noise indices get unique string keys.
    groups = {}
    for position, tag in enumerate(labels):
        key = f"noise_{position}" if tag == -1 else tag
        groups.setdefault(key, []).append(position)

    chosen = []
    for members in groups.values():
        member_vectors = embeddings[members]
        center = np.mean(member_vectors, axis=0)

        gaps = cosine_distances(
            member_vectors, center.reshape(1, -1)
        ).flatten()

        closest = members[np.argmin(gaps)]
        chosen.append(texts[closest])

    return chosen