quantumbit commited on
Commit
025f187
·
0 Parent(s):

initial commit

Browse files
Files changed (9) hide show
  1. .github/workflows/deploy-hf-space.yml +34 -0
  2. .gitignore +4 -0
  3. Dockerfile +16 -0
  4. README.md +8 -0
  5. agents.py +193 -0
  6. main.py +137 -0
  7. models.py +11 -0
  8. requirements.txt +7 -0
  9. utils/filtering.py +86 -0
.github/workflows/deploy-hf-space.yml ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Deploys this repository to a Hugging Face Space by force-pushing HEAD to the
# Space's `main` branch.
# NOTE(review): `on: push:` has no branch filter, so a push to ANY branch
# force-pushes that branch's HEAD to the Space — confirm this is intended.
name: Deploy to HF Space

on:
  push:

permissions:
  contents: read

jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          # Full history so the push to the Space is not a shallow clone.
          fetch-depth: 0

      - name: Push to Hugging Face Space
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          # Expected form: "username/space-name"
          HF_SPACE: ${{ secrets.HF_SPACE }}
        run: |
          set -euo pipefail

          # Fail fast with a clear message when the secrets are not configured.
          if [ -z "${HF_TOKEN:-}" ] || [ -z "${HF_SPACE:-}" ]; then
            echo "Missing HF_TOKEN or HF_SPACE secrets."
            exit 1
          fi

          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"

          # Token is embedded in the remote URL; --force overwrites the
          # Space's history with this repository's HEAD.
          git remote add hf "https://user:${HF_TOKEN}@huggingface.co/spaces/${HF_SPACE}.git"
          git push --force hf HEAD:main
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ env
2
+ .env
3
+ __pycache__
4
+ output*
Dockerfile ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
# you will also find guides on how best to write your Dockerfile

FROM python:3.9

# Run as a non-root user with a fixed UID, per HF Spaces Docker guidance.
RUN useradd -m -u 1000 user
USER user
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

# Install dependencies before copying the code so Docker layer caching
# survives code-only edits.
COPY --chown=user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

COPY --chown=user . /app
# HF Spaces expects the app to listen on port 7860.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: CTRS
3
+ emoji: 😻
4
+ colorFrom: green
5
+ colorTo: blue
6
+ sdk: docker
7
+ pinned: false
8
+ ---
agents.py ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import sys
from openai import OpenAI

from dotenv import load_dotenv

# Load REQUESTY_API_KEY (and any other settings) from a local .env file.
load_dotenv()


# Requesty router configuration used by chat() below.
REQUESTY_API_KEY = os.getenv("REQUESTY_API_KEY")
BASE_URL = "https://router.requesty.ai/v1"
MODEL = "openai/gpt-4o"
# Maximum number of validate-then-correct rounds in run_pipeline().
MAX_ITERATIONS = 3

# Shared synchronous client; the extra headers identify this app to the router.
client = OpenAI(
    api_key=REQUESTY_API_KEY,
    base_url=BASE_URL,
    default_headers={
        "HTTP-Referer": "https://yourapp.com",
        "X-Title": "Log Analyzer",
    },
)
23
+
24
+
25
def chat(system: str, user: str, temperature: float = 0.3) -> str:
    """Run one system+user exchange against MODEL and return the stripped reply."""
    conversation = [
        {"role": "system", "content": system},
        {"role": "user", "content": user},
    ]
    completion = client.chat.completions.create(
        model=MODEL,
        temperature=temperature,
        messages=conversation,
    )
    reply = completion.choices[0].message.content
    return reply.strip()
36
+
37
+
38
def load_file(path: str) -> str:
    """Read a UTF-8 text file and return its contents without surrounding whitespace."""
    with open(path, "r", encoding="utf-8") as handle:
        contents = handle.read()
    return contents.strip()
41
+
42
+
43
def save_file(path: str, content: str) -> None:
    """Write *content* to *path* as UTF-8, creating parent directories as needed.

    Fix: the original unconditionally called
    ``os.makedirs(os.path.dirname(path), exist_ok=True)``, which raises
    FileNotFoundError when *path* has no directory component, because
    ``os.path.dirname`` returns ``""`` there.
    """
    parent = os.path.dirname(path)
    if parent:  # only create directories when the path actually has some
        os.makedirs(parent, exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        f.write(content)
47
+
48
+
49
# System prompt for Agent 1 (initial_draft_agent): drafts the first Cognitive
# Task Report (CTR) from raw session logs.
DRAFT_SYSTEM = """
You are a UX researcher and cognitive analyst specializing in mobile app behavior analysis.

Given raw timestamped logs of a user session in a food-ordering app (Zomato / Swiggy),
produce a **Cognitive Task Report (CTR)** that breaks down:

1. **Session Overview** – brief summary (user, app, date, outcome).
2. **Step-by-Step Breakdown** – for every meaningful action, describe:
- Timestamp
- Action taken
- Inferred thought process / intent behind the action
- Any friction, hesitation, or decision-making moment observed
3. **Key Decision Points** – moments where the user made a notable choice.
4. **UX Observations** – patterns, pain-points, or positive flows noticed.
5. **Session Outcome** – result of the session.

Format the report in clean Markdown with headers and numbered lists.
Be thorough but concise. Base everything strictly on the logs provided.
""".strip()

# System prompt for Agent 2 (validator_agent): cross-checks a CTR against the
# logs. The sentinel string NO_ISSUES_FOUND is what run_pipeline() looks for
# to stop iterating — keep it in sync with that check.
VALIDATOR_SYSTEM = """
You are a meticulous QA analyst reviewing a Cognitive Task Report (CTR) against original app session logs.

Your job is to identify **every inconsistency** between the CTR and the logs, including:
- Missing events that appear in the logs but not in the CTR
- Incorrect timestamps cited in the CTR
- Wrong prices, item names, order IDs, ratings, or other factual details
- Misinterpreted user intent that contradicts observable log evidence
- Extra events in the CTR that do not appear in the logs

Output your findings as a **numbered list of issues** in this exact format:

ISSUE 1: <brief title>
- Location in CTR: <section / step reference>
- Problem: <what is wrong>
- Evidence in logs: <exact log line or detail>

If there are NO issues, output exactly:
NO_ISSUES_FOUND

Be exhaustive. Do not skip minor discrepancies.
""".strip()

# System prompt for Agent 3 (correction_agent): rewrites the CTR applying the
# validator's findings.
CORRECTION_SYSTEM = """
You are a precise technical writer. You will be given:
1. The original raw logs
2. A Cognitive Task Report (CTR) that may contain errors
3. A validation report listing specific issues

Your task is to produce a **fully corrected CTR** that:
- Fixes every issue listed in the validation report
- Retains all correct content from the original CTR
- Adds any missing log events with correct timestamps and analysis
- Does NOT introduce new information not present in the logs

Output the complete corrected CTR in clean Markdown. Do not include any preamble
like "Here is the corrected CTR" — output only the report itself.
""".strip()
107
+
108
+
109
def initial_draft_agent(logs: str) -> str:
    """Agent 1: produce the first-draft CTR from the raw session logs."""
    print("\n[Agent 1] Initial Draft Agent running...")
    user_message = f"Here are the session logs:\n\n{logs}"
    draft = chat(DRAFT_SYSTEM, user_message)
    print(" → Draft CTR produced.")
    return draft
115
+
116
+
117
def validator_agent(logs: str, ctr: str) -> str:
    """Agent 2: compare the CTR against the logs and report discrepancies."""
    print("\n[Agent 2] Validator Agent running...")
    sections = [
        "## Original Logs\n\n",
        f"{logs}\n\n",
        "## Current CTR\n\n",
        f"{ctr}",
    ]
    # Low temperature: validation should be as deterministic as possible.
    findings = chat(VALIDATOR_SYSTEM, "".join(sections), temperature=0.1)
    print(" → Validation report produced.")
    return findings
128
+
129
+
130
def correction_agent(logs: str, ctr: str, issues: str) -> str:
    """Agent 3: rewrite the CTR applying the validator's listed issues."""
    print("\n[Agent 3] Correction Agent running...")
    sections = [
        "## Original Logs\n\n",
        f"{logs}\n\n",
        "## Current CTR (may have errors)\n\n",
        f"{ctr}\n\n",
        "## Validation Issues to Fix\n\n",
        f"{issues}",
    ]
    revised = chat(CORRECTION_SYSTEM, "".join(sections))
    print(" → Corrected CTR produced.")
    return revised
143
+
144
+
145
def run_pipeline(logs: str, output_dir: str = "output") -> str:
    """Draft a CTR, then alternate validation and correction until the
    validator emits NO_ISSUES_FOUND or MAX_ITERATIONS is reached.

    Every intermediate artifact is written under *output_dir*; the final CTR
    is saved as ctr_final.md and returned.
    """
    def _persist(filename: str, text: str) -> None:
        # Write one artifact under output_dir and log where it went.
        destination = f"{output_dir}/{filename}"
        save_file(destination, text)
        print(f" → Saved: {destination}")

    ctr = initial_draft_agent(logs)
    _persist("ctr_draft.md", ctr)

    # Iterative validation & correction loop.
    for round_no in range(1, MAX_ITERATIONS + 1):
        banner = "=" * 60
        print("\n" + banner)
        print(f"Iteration {round_no} of {MAX_ITERATIONS}")
        print(banner)

        issues = validator_agent(logs, ctr)
        _persist(f"validation_iter_{round_no}.md", issues)

        if "NO_ISSUES_FOUND" in issues:
            print(f"\n No issues found in iteration {round_no}. Pipeline complete.")
            break

        ctr = correction_agent(logs, ctr, issues)
        _persist(f"ctr_iter_{round_no}.md", ctr)

        if round_no == MAX_ITERATIONS:
            print(f"\n Reached maximum iterations ({MAX_ITERATIONS}). Saving final CTR.")

    # Persist the final CTR regardless of how the loop ended.
    save_file(f"{output_dir}/ctr_final.md", ctr)
    print(f"\n Final CTR saved to: {output_dir}/ctr_final.md")
    return ctr
175
+
176
+
177
+ # ─── ENTRY POINT ───────────────────────────────────────────────────────────────
178
+
179
if __name__ == "__main__":
    # CLI: agents.py [log_file] [output_dir]
    argv = sys.argv
    log_file = argv[1] if len(argv) > 1 else "example-log.txt"
    out_dir = argv[2] if len(argv) > 2 else "output"

    if not os.path.exists(log_file):
        print(f"Error: Log file '{log_file}' not found.")
        sys.exit(1)

    print(" Starting Log Analyzer Pipeline")
    print(f" Log file : {log_file}")
    print(f" Output : {out_dir}/")
    print(f" Model : {MODEL}")

    session_logs = load_file(log_file)
    run_pipeline(session_logs, out_dir)
main.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI, HTTPException
from fastapi import Body
from pydantic import BaseModel
from openai import AsyncOpenAI
import asyncio
import json  # NOTE(review): not referenced in this module — confirm before removing
import os
import uuid
from dotenv import load_dotenv
from models import ClusterRequest

from utils.filtering import get_representatives


# Ensure the default artifact directory exists before any request arrives.
os.makedirs("output", exist_ok=True)


# Import the existing pipeline
from agents import run_pipeline

load_dotenv()

app = FastAPI()

# Configure AsyncOpenAI Client with Requesty settings
client = AsyncOpenAI(
    api_key=os.environ.get("REQUESTY_API_KEY", "missing_key"),
    base_url="https://router.requesty.ai/v1",
    default_headers={
        "HTTP-Referer": "https://yourapp.com",
        "X-Title": "My App",
    }
)
34
+
35
class LogsRequest(BaseModel):
    """Request body for POST /processed-logs."""

    # Raw timestamped session logs as one text blob.
    logs: str
37
+
38
async def get_insights(report: str) -> str:
    """Insights Agent: distill 5-7 numbered behavioral insights from the report."""
    conversation = [
        {"role": "system", "content": "You are an Insights Agent. Analyze the provided user session logs/report. Provide exactly 5-7 short, easy-to-read numbered points containing the key behavioral insights. Ensure these insights are highly specific to the details in the provided story/logs and avoid any generic observations."},
        {"role": "user", "content": f"Here is the report generated from the logs:\n{report}"},
    ]
    completion = await client.chat.completions.create(
        model="openai/gpt-4o",
        messages=conversation,
    )
    return completion.choices[0].message.content
47
+
48
async def get_state_flow(report: str) -> str:
    """State Flow Agent: return raw Mermaid flowchart code for the session."""
    conversation = [
        {"role": "system", "content": "You are a State Flow Agent. Analyze the report/logs and generate a high-level state flow diagram in Mermaid JS format (flowchart TD). Keep it to a maximum of 10 nodes. IMPORTANT: You must ONLY include the REASON on the edges (arrows) for turning point decisions, moments of friction, or loops (e.g., when the user goes back to adjust the cart because of price constraints). DO NOT annotate standard forward steps (like opening the app, or standard progression) with descriptions on the edges. If the user loops back or returns to a previous state, correctly map the arrow back to the previous node and explicitly state the reason on that edge. Return ONLY the raw Mermaid code string without markdown wrappers (e.g., no ```mermaid)."},
        {"role": "user", "content": f"Here is the report generated from the logs:\n{report}"},
    ]
    completion = await client.chat.completions.create(
        model="openai/gpt-4o",
        messages=conversation,
    )
    diagram = completion.choices[0].message.content
    return diagram.strip()
57
+
58
async def get_suggestions(report: str) -> str:
    """Suggestion Agent: 5 conversion-focused recommendations (uses gpt-4o-mini)."""
    conversation = [
        {"role": "system", "content": "You are a Suggestion Agent. Review the user progression and provide ONLY 5 actionable business recommendations to improve conversion. Format your response strictly as a numbered list of short, concise points. Make all suggestions highly specific to the provided story and logs; do not include generic advice."},
        {"role": "user", "content": f"Here is the report generated from the logs:\n{report}"},
    ]
    completion = await client.chat.completions.create(
        model="openai/gpt-4o-mini",
        messages=conversation,
    )
    return completion.choices[0].message.content
67
+
68
+ @app.post("/processed-logs")
69
+ async def process_logs(request: LogsRequest):
70
+ try:
71
+ # 1. Run the existing pipeline to get the report (CTR)
72
+ # Since it is synchronous and does I/O, we offload to a thread
73
+ output_dir = f"output_{uuid.uuid4().hex}"
74
+ report = await asyncio.to_thread(run_pipeline, request.logs, output_dir)
75
+
76
+ # 2. Run all three new agents concurrently using the generated report string
77
+ insights, state_flow, suggestions = await asyncio.gather(
78
+ get_insights(report),
79
+ get_state_flow(report),
80
+ get_suggestions(report)
81
+ )
82
+
83
+ combined = f"""
84
+ # REPORT:
85
+ {report}
86
+
87
+ # STATE FLOW
88
+ ```mermaid
89
+ {state_flow.replace("\n", """
90
+ """)}
91
+ ```
92
+
93
+ # INSIGHTS:
94
+ {insights}
95
+
96
+ # SUGESTIONS:
97
+ {suggestions}
98
+ """
99
+ os.makedirs("output", exist_ok=True)
100
+ x = len(os.listdir("output"))
101
+ file_name = f"output/output{x}.md"
102
+ with open(file_name, 'w', encoding='utf-8') as f:
103
+ f.write(combined)
104
+
105
+ # 3. Return the final output
106
+ return {
107
+ "report": report,
108
+ "insights": insights,
109
+ "state_flow": state_flow,
110
+ "suggestions": suggestions
111
+ }
112
+
113
+ except Exception as e:
114
+ raise HTTPException(status_code=500, detail=str(e))
115
+
116
+
117
+
118
+ @app.post("/get_representatives")
119
+ def cluster_texts(request: ClusterRequest = Body(...)):
120
+ if not request.texts or len(request.texts) == 0:
121
+ raise HTTPException(status_code=400, detail="texts list cannot be empty")
122
+
123
+ try:
124
+ reps = get_representatives(
125
+ request.texts,
126
+ request.eps,
127
+ request.min_samples
128
+ )
129
+
130
+ return {
131
+ "input_size": len(request.texts),
132
+ "output_size": len(reps),
133
+ "representatives": reps
134
+ }
135
+
136
+ except Exception as e:
137
+ raise HTTPException(status_code=500, detail=str(e))
models.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from pydantic import BaseModel
3
+ from typing import List
4
+
5
class ClusterRequest(BaseModel):
    """Request body for POST /get_representatives.

    Fix: the original called ``ClusterRequest.model_rebuild()`` after the class
    body; that call is only needed to resolve forward references, and this
    model has none, so it was a redundant no-op and has been removed.
    """

    # Texts to deduplicate via clustering.
    texts: List[str]
    # DBSCAN neighborhood radius over cosine distance.
    eps: float = 0.4
    # Minimum points required to form a dense cluster.
    min_samples: int = 2
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ python-dotenv
2
+ openai
3
+ fastapi
4
+ uvicorn
5
+ requests
6
+ numpy
7
+ scikit-learn
utils/filtering.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import requests
import numpy as np
from typing import List

from sklearn.metrics.pairwise import cosine_distances
from sklearn.cluster import DBSCAN
import os

from dotenv import load_dotenv


# Pull REQUESTY_API_KEY from a local .env file if present.
load_dotenv()


# Requesty embeddings endpoint configuration used by get_embeddings().
API_KEY = os.getenv("REQUESTY_API_KEY")
API_URL = "https://router.requesty.ai/v1/embeddings"
17
+
18
+
19
+
20
def get_embeddings(texts: List[str]) -> np.ndarray:
    """Embed *texts* via the Requesty embeddings API.

    Returns:
        Array of shape (len(texts), dim) of float embeddings.

    Raises:
        RuntimeError: when the API responds with a non-200 status. (Fix: the
            original raised a bare ``Exception`` without the status code;
            RuntimeError is a subclass, so existing ``except Exception``
            handlers still match.)
    """
    response = requests.post(
        API_URL,
        headers={
            "Authorization": f"Bearer {API_KEY}",
            "Content-Type": "application/json",
        },
        json={
            "input": texts,
            "model": "openai/text-embedding-3-small",
            "encoding_format": "float"
        },
        timeout=30
    )

    if response.status_code != 200:
        raise RuntimeError(
            f"Embedding API error ({response.status_code}): {response.text}"
        )

    data = response.json()
    embeddings = [item["embedding"] for item in data["data"]]
    return np.array(embeddings)
41
+
42
+
43
def batched_embeddings(texts: List[str], batch_size=50):
    """Embed *texts* in chunks of *batch_size* and stack the results.

    Fix: ``np.vstack([])`` raises ValueError, so an empty input previously
    crashed; it now short-circuits to an empty (0, 0) array.
    """
    if not texts:
        return np.empty((0, 0))

    all_embeddings = []
    for start in range(0, len(texts), batch_size):
        chunk = texts[start:start + batch_size]
        all_embeddings.append(get_embeddings(chunk))

    return np.vstack(all_embeddings)
52
+
53
def get_representatives(texts: List[str], eps: float, min_samples: int):
    """Cluster *texts* by embedding similarity and keep one text per cluster.

    DBSCAN runs over a precomputed cosine-distance matrix. Noise points
    (label -1) each become their own singleton cluster, so no input is
    dropped; each cluster contributes the member closest to its centroid.
    """
    embeddings = batched_embeddings(texts)
    pairwise = cosine_distances(embeddings)

    labels = DBSCAN(
        eps=eps,
        min_samples=min_samples,
        metric="precomputed",
    ).fit(pairwise).labels_

    # Group member indices per cluster; noise indices get unique string keys.
    groups = {}
    for position, tag in enumerate(labels):
        key = f"noise_{position}" if tag == -1 else tag
        groups.setdefault(key, []).append(position)

    chosen = []
    for members in groups.values():
        member_vectors = embeddings[members]
        center = np.mean(member_vectors, axis=0)

        gaps = cosine_distances(
            member_vectors, center.reshape(1, -1)
        ).flatten()

        closest = members[np.argmin(gaps)]
        chosen.append(texts[closest])

    return chosen