vip11017 committed on
Commit
207e62f
·
1 Parent(s): 649efae

completed full flow

Browse files
.Dockerfile ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11-slim

# Avoid Hugging Face cache permission errors by pointing the caches at /tmp
ENV HF_HOME=/tmp/hf-cache
ENV HF_DATASETS_CACHE=/tmp/hf-cache

# Install necessary system dependencies; the apt lists are removed in the same
# layer so they do not add to the image size
RUN apt-get update && apt-get install -y \
    git \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Set working directory
WORKDIR /code

# Install Python dependencies before copying the application code, so this
# layer is rebuilt only when requirements.txt changes, not on every source edit
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application code last; it is the part that changes most often
COPY ./app /code/app

# Expose the FastAPI port
EXPOSE 7860

# Run the FastAPI app
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Auro Chatbot Backend
3
+ emoji: 🔥
4
+ colorFrom: yellow
5
+ colorTo: yellow
6
+ sdk: docker
7
+ app_port: 7860
8
+ base_path: /
9
+ pinned: false
10
+ ---
11
+
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app/chatbot/demo_routes.py CHANGED
@@ -1,10 +1,10 @@
1
  from fastapi import APIRouter, HTTPException
2
  from pydantic import BaseModel
3
  from app.chatbot.demo_rag import get_response
4
- from app.config import demo_chatbot_configs
5
 
6
  router = APIRouter()
7
- chatbot_sessions = {} # chatbot_id -> loaded config/session
8
 
9
  class ChatInput(BaseModel):
10
  question: str
@@ -13,22 +13,24 @@ class ChatInput(BaseModel):
13
  email: str
14
 
15
 
16
- @router.post("/demo/{chatbot_id}")
17
- async def demo_chat(chatbot_id: str, input: ChatInput):
18
- # Lazy-load chatbot config
19
- print(f"got question: {input.question} for chatbot_id: {chatbot_id} and session_id: {input.session_id}")
20
- if chatbot_id not in chatbot_sessions:
21
- rag_config = demo_chatbot_configs.find_one({"chatbot_id": chatbot_id})
 
 
22
  if not rag_config:
23
  raise HTTPException(status_code=404, detail="Chatbot not found")
24
- chatbot_sessions[chatbot_id] = rag_config
25
-
26
- rag_config = chatbot_sessions[chatbot_id]
27
- print(rag_config)
28
 
29
  config = {
30
- 'configurable': {
31
- 'thread_id': input.session_id
32
  }
33
  }
34
 
@@ -40,4 +42,26 @@ async def demo_chat(chatbot_id: str, input: ChatInput):
40
  rag_config=rag_config,
41
  config=config
42
  )
43
- return {"answer": response['response']}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from fastapi import APIRouter, HTTPException
2
  from pydantic import BaseModel
3
  from app.chatbot.demo_rag import get_response
4
+ from app.config import demo_chatbot_configs, demo_form_submissions
5
 
6
  router = APIRouter()
7
+ chatbot_sessions = {} # submission_id -> loaded config/session
8
 
9
  class ChatInput(BaseModel):
10
  question: str
 
13
  email: str
14
 
15
 
16
+ @router.post("/demo/{submission_id}")
17
+ async def demo_chat(submission_id: str, input: ChatInput):
18
+ # Lazy-load chatbot config by submission_id
19
+ print(f"Got question: {input.question} for submission_id: {submission_id} and session_id: {input.session_id}")
20
+
21
+ if submission_id not in chatbot_sessions:
22
+ # Fetch RAG config using submission_id
23
+ rag_config = demo_chatbot_configs.find_one({"submission_id": submission_id})
24
  if not rag_config:
25
  raise HTTPException(status_code=404, detail="Chatbot not found")
26
+ chatbot_sessions[submission_id] = rag_config
27
+
28
+ rag_config = chatbot_sessions[submission_id]
29
+ print(f"Loaded RAG config for submission_id {submission_id}: {rag_config}")
30
 
31
  config = {
32
+ "configurable": {
33
+ "thread_id": input.session_id
34
  }
35
  }
36
 
 
42
  rag_config=rag_config,
43
  config=config
44
  )
45
+
46
+ return {"answer": response.get("response", "")}
47
+
48
+
49
@router.get("/demo/status/{submission_id}")
async def demo_chatbot_status(submission_id: str):
    """
    Report the per-stage build progress of a demo chatbot.

    Looks up the document in ``demo_form_submissions`` whose ``submission_id``
    matches the path parameter and returns its ``stages`` mapping together
    with the ``company_id``, ``website_url`` and ``chatbot_id`` fields.

    Raises:
        HTTPException: 404 when no submission document is found.
    """
    submission = demo_form_submissions.find_one({"submission_id": submission_id})
    if not submission:
        raise HTTPException(status_code=404, detail="Demo submission not found")

    # "stages" falls back to an empty mapping; the remaining fields fall back
    # to None when absent from the stored document.
    status = {
        "submission_id": submission_id,
        "stages": submission.get("stages", {}),
    }
    for field in ("company_id", "website_url", "chatbot_id"):
        status[field] = submission.get(field)
    return status
65
+
66
+
67
+
app/ingestion/demo_form_fetch_store.py CHANGED
@@ -18,6 +18,14 @@ def store_demo_chatbot(ingest_data: ChatbotIngest):
18
  data_dict['company_id'] = company_id
19
  data_dict['chatbot_id'] = chatbot_id
20
 
 
 
 
 
 
 
 
 
21
  # Insert into MongoDB
22
  demo_form_submissions.insert_one(data_dict)
23
  print(f"Chatbot {chatbot_id} ingested successfully for company {ingest_data.company_name}")
 
18
  data_dict['company_id'] = company_id
19
  data_dict['chatbot_id'] = chatbot_id
20
 
21
+ #Add status field
22
+ data_dict['stages'] = {
23
+ 'form_data_extracted': True,
24
+ 'website_scraped': False,
25
+ "content_embedded": False,
26
+ "rag_ready": False
27
+ }
28
+
29
  # Insert into MongoDB
30
  demo_form_submissions.insert_one(data_dict)
31
  print(f"Chatbot {chatbot_id} ingested successfully for company {ingest_data.company_name}")
app/ingestion/models.py CHANGED
@@ -3,6 +3,7 @@ from typing import List, Optional
3
  from datetime import datetime, timezone
4
 
5
  class ChatbotIngest(BaseModel):
 
6
  company_name: str
7
  chatbot_name: Optional[str] = None
8
  website_url: HttpUrl
 
3
  from datetime import datetime, timezone
4
 
5
  class ChatbotIngest(BaseModel):
6
+ submission_id: str
7
  company_name: str
8
  chatbot_name: Optional[str] = None
9
  website_url: HttpUrl
app/ingestion/rag_setup.py CHANGED
@@ -178,6 +178,7 @@ def store_demo_rag_config(chatbot_id, company_id, ingest: ChatbotIngest) -> None
178
  Stores the RAG configuration prompt for the demo chatbot in MongoDB.
179
  """
180
  demo_rag_dict = {
 
181
  "chatbot_id": chatbot_id,
182
  "company_id": company_id,
183
  "company_name": ingest.company_name,
@@ -187,7 +188,7 @@ def store_demo_rag_config(chatbot_id, company_id, ingest: ChatbotIngest) -> None
187
  "name": "all",
188
  "collection": f"chatbot_{chatbot_id}",
189
  "top_k": 25,
190
- "filter_score": 50
191
  }
192
  ]
193
  }
 
178
  Stores the RAG configuration prompt for the demo chatbot in MongoDB.
179
  """
180
  demo_rag_dict = {
181
+ "submission_id": ingest.submission_id,
182
  "chatbot_id": chatbot_id,
183
  "company_id": company_id,
184
  "company_name": ingest.company_name,
 
188
  "name": "all",
189
  "collection": f"chatbot_{chatbot_id}",
190
  "top_k": 25,
191
+ "filter_score": 0.8
192
  }
193
  ]
194
  }
app/ingestion/routes.py CHANGED
@@ -10,6 +10,7 @@ logger.setLevel(logging.INFO)
10
 
11
  router = APIRouter()
12
 
 
13
 
14
  @router.post("/ingest")
15
  async def ingest_chatbot_webhook(payload: dict,background_tasks: BackgroundTasks):
@@ -32,7 +33,7 @@ async def ingest_chatbot_webhook(payload: dict,background_tasks: BackgroundTasks
32
 
33
  return {
34
  "chatbot_id": chatbot_id,
35
- "demo_url": f"/chatbot/demo/{chatbot_id}",
36
  "message": "Demo chatbot ingestion succesfull"
37
  }
38
  except Exception as e:
 
10
 
11
  router = APIRouter()
12
 
13
import os  # imported here because this module's import block is outside the visible hunk

# Base URL of the frontend that serves the demo chatbot pages. Taken from the
# FRONTEND_BASE_URL environment variable so deployed instances do not hand out
# localhost links; falls back to the local development address. A trailing
# slash is stripped so URLs built from it do not contain "//".
FRONTEND_BASE_URL = os.environ.get("FRONTEND_BASE_URL", "http://localhost:8000").rstrip("/")
14
 
15
  @router.post("/ingest")
16
  async def ingest_chatbot_webhook(payload: dict,background_tasks: BackgroundTasks):
 
33
 
34
  return {
35
  "chatbot_id": chatbot_id,
36
+ "demo_url": f"{FRONTEND_BASE_URL}/chatbot/demo/{chatbot_id}",
37
  "message": "Demo chatbot ingestion succesfull"
38
  }
39
  except Exception as e:
app/ingestion/utils.py CHANGED
@@ -11,6 +11,9 @@ def extract_tally_payload(payload: dict):
11
  # Map key -> field
12
  key_map = {f["key"]: f for f in fields}
13
 
 
 
 
14
  # Company info
15
  company_name = key_map.get("question_kyp5Dd", {}).get("value")
16
  chatbot_name = key_map.get("question_vAp47X", {}).get("value") or None
@@ -46,6 +49,7 @@ def extract_tally_payload(payload: dict):
46
  submitted_at = datetime.fromisoformat(submitted_at_str.replace("Z", "+00:00"))
47
 
48
  return {
 
49
  "company_name": company_name,
50
  "chatbot_name": chatbot_name,
51
  "website_url": website_url,
 
11
  # Map key -> field
12
  key_map = {f["key"]: f for f in fields}
13
 
14
+ # Submission ID
15
+ submission_id = payload.get("data", {}).get("submissionId")
16
+
17
  # Company info
18
  company_name = key_map.get("question_kyp5Dd", {}).get("value")
19
  chatbot_name = key_map.get("question_vAp47X", {}).get("value") or None
 
49
  submitted_at = datetime.fromisoformat(submitted_at_str.replace("Z", "+00:00"))
50
 
51
  return {
52
+ "submission_id": submission_id,
53
  "company_name": company_name,
54
  "chatbot_name": chatbot_name,
55
  "website_url": website_url,
app/ingestion/workers.py CHANGED
@@ -1,22 +1,54 @@
1
- # workers.py
2
  from app.ingestion.demo_form_fetch_store import get_chatbot_config
3
  from app.ingestion.models import ChatbotIngest
4
  from app.ingestion.rag_setup import scrape_website, chunk_and_embed, store_demo_rag_config
 
5
 
6
-
7
- def build_rag_for_chatbot(chatbot_id: str) -> None:
8
- """
9
- Full synchronous RAG build pipeline for a demo chatbot.
10
- """
11
-
12
  config = get_chatbot_config(chatbot_id)
13
  if not config or not config.get("website_url"):
 
 
 
 
14
  raise ValueError("Chatbot config missing website_url")
15
 
16
- pages = scrape_website(config["website_url"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
- chunk_and_embed(
19
- chatbot_id=chatbot_id,
20
- pages=pages,
21
- )
22
- store_demo_rag_config(chatbot_id=chatbot_id, company_id=config['company_id'], ingest=ChatbotIngest(**config))
 
 
 
1
  from app.ingestion.demo_form_fetch_store import get_chatbot_config
2
  from app.ingestion.models import ChatbotIngest
3
  from app.ingestion.rag_setup import scrape_website, chunk_and_embed, store_demo_rag_config
4
+ from app.config import demo_form_submissions
5
 
6
def _set_submission_fields(chatbot_id: str, fields: dict) -> None:
    """Apply a ``$set`` of ``fields`` to the form submission matching ``chatbot_id``."""
    demo_form_submissions.update_one(
        {"chatbot_id": chatbot_id},
        {"$set": fields},
    )


def build_rag_for_chatbot(chatbot_id: str) -> None:
    """
    Full synchronous RAG build pipeline for a demo chatbot.

    Scrapes the configured website, chunks and embeds the scraped pages, and
    stores the demo RAG config. Progress is recorded on the matching
    ``demo_form_submissions`` document through the ``stages.*`` flags: each
    flag is written as False before its step runs and True once it succeeds.

    Args:
        chatbot_id: Identifier used to load the chatbot config and to locate
            the submission document that receives the stage updates.

    Raises:
        ValueError: If no config is found or it has no ``website_url``.
        Exception: Any error raised by a pipeline step is re-raised after
            ``stages.rag_ready`` is set to False and the message is stored
            in the document's ``error`` field.
    """
    config = get_chatbot_config(chatbot_id)
    if not config or not config.get("website_url"):
        _set_submission_fields(
            chatbot_id,
            {"stages.rag_ready": False, "error": "Missing website URL"},
        )
        raise ValueError("Chatbot config missing website_url")

    try:
        # Stage: Website scraping
        _set_submission_fields(chatbot_id, {"stages.website_scraped": False})
        pages = scrape_website(config["website_url"])
        _set_submission_fields(chatbot_id, {"stages.website_scraped": True})

        # Stage: Chunking & embedding
        _set_submission_fields(chatbot_id, {"stages.content_embedded": False})
        chunk_and_embed(chatbot_id=chatbot_id, pages=pages)
        _set_submission_fields(chatbot_id, {"stages.content_embedded": True})

        # Stage: RAG ready
        store_demo_rag_config(
            chatbot_id=chatbot_id,
            company_id=config['company_id'],
            ingest=ChatbotIngest(**config),
        )
        _set_submission_fields(chatbot_id, {"stages.rag_ready": True})

    except Exception as e:
        # Record the failure on the submission, then let the original
        # exception propagate to the caller.
        _set_submission_fields(
            chatbot_id,
            {"stages.rag_ready": False, "error": str(e)},
        )
        raise