MukulRay committed on
Commit
c8b552c
·
1 Parent(s): ccc6a96

chore: code cleanup, add .env.example, update README

Browse files
Files changed (7) hide show
  1. .env .example +4 -0
  2. README.MD +1 -1
  3. embedder.py +2 -1
  4. ingest.py +2 -0
  5. main.py +2 -1
  6. pyvenv.cfg +5 -0
  7. rag.py +1 -1
.env .example ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ MODEL_PATH=./models/merged/exp2_lr2e-4_r16
2
+ PINECONE_API_KEY=your_pinecone_api_key_here
3
+ PINECONE_INDEX=llmops-rag
4
+ EMBED_MODEL=sentence-transformers/all-MiniLM-L6-v2
README.MD CHANGED
@@ -17,7 +17,7 @@ Most LLM projects stop at inference. This one goes further:
17
  - **Serving layer** — FastAPI with async lifespan model loading, typed Pydantic request/response models, CORS, health check, and a clean browser UI served from the same process
18
  - **Containerized** — Dockerfile built for slim Python 3.12, model loaded at runtime via env-configurable path (not baked in)
19
  - **Cloud-ready** — One-shot Azure deployment via ACR + Container Apps, with Pinecone key injected as a secret
20
- - **Intelligence-area** — Trained a personalized Corpus with Genshin Impact Game's Vast Lore and Character Builds
21
 
22
  ---
23
 
 
17
  - **Serving layer** — FastAPI with async lifespan model loading, typed Pydantic request/response models, CORS, health check, and a clean browser UI served from the same process
18
  - **Containerized** — Dockerfile built for slim Python 3.12, model loaded at runtime via env-configurable path (not baked in)
19
  - **Cloud-ready** — One-shot Azure deployment via ACR + Container Apps, with Pinecone key injected as a secret
20
+ - **Domain knowledge** — RAG corpus built around Genshin Impact lore, character builds, and elemental mechanics, serving as a rich real-world knowledge base for retrieval evaluation
21
 
22
  ---
23
 
embedder.py CHANGED
@@ -1,6 +1,7 @@
1
- from sentence_transformers import SentenceTransformer
2
  import os
3
 
 
 
4
  EMBED_MODEL = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
5
 
6
  _model = None
 
 
1
  import os
2
 
3
+ from sentence_transformers import SentenceTransformer
4
+
5
  EMBED_MODEL = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
6
 
7
  _model = None
ingest.py CHANGED
@@ -11,7 +11,9 @@ import uuid
11
  import argparse
12
  import logging
13
  from pathlib import Path
 
14
  from dotenv import load_dotenv
 
15
  load_dotenv()
16
 
17
  from pinecone import Pinecone, ServerlessSpec
 
11
  import argparse
12
  import logging
13
  from pathlib import Path
14
+
15
  from dotenv import load_dotenv
16
+
17
  load_dotenv()
18
 
19
  from pinecone import Pinecone, ServerlessSpec
main.py CHANGED
@@ -10,7 +10,8 @@ from rag import RAGChain
10
  logging.basicConfig(level=logging.INFO)
11
  logger = logging.getLogger(__name__)
12
 
13
- rag_chain: RAGChain = None
 
14
 
15
 
16
  @asynccontextmanager
 
10
  logging.basicConfig(level=logging.INFO)
11
  logger = logging.getLogger(__name__)
12
 
13
+ from typing import Optional
14
+ rag_chain: Optional[RAGChain] = None
15
 
16
 
17
  @asynccontextmanager
pyvenv.cfg ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ home = C:\Users\mukul\AppData\Local\Programs\Python\Python312
2
+ include-system-site-packages = false
3
+ version = 3.12.9
4
+ executable = C:\Users\mukul\AppData\Local\Programs\Python\Python312\python.exe
5
+ command = C:\Users\mukul\AppData\Local\Programs\Python\Python312\python.exe -m venv E:\Projects\llmops-serve\venv
rag.py CHANGED
@@ -1,5 +1,6 @@
1
  import os
2
  import logging
 
3
  import torch
4
  from dotenv import load_dotenv
5
 
@@ -72,7 +73,6 @@ class RAGChain:
72
  return_full_text=False,
73
  eos_token_id=tokenizer.eos_token_id,
74
  pad_token_id=tokenizer.eos_token_id,
75
-
76
  )
77
  llm = HuggingFacePipeline(pipeline=hf_pipe)
78
  logger.info("Model loaded.")
 
1
  import os
2
  import logging
3
+
4
  import torch
5
  from dotenv import load_dotenv
6
 
 
73
  return_full_text=False,
74
  eos_token_id=tokenizer.eos_token_id,
75
  pad_token_id=tokenizer.eos_token_id,
 
76
  )
77
  llm = HuggingFacePipeline(pipeline=hf_pipe)
78
  logger.info("Model loaded.")