Spaces:
Sleeping
Sleeping
Commit
·
5e51aba
1
Parent(s):
ee2e099
Updated
Browse files- .~lock.SME_Builder_Dataset.csv# +1 -0
- main.py +15 -6
- sme_builder_dataset.csv +0 -0
- smebuilder_vector.py +55 -0
.~lock.SME_Builder_Dataset.csv#
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
,alash-studios,alash-studios-HP-EliteBook-840-G3,19.09.2025 18:30,file:///home/alash-studios/.config/libreoffice/4;
|
main.py
CHANGED
|
@@ -8,6 +8,7 @@ from langchain.prompts import PromptTemplate
|
|
| 8 |
from langchain_huggingface import HuggingFaceEndpoint
|
| 9 |
from langdetect import detect, DetectorFactory
|
| 10 |
from huggingface_hub.utils import HfHubHTTPError # for quota error handling
|
|
|
|
| 11 |
|
| 12 |
# ----------------- CONFIG -----------------
|
| 13 |
DetectorFactory.seed = 0
|
|
@@ -26,15 +27,14 @@ spitch_client = Spitch()
|
|
| 26 |
|
| 27 |
# HuggingFace LLM (better tuned for code generation)
|
| 28 |
llm = HuggingFaceEndpoint(
|
| 29 |
-
repo_id=
|
| 30 |
temperature=0.7,
|
| 31 |
top_p=0.9,
|
| 32 |
do_sample=True,
|
| 33 |
repetition_penalty=1.1,
|
| 34 |
-
max_new_tokens=
|
| 35 |
)
|
| 36 |
|
| 37 |
-
|
| 38 |
# FastAPI app
|
| 39 |
app = FastAPI(title="DevAssist AI Backend (FastAPI + LangChain)")
|
| 40 |
|
|
@@ -93,6 +93,7 @@ Guidelines:
|
|
| 93 |
- Return **only valid JSON** with keys: "files" → { "index.html": "...", "styles.css": "...", "script.js": "..." }
|
| 94 |
|
| 95 |
Prompt: {user_prompt}
|
|
|
|
| 96 |
|
| 97 |
Output:
|
| 98 |
"""
|
|
@@ -101,7 +102,7 @@ Output:
|
|
| 101 |
chat_chain = PromptTemplate(input_variables=["question"], template=chat_template) | llm
|
| 102 |
stt_chain = PromptTemplate(input_variables=["speech"], template=stt_chat_template) | llm
|
| 103 |
autodoc_chain = PromptTemplate(input_variables=["code"], template=autodoc_template) | llm
|
| 104 |
-
sme_chain = PromptTemplate(input_variables=["user_prompt"], template=sme_template) | llm
|
| 105 |
|
| 106 |
# ----------------- REQUEST MODELS -----------------
|
| 107 |
class ChatRequest(BaseModel):
|
|
@@ -187,7 +188,11 @@ def autodoc(req: AutoDocRequest, authorization: str | None = Header(None)):
|
|
| 187 |
@app.post("/sme/generate")
|
| 188 |
async def sme_generate(payload: dict = Body(...)):
|
| 189 |
try:
|
| 190 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
return {"success": True, "data": response}
|
| 192 |
except HfHubHTTPError as e:
|
| 193 |
if "exceeded" in str(e).lower() or "quota" in str(e).lower():
|
|
@@ -228,7 +233,11 @@ async def sme_speech_generate(file: UploadFile = File(...), lang_hint: str | Non
|
|
| 228 |
translation = transcription
|
| 229 |
|
| 230 |
try:
|
| 231 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 232 |
return {
|
| 233 |
"success": True,
|
| 234 |
"transcription": transcription,
|
|
|
|
| 8 |
from langchain_huggingface import HuggingFaceEndpoint
|
| 9 |
from langdetect import detect, DetectorFactory
|
| 10 |
from huggingface_hub.utils import HfHubHTTPError # for quota error handling
|
| 11 |
+
from smebuilder_vector import retriever # <-- your retriever
|
| 12 |
|
| 13 |
# ----------------- CONFIG -----------------
|
| 14 |
DetectorFactory.seed = 0
|
|
|
|
| 27 |
|
| 28 |
# HuggingFace LLM (better tuned for code generation)
|
| 29 |
llm = HuggingFaceEndpoint(
|
| 30 |
+
repo_id=HF_MODEL,
|
| 31 |
temperature=0.7,
|
| 32 |
top_p=0.9,
|
| 33 |
do_sample=True,
|
| 34 |
repetition_penalty=1.1,
|
| 35 |
+
max_new_tokens=2048 # bumped tokens
|
| 36 |
)
|
| 37 |
|
|
|
|
| 38 |
# FastAPI app
|
| 39 |
app = FastAPI(title="DevAssist AI Backend (FastAPI + LangChain)")
|
| 40 |
|
|
|
|
| 93 |
- Return **only valid JSON** with keys: "files" → { "index.html": "...", "styles.css": "...", "script.js": "..." }
|
| 94 |
|
| 95 |
Prompt: {user_prompt}
|
| 96 |
+
Context: {context}
|
| 97 |
|
| 98 |
Output:
|
| 99 |
"""
|
|
|
|
| 102 |
chat_chain = PromptTemplate(input_variables=["question"], template=chat_template) | llm
|
| 103 |
stt_chain = PromptTemplate(input_variables=["speech"], template=stt_chat_template) | llm
|
| 104 |
autodoc_chain = PromptTemplate(input_variables=["code"], template=autodoc_template) | llm
|
| 105 |
+
sme_chain = PromptTemplate(input_variables=["user_prompt", "context"], template=sme_template) | llm
|
| 106 |
|
| 107 |
# ----------------- REQUEST MODELS -----------------
|
| 108 |
class ChatRequest(BaseModel):
|
|
|
|
| 188 |
@app.post("/sme/generate")
|
| 189 |
async def sme_generate(payload: dict = Body(...)):
|
| 190 |
try:
|
| 191 |
+
user_prompt = payload.get("user_prompt", "")
|
| 192 |
+
# retrieve context
|
| 193 |
+
context_docs = retriever.get_relevant_documents(user_prompt)
|
| 194 |
+
context = "\n".join([doc.page_content for doc in context_docs]) if context_docs else "No extra context"
|
| 195 |
+
response = sme_chain.invoke({"user_prompt": user_prompt, "context": context})
|
| 196 |
return {"success": True, "data": response}
|
| 197 |
except HfHubHTTPError as e:
|
| 198 |
if "exceeded" in str(e).lower() or "quota" in str(e).lower():
|
|
|
|
| 233 |
translation = transcription
|
| 234 |
|
| 235 |
try:
|
| 236 |
+
# vector retrieval here too
|
| 237 |
+
context_docs = retriever.get_relevant_documents(translation)
|
| 238 |
+
context = "\n".join([doc.page_content for doc in context_docs]) if context_docs else "No extra context"
|
| 239 |
+
|
| 240 |
+
sme_response = sme_chain.invoke({"user_prompt": translation, "context": context})
|
| 241 |
return {
|
| 242 |
"success": True,
|
| 243 |
"transcription": transcription,
|
sme_builder_dataset.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
smebuilder_vector.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# smebuilder_vector.py
"""Build (or load) the Chroma vector store of SME landing-page examples.

Importing this module loads the example dataset, embeds it on first run,
and exposes ``retriever`` (used by main.py) to fetch example documents
relevant to a user's prompt.
"""

import os

import pandas as pd
from langchain_chroma import Chroma
from langchain_core.documents import Document
from langchain_huggingface import HuggingFaceEmbeddings

# ----------------- CONFIG -----------------
DATASET_PATH = "sme_builder_dataset.csv"
DB_LOCATION = "./Dev_Assist_SME_Builder_DB"
COLLECTION_NAME = "landing_page_generation_examples"
EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"

# Columns flattened into each document's searchable text.
_TEXT_COLUMNS = ("prompt", "html_code", "css_code", "js_code", "sector")

# ----------------- LOAD DATASET -----------------
if not os.path.exists(DATASET_PATH):
    raise FileNotFoundError(f"Dataset file not found: {DATASET_PATH}")

# fillna("") so empty CSV cells don't become the literal string "nan"
# when rows are joined into document text below (str(NaN) == "nan").
df = pd.read_csv(DATASET_PATH).fillna("")

# ----------------- EMBEDDINGS -----------------
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)

# Embed and ingest only on first run; afterwards the persisted DB is reused.
add_documents = not os.path.exists(DB_LOCATION)

# ----------------- CREATE DOCUMENTS -----------------
documents, ids = [], []
if add_documents:
    for i, row in df.iterrows():
        # Concatenate every text column into one searchable blob.
        page_content = " ".join(
            str(row.get(col, "")) for col in _TEXT_COLUMNS
        ).strip()

        documents.append(Document(page_content=page_content, id=str(i)))
        ids.append(str(i))

# ----------------- VECTOR STORE -----------------
vector_store = Chroma(
    collection_name=COLLECTION_NAME,
    persist_directory=DB_LOCATION,
    embedding_function=embeddings,
)

if add_documents and documents:
    vector_store.add_documents(documents=documents, ids=ids)

# ----------------- RETRIEVER -----------------
# k=20 examples per query; lower this if prompts overflow the LLM context.
retriever = vector_store.as_retriever(search_kwargs={"k": 20})