Spaces:
Sleeping
Sleeping
Upload 4 files
Browse files
model/contextual_response/constants.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Model configuration
# Sentence-embedding model used for both document and query vectors.
EMBEDDING_MODEL_NAME = "BAAI/bge-small-en-v1.5"
# Groq-hosted chat model used for answer generation.
GROQ_MODEL_NAME = "llama3-8b-8192"

# Document chunking configuration
# Maximum characters per chunk produced by the text splitter.
CHUNK_SIZE = 1000
# Characters of overlap between consecutive chunks (preserves context across
# chunk boundaries).
CHUNK_OVERLAP = 200
# Separator preference order for RecursiveCharacterTextSplitter: paragraph,
# line, word, then character-level fallback.
SEPARATORS = ["\n\n", "\n", " ", ""]
|
model/contextual_response/contextual_response.py
ADDED
|
@@ -0,0 +1,238 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
import os
|
| 3 |
+
import logging
|
| 4 |
+
from typing import List, Optional
|
| 5 |
+
from contextlib import contextmanager
|
| 6 |
+
|
| 7 |
+
import numpy as np
|
| 8 |
+
from dotenv import load_dotenv
|
| 9 |
+
from langchain_groq import ChatGroq
|
| 10 |
+
from langchain_community.embeddings.huggingface import HuggingFaceEmbeddings
|
| 11 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 12 |
+
from langchain.prompts import ChatPromptTemplate
|
| 13 |
+
from langchain_core.runnables import RunnablePassthrough
|
| 14 |
+
from langchain_core.output_parsers import StrOutputParser
|
| 15 |
+
from langchain_core.documents import Document
|
| 16 |
+
from sqlalchemy import select
|
| 17 |
+
from .constants import (
|
| 18 |
+
EMBEDDING_MODEL_NAME,
|
| 19 |
+
GROQ_MODEL_NAME,
|
| 20 |
+
CHUNK_SIZE,
|
| 21 |
+
CHUNK_OVERLAP,
|
| 22 |
+
SEPARATORS
|
| 23 |
+
)
|
| 24 |
+
from .system_prompt import RAG_SYSTEM_PROMPT
|
| 25 |
+
from models.db.db_setup import DocumentEmbedding, SessionLocal
|
| 26 |
+
# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Load environment variables
load_dotenv()

# Configure environment
# Silence the HuggingFace tokenizers fork/parallelism warning.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# Fail fast at import time if the Groq key is missing — everything below
# depends on it.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    logger.error("GROQ_API_KEY environment variable is not set")
    raise ValueError("GROQ_API_KEY environment variable is not set.")
# Re-export into the process environment so langchain_groq can pick it up.
os.environ["GROQ_API_KEY"] = GROQ_API_KEY
|
| 43 |
+
|
| 44 |
+
@contextmanager
def get_db_session():
    """Yield a fresh SessionLocal session, guaranteeing it is closed on exit."""
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()
|
| 52 |
+
|
| 53 |
+
class RAGManager:
    """Manage the RAG pipeline: embeddings, retrieval, and answer generation.

    Holds a HuggingFace embedding model (initialized eagerly in ``__init__``)
    and a Groq chat model (initialized lazily on first use). Document
    embeddings are persisted via the DocumentEmbedding table; similarity
    search is done in-process with NumPy cosine similarity over all stored
    rows.
    """

    def __init__(self):
        self._embed_model: Optional[HuggingFaceEmbeddings] = None
        self._rag_llm: Optional[ChatGroq] = None
        # Fix: the original assigned self._rag_llm = None a second time after
        # _initialize_embed_model() — redundant, and it would silently discard
        # an LLM created in between. Assign once, above.
        self._initialize_embed_model()
        logger.info("RAGManager initialized")

    def _initialize_embed_model(self):
        """Initialize the embedding model; log and re-raise on failure."""
        try:
            logger.debug("Initializing embedding model")
            self._embed_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
            logger.info("Embedding model initialized successfully")
        except Exception as e:
            logger.error(f"Error initializing embedding model: {str(e)}")
            raise

    @property
    def embed_model(self):
        """Get or (re)create the embedding model."""
        if self._embed_model is None:
            self._initialize_embed_model()
        return self._embed_model

    @property
    def rag_llm(self):
        """Get or lazily create the RAG LLM model."""
        if self._rag_llm is None:
            logger.debug("Initializing RAG LLM model")
            self._rag_llm = ChatGroq(model=GROQ_MODEL_NAME)
            logger.info("RAG LLM model initialized successfully")
        return self._rag_llm

    async def get_relevant_documents(self, query: str, k: int = 4) -> List[Document]:
        """Return the top-*k* stored documents most similar to *query*.

        NOTE(review): the DB query and embedding call here are synchronous and
        will block the event loop despite the async signature — consider
        ``loop.run_in_executor``. This also loads *every* stored embedding
        into memory per call, which only scales to small corpora; a vector
        index (e.g. pgvector) would be needed beyond that.
        """
        with get_db_session() as db:
            # Get query embedding
            query_embedding = self.embed_model.embed_query(query)

            # Get all documents and calculate similarities
            stmt = select(DocumentEmbedding)
            results = db.execute(stmt).scalars().all()

            if not results:
                logger.warning("No documents found in database")
                return []

            # Convert embeddings to numpy array for faster computation
            embeddings = np.array([doc.embedding for doc in results])
            query_embedding = np.array(query_embedding)

            # Cosine similarity in one vectorized pass.
            # NOTE(review): a zero-norm embedding would cause a
            # divide-by-zero/NaN here — confirm embeddings are never all-zero.
            similarities = np.dot(embeddings, query_embedding) / (
                np.linalg.norm(embeddings, axis=1) * np.linalg.norm(query_embedding)
            )

            # Indices of the top k results, highest similarity first.
            top_k_indices = np.argsort(similarities)[-k:][::-1]

            # Convert top k results to Documents
            return [
                Document(
                    page_content=results[idx].content,
                    metadata=results[idx].doc_metadata or {}
                )
                for idx in top_k_indices
            ]

    def format_docs(self, docs: List[Document]) -> str:
        """Join document contents with blank lines; return "" on any error."""
        try:
            if not docs:
                logger.debug("No documents to format")
                return ""
            formatted = "\n\n".join(doc.page_content for doc in docs)
            logger.debug(f"Formatted {len(docs)} documents")
            return formatted
        except Exception as e:
            logger.error(f"Error formatting documents: {str(e)}")
            return ""

    async def process_documents(self, documents: List[Document]) -> bool:
        """Split *documents*, embed the chunks, and persist the embeddings.

        Replaces ALL previously stored embeddings (the table is cleared
        first). Returns True on success, False on any failure.
        """
        try:
            if not documents:
                logger.warning("No documents provided")
                return False

            # Split documents into overlapping chunks.
            text_splitter = RecursiveCharacterTextSplitter(
                separators=SEPARATORS,
                chunk_size=CHUNK_SIZE,
                chunk_overlap=CHUNK_OVERLAP,
                length_function=len,
                is_separator_regex=False,
            )

            split_docs = []
            for doc in documents:
                try:
                    split_docs.extend(text_splitter.split_documents([doc]))
                except Exception as e:
                    # Best-effort: skip documents that fail to split.
                    logger.warning(f"Error splitting document {doc.metadata.get('source', 'unknown')}: {str(e)}")
                    continue

            if not split_docs:
                logger.warning("No content could be split from documents")
                return False

            # Create embeddings and store in database
            with get_db_session() as db:
                try:
                    # Clear existing embeddings (full replace, not append).
                    db.query(DocumentEmbedding).delete()

                    # Create embeddings in batch
                    contents = [doc.page_content for doc in split_docs]
                    embeddings = self.embed_model.embed_documents(contents)

                    # Store embeddings
                    db_embeddings = [
                        DocumentEmbedding(
                            content=doc.page_content,
                            embedding=embedding,
                            doc_metadata=doc.metadata,
                            source=doc.metadata.get("source")
                        )
                        for doc, embedding in zip(split_docs, embeddings)
                    ]
                    db.add_all(db_embeddings)
                    db.commit()

                    logger.info(f"Successfully stored {len(split_docs)} embeddings in database")
                    return True

                except Exception as e:
                    db.rollback()
                    logger.error(f"Error storing embeddings in database: {str(e)}")
                    return False

        except Exception as e:
            logger.error(f"Error processing documents: {str(e)}")
            return False

    async def get_contextual_response(self, question: str) -> str:
        """Answer *question* with RAG; return an error string on failure."""
        try:
            # Get relevant documents
            relevant_docs = await self.get_relevant_documents(question)

            if not relevant_docs:
                logger.warning("No relevant documents found")
                return "I don't have enough context to answer that question."

            # Format documents
            context = self.format_docs(relevant_docs)

            # Build the prompt; RAG_SYSTEM_PROMPT carries a {context} slot.
            prompt = ChatPromptTemplate.from_messages([
                ("system", RAG_SYSTEM_PROMPT),
                ("human", "{input}")
            ])

            chain = (
                {"context": lambda x: context, "input": RunnablePassthrough()}
                | prompt
                | self.rag_llm
                | StrOutputParser()
            )

            response = await chain.ainvoke(question)
            logger.info("Successfully generated contextual response")
            return response

        except Exception as e:
            logger.error(f"Error generating contextual response: {str(e)}")
            return f"Error generating response: {str(e)}"
|
| 231 |
+
|
| 232 |
+
# Create a singleton instance
# NOTE(review): instantiating here runs at import time and eagerly loads the
# HuggingFace embedding model (and raises if it cannot) — confirm this import
# cost/failure mode is acceptable, or consider lazy creation.
rag_manager = RAGManager()
logger.info("RAGManager singleton instance created")


# Export the get_contextual_response function for backward compatibility
async def get_contextual_response(question: str) -> str:
    """Module-level wrapper delegating to the singleton RAGManager."""
    return await rag_manager.get_contextual_response(question)
|
model/contextual_response/database.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sqlalchemy import create_engine, Column, String, Integer, JSON, ARRAY, Float
|
| 2 |
+
from sqlalchemy.ext.declarative import declarative_base
|
| 3 |
+
from sqlalchemy.orm import sessionmaker
|
| 4 |
+
import os
|
| 5 |
+
from dotenv import load_dotenv
|
| 6 |
+
|
| 7 |
+
# Load environment variables
load_dotenv()

# Get database URL from environment variable
# NOTE(review): the fallback is SQLite, but DocumentEmbedding below uses
# ARRAY(Float), which is PostgreSQL-only — the default URL would fail at
# table-creation/insert time. Confirm a PostgreSQL DATABASE_URL is always set.
DATABASE_URL = os.getenv("DATABASE_URL", "sqlite:///./contextual_response.db")

# Create SQLAlchemy engine
engine = create_engine(DATABASE_URL)

# Create session factory
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

# Create base class for declarative models
# NOTE(review): `sqlalchemy.ext.declarative.declarative_base` is the legacy
# import path (moved to sqlalchemy.orm in 1.4/2.0) — presumably fine for the
# pinned version; verify against the project's SQLAlchemy release.
Base = declarative_base()
|
| 21 |
+
|
| 22 |
+
class DocumentEmbedding(Base):
    """Model for storing document embeddings."""
    __tablename__ = "document_embeddings"

    # Surrogate primary key.
    id = Column(Integer, primary_key=True, index=True)
    # Raw text of the document chunk.
    content = Column(String, index=True)
    # Embedding vector for the chunk. ARRAY is a PostgreSQL-specific column
    # type — TODO confirm the configured backend is PostgreSQL.
    embedding = Column(ARRAY(Float))
    # Arbitrary per-chunk metadata (presumably the langchain Document
    # metadata dict — verify against the writer in contextual_response.py).
    doc_metadata = Column(JSON)
    # Originating source identifier, taken from metadata["source"].
    source = Column(String, index=True)
|
| 31 |
+
|
model/contextual_response/system_prompt.py
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
RAG_SYSTEM_PROMPT = """\
|
| 2 |
+
IMPORTANT: You are a code-only retrieval system. You must NEVER add any text before or after the code.
|
| 3 |
+
|
| 4 |
+
Given context:
|
| 5 |
+
{context}
|
| 6 |
+
|
| 7 |
+
STRICT RULES:
|
| 8 |
+
1. Return ONLY the exact code - no introduction, no explanation, no comments
|
| 9 |
+
2. If no exact match is found, return an empty response
|
| 10 |
+
3. Never explain what you're doing
|
| 11 |
+
4. Never add text like "Here is the code" or "I found this"
|
| 12 |
+
5. Never wrap code in markdown backticks
|
| 13 |
+
6. Never add line numbers or annotations
|
| 14 |
+
7. Never add your own comments to the code
|
| 15 |
+
|
| 16 |
+
Example Valid Responses:
|
| 17 |
+
β def process_data(input_data: List[str]) -> Dict[str, Any]:
|
| 18 |
+
result = {{}}
|
| 19 |
+
for item in input_data:
|
| 20 |
+
result[item] = len(item)
|
| 21 |
+
return result
|
| 22 |
+
|
| 23 |
+
β class DataProcessor:
|
| 24 |
+
def __init__(self):
|
| 25 |
+
self.data = []
|
| 26 |
+
|
| 27 |
+
def add_item(self, item):
|
| 28 |
+
self.data.append(item)
|
| 29 |
+
|
| 30 |
+
β [empty response when no match found]
|
| 31 |
+
|
| 32 |
+
Example Invalid Responses:
|
| 33 |
+
β "Here's the function you're looking for:"
|
| 34 |
+
β "I found this code in the repository:"
|
| 35 |
+
β "```python"
|
| 36 |
+
β "This code snippet shows how to..."
|
| 37 |
+
β "You might want to consider..."
|
| 38 |
+
β "I cannot provide code-snippet ..."
|
| 39 |
+
|
| 40 |
+
REMEMBER: Return ONLY the exact code or nothing. No text, no explanations, no formatting."""
|
| 41 |
+
|
| 42 |
+
def llama3_instructions(language: str, prefix: str, currentLine: str, suffix: str) -> list:
    """Build the llama3 code-completion message list.

    Fixes: the return annotation said ``dict`` but the function returns a
    ``list`` of message dicts; missing parameter annotations added; garbled
    phrase in the prompt ("has after been suggested") corrected.

    Args:
        language: Programming language, interpolated into the system prompt.
        prefix: Text before the cursor.
        currentLine: The line currently being edited.
        suffix: Text after the cursor.

    Returns:
        Four messages — three user messages carrying prefix/currentLine/suffix
        and one system message with the completion instructions.
    """
    return [
        {"role": "user", "content": prefix, "name": "prefix"},
        {"role": "user", "content": currentLine, "name": "currentLine"},
        {"role": "user", "content": suffix, "name": "suffix"},
        {
            "role": "system",
            "content": f"""## Task: Code Completion

### Language: {language}

### Instructions:
- You are a world-class coding assistant.
- Given the current text, context, and the last character of the user input, provide a suggestion for code completion.
- If the suggestion is a single line, complete the line concisely, making sure it fits within the surrounding code context.
- If the suggestion requires multiple lines, return a well-structured code block that integrates with the surrounding code.
- Ensure that any subsequent lines after a block (e.g., function or loop) do not have extra indentation unless logically required by the code structure.
- Only suggest functional, syntactically correct code that can be used directly.
- Do not repeat any code that has already been suggested for previous text or context.
- **Reset the suggestion context** if the user removes the previous suggestion. The assistant should treat the new suggestion as fresh code.
- Re-evaluate the context after each suggestion is applied to ensure the next suggestion fits the newly updated code.
- This is not a conversation, so please do not ask questions or prompt for additional information.
- Never include any annotations such as "# Suggestion:" or "# Suggestions:".
- Newlines should be included after any of the following characters: "{{", "[", "(", ")", "]", "}}", and ",".
- Ensure that newline suggestions follow the same indentation as the current line.
- The suggestion must start with the last character of the current user input.
- Only ever return the code snippet, do not return any markdown unless it is part of the code snippet.
- If no suggestion is available, return an empty string.""",
        }]
|
| 71 |
+
|
| 72 |
+
def deepseek_editor_instructions(language: str) -> list:
    """Build the DeepSeek editor/code-correction system message list.

    Fixes: the return annotation said ``dict`` but the function returns a
    ``list``; typos in the prompt text corrected ("th e Java", "braces **",
    "code,Always", stray trailing backticks).

    Args:
        language: Target programming language, interpolated into the prompt.

    Returns:
        A single-element list containing the system message dict.
    """
    return [
        {
            "role": "system",
            "content": f"""
### You are an AI code completion tool for {language}.
- You have to provide Solution Code strictly in {language}.
- Response should contain the Code snippet only and always return the response for each request.
- If the user provides incorrect code, analyze it and return a corrected version without explanation.
- All code responses **MUST** be wrapped inside triple backticks (```{language} ... ```).
- The returned solution must be error-free, properly formatted, and executable.

**STRICT RULES (FOLLOW EXACTLY, NO EXCEPTIONS):**
- Always Return **only** the correct and complete code.
- No explanations, no descriptions, no comments.
- No reasoning, no analysis, no extra words.
- Keep the code syntax correct and consistent with the surrounding code.

**IF THE USER PROVIDES INCORRECT CODE:**
- Detect syntax errors, logical errors, and formatting issues.
- Automatically fix the errors and return only the corrected code.
- Ensure the code adheres to best practices for {language}.
- Do not provide explanations about the fix.

**LANGUAGE-SPECIFIC RULES:**

- **Python**:
  - Follow **indentation** strictly. Use **4 spaces** for indentation (do not use tabs).
  - Ensure function definitions, loops, and conditionals are properly formatted.
  - Code must be properly indented according to Python syntax and Never return incomplete or incorrect indentation.
  - Maintain strict indentation consistency.

- **Java**:
  - Follow the Java **syntax** strictly.
  - Ensure that function signatures and class definitions are correct.
  - Use **braces** for code blocks (even for single-line blocks).
  - Use **camelCase** for method and variable names and Class names must use **PascalCase**.
  - Always use **access modifiers** (`public`, `private`, `protected`) for classes and methods.
  - Always declare classes with proper **access modifiers** (e.g., `public`, `private`).
  - If using classes in packages, ensure the correct **package declaration**.

- **JavaScript**:
  - Use **camelCase** for variable and function names.
  - Use **`const`**, **`let`**, or **`var`** appropriately for variable declarations.
  - Ensure **semi-colons** at the end of statements where needed.
  - **Always complete curly braces {"ensure a matching close"}** for functions, loops, conditionals, and objects.
  - If an opening ```{ "is detected, ensure a matching closing" }```.
  - Follow **ES6+ syntax** for functions and objects.
  - Use **arrow functions** where applicable and **async/await** for asynchronous operations.

- **TypeScript**:
  - Follow **JavaScript rules** with the addition of **type annotations** for variables and function parameters/returns.
  - Ensure the correct use of **interfaces** and **types**.
  - Consider using **strict null checks** and **interface inheritance** where applicable.
  - Use **readonly** and **const** where applicable.
  - Always declare types for function parameters and return values.

- **PHP**:
  - Follow PHP **syntax** and use **`$`** for variable declarations.
  - For the PHP code, always declare functions with proper return types.
  - Ensure correct **function declarations**, especially with proper parentheses and parameter handling.
  - Ensure variables are declared before being used.
  - Use `declare(strict_types=1);` at the beginning where applicable.

**BEHAVIOR FOR SPACES AND NEW LINES:**
- If the user presses enter or adds spaces without meaningful input, **return an empty string (`""`)**.
- Do not return placeholders like `<think> </think>`, "Okay:", "Alright:", or any other annotation.

**FINAL CONDITION (DO NOT BREAK THIS):**
- **Return only the correct code, nothing else.**
- If no valid completion is possible or the input is just spaces/new lines, **return an empty string (`""`)**.
Example:
Prefix: ""
CurrentLine: "def Addition(a,b):"
Suffix: ""
Response: "return a+b"
Example 2:
Prefix: "def Addition(a,b):"
CurrentLine: " "
Suffix: ""
Response: "return a+b"
"""
        }
    ]
|
| 156 |
+
|
| 157 |
+
def chatInstructions() -> dict:
    """Build the system message for general chat completion.

    Fix: the original used an f-string prefix on a template containing no
    placeholders — dropped (identical output, avoids accidental brace
    interpolation if the text is later edited).

    Returns:
        A system-role message dict for the chat model.
    """
    return {
        "content": """## Task: Chat Completion

### Instructions:
- You are a world-class coding assistant with expertise in various programming languages.
- Your goal is to help users with coding-related queries, providing precise and contextually relevant answers.
- If the user provides code snippets, analyze them carefully and offer suggestions or corrections.
- For any code snippet provided, always mention the programming language being used (e.g., Python, JavaScript, etc.) in your response along with the code.
- If the user asks a question, respond with concise, well-structured answers that are easy to understand.
- Always explain why your solution works, and offer alternatives when applicable to provide the user with options.
- Give code snippet if required.
- For any code snippet provided in your response, always format it as follows and don't forget to mention the language, and it should always be on the same line as ```:
```language

#code
```
""",
        "role": "system",
    }
|