github-actions[bot] committed on
Commit
afe8edb
·
1 Parent(s): cb5ebf4

Deploy from GitHub Actions: 98954c768f33c431a29875735f8c8c7497db8fbb

Browse files
Files changed (3) hide show
  1. app/config.py +16 -15
  2. app/rag/pipeline.py +0 -21
  3. app/routes/upload.py +1 -9
app/config.py CHANGED
@@ -41,21 +41,22 @@ ALLOWED_TYPES = {
41
  "pptx",
42
  }
43
 
44
- PROMPT = (
45
- "You are ARC, a helpful document assistant. "
46
- "Your goal is to provide accurate and helpful answers based on the context provided. "
47
- "If the user asks for a summary, synthesize the context into a clear, structured overview. "
48
- "If the context contains math or LaTeX, preserve them using $ for inline and $$ for display math. "
49
- "If you cannot find the answer in the context, say so honestly, but try to be as helpful as possible with the information you have. "
50
- "Context: {context} Question: {question}"
51
- )
52
-
53
- SUMMARY_PROMPT = (
54
- "Provide a concise yet comprehensive summary of the following document content. "
55
- "Focus on the main topics, key points, and overall purpose of the document. "
56
- "This summary will be used to help a chatbot understand the document at a high level. "
57
- "Content: {content}"
58
- )
 
59
 
60
  CREATORS = [
61
  {"Krishnendu Das" : "https://itskdhere.com"},
 
41
  "pptx",
42
  }
43
 
44
+ PROMPT = """You are ARC (Augmented Retrieval Chatbot), an intelligent and precise document assistant.
45
+ Your primary goal is to provide accurate, helpful, and well-structured answers based strictly on the provided context.
46
+
47
+ Follow these guidelines:
48
+ 1. Base your answers ONLY on the provided context.
49
+ 2. Provide a concise yet comprehensive summary of the document content if the user asks for an overview. Focus on the main topics, key points, and overall purpose of the document to help the user understand it at a high level.
50
+ 3. If the context contains math or LaTeX, strictly preserve them using $ for inline math and $$ for display math.
51
+ 4. If the answer cannot be found in the context, state so honestly. Do not hallucinate or make up information, but be as helpful as possible with the provided information.
52
+ 5. Use clear markdown formatting (bullet points, bold text, headings) to structure your response.
53
+
54
+ Context:
55
+ {context}
56
+
57
+ Question:
58
+ {question}
59
+ """
60
 
61
  CREATORS = [
62
  {"Krishnendu Das" : "https://itskdhere.com"},
app/rag/pipeline.py CHANGED
@@ -13,8 +13,6 @@ from app.rag.loader import (
13
  )
14
  from app.rag.vectorstore import add_documents
15
  from langchain_core.documents import Document
16
- from app.config import CHAT_MODEL, GOOGLE_API_KEY, SUMMARY_PROMPT
17
- from langchain_google_genai import ChatGoogleGenerativeAI
18
 
19
  LOADERS = {
20
  "pdf": read_pdf,
@@ -28,7 +26,6 @@ LOADERS = {
28
  "pptx": read_pptx,
29
  }
30
 
31
- llm = ChatGoogleGenerativeAI(model=CHAT_MODEL, google_api_key=GOOGLE_API_KEY)
32
 
33
  def _clean_docs(docs: list[Document]) -> list[Document]:
34
  for doc in docs:
@@ -36,14 +33,6 @@ def _clean_docs(docs: list[Document]) -> list[Document]:
36
  doc.page_content = process_latex(doc.page_content)
37
  return docs
38
 
39
- def _generate_summary(docs: list[Document]) -> str:
40
- full_text = "\n\n".join(doc.page_content for doc in docs[:10])
41
- try:
42
- response = llm.invoke(SUMMARY_PROMPT.format(content=full_text))
43
- return str(response.content)
44
- except Exception as e:
45
- print(f"Error generating summary: {e}")
46
- return ""
47
 
48
  def process_file(path: str, ext: str, session_id: str = "default_index") -> int:
49
  loader = LOADERS.get(ext.lower())
@@ -52,16 +41,6 @@ def process_file(path: str, ext: str, session_id: str = "default_index") -> int:
52
 
53
  docs = loader(path)
54
  docs = _clean_docs(docs)
55
- # summary_text = _generate_summary(docs)
56
  chunks = chunk_docs(docs)
57
-
58
- # if summary_text:
59
- # src = docs[0].metadata.get("source", "unknown")
60
- # summary_doc = Document(
61
- # page_content=f"DOCUMENT SUMMARY of {src}: {summary_text}",
62
- # metadata={"source": src, "is_summary": True}
63
- # )
64
- # chunks.insert(0, summary_doc)
65
-
66
  add_documents(chunks, session_id=session_id)
67
  return len(chunks)
 
13
  )
14
  from app.rag.vectorstore import add_documents
15
  from langchain_core.documents import Document
 
 
16
 
17
  LOADERS = {
18
  "pdf": read_pdf,
 
26
  "pptx": read_pptx,
27
  }
28
 
 
29
 
30
  def _clean_docs(docs: list[Document]) -> list[Document]:
31
  for doc in docs:
 
33
  doc.page_content = process_latex(doc.page_content)
34
  return docs
35
 
 
 
 
 
 
 
 
 
36
 
37
  def process_file(path: str, ext: str, session_id: str = "default_index") -> int:
38
  loader = LOADERS.get(ext.lower())
 
41
 
42
  docs = loader(path)
43
  docs = _clean_docs(docs)
 
44
  chunks = chunk_docs(docs)
 
 
 
 
 
 
 
 
 
45
  add_documents(chunks, session_id=session_id)
46
  return len(chunks)
app/routes/upload.py CHANGED
@@ -8,11 +8,7 @@ router = APIRouter()
8
 
9
 
10
  @router.post("/upload")
11
- async def upload_files(
12
- files: list[UploadFile] = File(...),
13
- session_id: str = Form(...),
14
- user_id: str = Depends(get_user_id)
15
- ) -> dict:
16
  prefixed_session_id = f"{user_id}_{session_id}"
17
  results = []
18
  errors = []
@@ -26,10 +22,6 @@ async def upload_files(
26
  for file in files:
27
  original_name = file.filename or f"upload.bin"
28
  safe_name = os.path.basename(original_name)
29
- if not safe_name:
30
- errors.append({"source": original_name, "error": "Invalid filename"})
31
- continue
32
-
33
  ext = safe_name.rsplit(".", 1)[-1].lower()
34
  if ext not in ALLOWED_TYPES:
35
  errors.append({"source": original_name, "error": f"Unsupported file type: .{ext}"})
 
8
 
9
 
10
  @router.post("/upload")
11
+ async def upload_files(files: list[UploadFile] = File(...), session_id: str = Form(...), user_id: str = Depends(get_user_id)) -> dict:
 
 
 
 
12
  prefixed_session_id = f"{user_id}_{session_id}"
13
  results = []
14
  errors = []
 
22
  for file in files:
23
  original_name = file.filename or f"upload.bin"
24
  safe_name = os.path.basename(original_name)
 
 
 
 
25
  ext = safe_name.rsplit(".", 1)[-1].lower()
26
  if ext not in ALLOWED_TYPES:
27
  errors.append({"source": original_name, "error": f"Unsupported file type: .{ext}"})