Spaces:

anabaslabs
/

ARC

Running

App Files Community

github-actions[bot] committed about 1 month ago

Commit

afe8edb

1 Parent(s): cb5ebf4

Deploy from GitHub Actions: 98954c768f33c431a29875735f8c8c7497db8fbb

Browse files

Files changed (3) hide show

app/config.py +16 -15
app/rag/pipeline.py +0 -21
app/routes/upload.py +1 -9

app/config.py CHANGED Viewed

@@ -41,21 +41,22 @@ ALLOWED_TYPES = {
     "pptx",
 }
-PROMPT = (
-    "You are ARC, a helpful document assistant. "
-    "Your goal is to provide accurate and helpful answers based on the context provided. "
-    "If the user asks for a summary, synthesize the context into a clear, structured overview. "
-    "If the context contains math or LaTeX, preserve them using $ for inline and $$ for display math. "
-    "If you cannot find the answer in the context, say so honestly, but try to be as helpful as possible with the information you have. "
-    "Context: {context} Question: {question}"
-)
-SUMMARY_PROMPT = (
-    "Provide a concise yet comprehensive summary of the following document content. "
-    "Focus on the main topics, key points, and overall purpose of the document. "
-    "This summary will be used to help a chatbot understand the document at a high level. "
-    "Content: {content}"
-)
 CREATORS = [
     {"Krishnendu Das" : "https://itskdhere.com"},

     "pptx",
 }
+PROMPT = """You are ARC (Augmented Retrieval Chatbot), an intelligent and precise document assistant.
+Your primary goal is to provide accurate, helpful, and well-structured answers based strictly on the provided context.
+Follow these guidelines:
+1. Base your answers ONLY on the provided context.
+2. Provide a concise yet comprehensive summary of the document content if the user asks for an overview. Focus on the main topics, key points, and overall purpose of the document to help the user understand it at a high level.
+3. If the context contains math or LaTeX, strictly preserve them using $ for inline math and $$ for display math.
+4. If the answer cannot be found in the context, state so honestly. Do not hallucinate or make up information, but be as helpful as possible with the provided information.
+5. Use clear markdown formatting (bullet points, bold text, headings) to structure your response.
+Context:
+{context}
+Question:
+{question}
+"""
 CREATORS = [
     {"Krishnendu Das" : "https://itskdhere.com"},

app/rag/pipeline.py CHANGED Viewed

@@ -13,8 +13,6 @@ from app.rag.loader import (
 )
 from app.rag.vectorstore import add_documents
 from langchain_core.documents import Document
-from app.config import CHAT_MODEL, GOOGLE_API_KEY, SUMMARY_PROMPT
-from langchain_google_genai import ChatGoogleGenerativeAI
 LOADERS = {
     "pdf": read_pdf,
@@ -28,7 +26,6 @@ LOADERS = {
     "pptx": read_pptx,
 }
-llm = ChatGoogleGenerativeAI(model=CHAT_MODEL, google_api_key=GOOGLE_API_KEY)
 def _clean_docs(docs: list[Document]) -> list[Document]:
     for doc in docs:
@@ -36,14 +33,6 @@ def _clean_docs(docs: list[Document]) -> list[Document]:
         doc.page_content = process_latex(doc.page_content)
     return docs
-def _generate_summary(docs: list[Document]) -> str:
-    full_text = "\n\n".join(doc.page_content for doc in docs[:10])
-    try:
-        response = llm.invoke(SUMMARY_PROMPT.format(content=full_text))
-        return str(response.content)
-    except Exception as e:
-        print(f"Error generating summary: {e}")
-        return ""
 def process_file(path: str, ext: str, session_id: str = "default_index") -> int:
     loader = LOADERS.get(ext.lower())
@@ -52,16 +41,6 @@ def process_file(path: str, ext: str, session_id: str = "default_index") -> int:
     docs = loader(path)
     docs = _clean_docs(docs)
-    # summary_text = _generate_summary(docs)
     chunks = chunk_docs(docs)
-    # if summary_text:
-    #     src = docs[0].metadata.get("source", "unknown")
-    #     summary_doc = Document(
-    #         page_content=f"DOCUMENT SUMMARY of {src}: {summary_text}",
-    #         metadata={"source": src, "is_summary": True}
-    #     )
-    #     chunks.insert(0, summary_doc)
     add_documents(chunks, session_id=session_id)
     return len(chunks)

 )
 from app.rag.vectorstore import add_documents
 from langchain_core.documents import Document
 LOADERS = {
     "pdf": read_pdf,
     "pptx": read_pptx,
 }
 def _clean_docs(docs: list[Document]) -> list[Document]:
     for doc in docs:
         doc.page_content = process_latex(doc.page_content)
     return docs
 def process_file(path: str, ext: str, session_id: str = "default_index") -> int:
     loader = LOADERS.get(ext.lower())
     docs = loader(path)
     docs = _clean_docs(docs)
     chunks = chunk_docs(docs)
     add_documents(chunks, session_id=session_id)
     return len(chunks)

app/routes/upload.py CHANGED Viewed

@@ -8,11 +8,7 @@ router = APIRouter()
 @router.post("/upload")
-async def upload_files(
-    files: list[UploadFile] = File(...),
-    session_id: str = Form(...),
-    user_id: str = Depends(get_user_id)
-) -> dict:
     prefixed_session_id = f"{user_id}_{session_id}"
     results = []
     errors = []
@@ -26,10 +22,6 @@ async def upload_files(
     for file in files:
         original_name = file.filename or f"upload.bin"
         safe_name = os.path.basename(original_name)
-        if not safe_name:
-            errors.append({"source": original_name, "error": "Invalid filename"})
-            continue
         ext = safe_name.rsplit(".", 1)[-1].lower()
         if ext not in ALLOWED_TYPES:
             errors.append({"source": original_name, "error": f"Unsupported file type: .{ext}"})

 @router.post("/upload")
+async def upload_files(files: list[UploadFile] = File(...), session_id: str = Form(...), user_id: str = Depends(get_user_id)) -> dict:
     prefixed_session_id = f"{user_id}_{session_id}"
     results = []
     errors = []
     for file in files:
         original_name = file.filename or f"upload.bin"
         safe_name = os.path.basename(original_name)
         ext = safe_name.rsplit(".", 1)[-1].lower()
         if ext not in ALLOWED_TYPES:
             errors.append({"source": original_name, "error": f"Unsupported file type: .{ext}"})