samithcs committed on
Commit
a216632
·
verified ·
1 Parent(s): ce80d21

Delete app

Browse files
app/__init__.py DELETED
File without changes
app/__pycache__/__init__.cpython-313.pyc DELETED
Binary file (148 Bytes)
 
app/__pycache__/gradio_app.cpython-313.pyc DELETED
Binary file (5.27 kB)
 
app/__pycache__/logging.cpython-313.pyc DELETED
Binary file (147 Bytes)
 
app/__pycache__/main.cpython-313.pyc DELETED
Binary file (322 Bytes)
 
app/api/__init__.py DELETED
@@ -1,5 +0,0 @@
1
"""Package-level FastAPI application: builds the app and mounts the API router."""
from fastapi import FastAPI

from app.api.routes import router

# Application instance with all /upload, /ask, /feedback routes attached.
app = FastAPI()
app.include_router(router)
 
 
 
 
 
 
app/api/__pycache__/__init__.cpython-313.pyc DELETED
Binary file (152 Bytes)
 
app/api/__pycache__/routes.cpython-313.pyc DELETED
Binary file (1.94 kB)
 
app/api/routes.py DELETED
@@ -1,67 +0,0 @@
1
"""API routes for upload, question answering, and feedback collection."""
from pathlib import Path

# Single fastapi import instead of two scattered ones (Request was imported
# separately in the middle of the block before).
from fastapi import APIRouter, File, Request, UploadFile

from app.logger import logging
from pipeline.ingest.docx_parser import DOCXParser
from pipeline.ingest.html_parser import HTMLParser
from pipeline.ingest.pdf_parser import PDFParser
from pipeline.ingest.txt_parser import TXTParser
from pipeline.rag.retrieval_engine import answer_question

# Router mounted by the application (see app/main.py).
router = APIRouter()
13
-
14
@router.post("/upload")
async def upload_file(file: UploadFile = File(...)):
    """Accept a document upload, save it under data/raw/, and return a preview.

    Args:
        file: uploaded document (.pdf, .docx, .txt, .html/.htm).

    Returns:
        dict with ``filename``, a 500-char ``preview`` of the extracted text,
        and parser ``metadata`` — or ``{"error": ...}`` for unsupported types.
    """
    save_dir = Path("data/raw/")
    save_dir.mkdir(parents=True, exist_ok=True)

    # SECURITY FIX: file.filename is client-controlled; joining it directly
    # onto save_dir allowed path traversal (e.g. "../../x"). Keep only the
    # basename so the file can never escape data/raw/.
    safe_name = Path(file.filename).name
    ext = Path(safe_name).suffix.lower()
    file_path = save_dir / safe_name

    with open(file_path, "wb") as f:
        f.write(await file.read())

    # Dispatch table keeps the supported-extension list in one place.
    parser_by_ext = {
        ".pdf": PDFParser,
        ".docx": DOCXParser,
        ".txt": TXTParser,
        ".html": HTMLParser,
        ".htm": HTMLParser,
    }
    parser_cls = parser_by_ext.get(ext)
    if parser_cls is None:
        return {"error": "Unsupported file type!"}

    text, metadata = parser_cls().extract_text_and_metadata(str(file_path))
    return {"filename": file.filename, "preview": text[:500], "metadata": metadata}
37
-
38
-
39
@router.post("/ask")
async def ask_question(request: Request):
    """Answer a JSON-posted question via the RAG pipeline.

    Expects a body like ``{"question": "..."}``; returns the answer together
    with the retrieved chunks and the assembled context.
    """
    payload = await request.json()
    question = payload.get("question")
    if not question:
        return {"error": "No question provided."}

    # Call your RAG pipeline (update these params as needed!)
    result = answer_question(
        question=question,
        embed_model="all-MiniLM-L6-v2",
        store_type="faiss",
        store_kwargs={"dim": 384},
        llm_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        top_k=3,
    )
    logging.info(f"Question answered: '{question}'")
    # Forward only the fields the client consumes.
    return {key: result[key] for key in ("answer", "chunks", "context")}
60
-
61
@router.post("/feedback")
async def feedback(request: Request):
    """Append a (question, answer, rating) row to feedback.csv.

    Returns ``{"success": True}`` after the row is written.
    """
    import csv  # local import: only this endpoint needs it

    data = await request.json()
    # BUG FIX: the previous hand-rolled f-string write corrupted the CSV
    # whenever a question/answer contained a comma, quote, or newline.
    # csv.writer quotes such fields correctly; newline="" is required by
    # the csv module, and an explicit encoding avoids platform defaults.
    with open("feedback.csv", "a", newline="", encoding="utf-8") as f:
        csv.writer(f).writerow(
            [data.get("question", ""), data.get("answer", ""), data.get("rating", "")]
        )
    logging.info(f"Feedback received for: '{data.get('question','')}'")
    return {"success": True}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/api/schemas.py DELETED
File without changes
app/app.py DELETED
@@ -1,4 +0,0 @@
1
"""Thin launcher for the Gradio interface defined in gradio_app."""
import gradio_app

if __name__ == "__main__":
    # Blocks serving the UI until interrupted.
    gradio_app.iface.launch()
 
 
 
 
 
app/gradio_app.py DELETED
@@ -1,108 +0,0 @@
1
"""Gradio front end: upload a document, index it, and answer questions over it."""
# stdlib
import os
import re
from pathlib import Path

# third-party
import gradio as gr

# project pipeline
from pipeline.chunking.fixed_chunker import FixedChunker
from pipeline.embeddings.sentence_transformer_embed import embed_chunks
from pipeline.ingest.docx_parser import DOCXParser
from pipeline.ingest.html_parser import HTMLParser
from pipeline.ingest.pdf_parser import PDFParser
from pipeline.ingest.txt_parser import TXTParser
from pipeline.rag.retrieval_engine import answer_question
from pipeline.vector_store.faiss_store import FaissStore

# Where the FAISS index is persisted between uploads.
FAISS_INDEX_PATH = "data/faiss.index"
# Embedding dimensionality of all-MiniLM-L6-v2 vectors stored in the index.
EMBED_DIM = 384
17
-
18
def sanitize_filename(filename):
    """Return *filename* with every character outside [a-zA-Z0-9_.-] replaced by '_'."""
    safe = re.sub(r'[^a-zA-Z0-9_.-]', '_', filename)
    return safe
20
-
21
def process_and_qa(file, question):
    """Ingest an uploaded document, update the FAISS index, and answer *question*.

    Args:
        file: a Gradio upload — file-like (``.read``), an object with ``.data``,
            raw ``bytes``, or a path string to an existing file.
        question: natural-language question to answer over the indexed corpus.

    Returns:
        (preview, answer, context) strings; on failure all three carry an
        error description instead.
    """
    try:
        save_dir = Path("data/raw/")
        save_dir.mkdir(parents=True, exist_ok=True)
        filename = sanitize_filename(getattr(file, "name", "uploaded_file"))
        file_path = save_dir / Path(filename).name

        content = None
        if hasattr(file, "read"):
            content = file.read()
        elif hasattr(file, "data"):
            content = file.data
        elif isinstance(file, bytes):
            # NOTE(review): raw bytes carry no name, so the extension falls
            # back to "" and the upload is rejected below — callers should
            # prefer a file-like object or a path. Preserved from original.
            content = file
        elif isinstance(file, str) and os.path.exists(file):
            content = None
            file_path = file
            # BUG FIX: a plain str has no .name attribute, so filename fell
            # back to "uploaded_file" with no suffix and every path-string
            # upload was rejected as unsupported. Derive the name (and thus
            # the extension) from the path itself.
            filename = sanitize_filename(Path(file).name)
        else:
            return "Invalid file object format!", "Error", "Error"

        # Write when we actually received bytes ("is not None" so an empty
        # upload still produces a file instead of being silently skipped).
        if content is not None:
            with open(file_path, "wb") as f:
                f.write(content)

        ext = Path(filename).suffix.lower()
        if ext == ".pdf":
            parser = PDFParser()
        elif ext == ".docx":
            parser = DOCXParser()
        elif ext == ".txt":
            parser = TXTParser()
        elif ext in [".html", ".htm"]:
            parser = HTMLParser()
        else:
            return "Unsupported filetype.", "", ""

        try:
            # Parse -> chunk -> embed -> upsert into the persistent index.
            text, metadata = parser.extract_text_and_metadata(str(file_path))
            chunks = FixedChunker().chunk(text, chunk_size=512, overlap=64)
            embeddings = embed_chunks(chunks, model_name="all-MiniLM-L6-v2")
            metadatas = [{} for _ in chunks]
            store = FaissStore(dim=EMBED_DIM, index_path=FAISS_INDEX_PATH)
            if os.path.exists(FAISS_INDEX_PATH):
                store.load()
            store.add_documents(chunks, embeddings, metadatas)
            store.save()
        except Exception as e:
            return f"Failed to extract: {repr(e)}", "", ""

        qa_result = answer_question(
            question=question,
            embed_model="all-MiniLM-L6-v2",
            store_type="faiss",
            store_kwargs={"dim": EMBED_DIM, "index_path": FAISS_INDEX_PATH},
            llm_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
            top_k=5,
        )
        answer = qa_result["answer"]
        matched_chunks = qa_result.get("chunks", [])
        context = "\n\n---\n\n".join([c["text"] for c in matched_chunks]) if matched_chunks else "No supporting context found."
        return f"Preview (first 500 chars):\n{text[:500]}", answer, context

    except Exception as e:
        # Top-level guard: surface any unexpected failure to the UI instead
        # of crashing the Gradio worker.
        return f"Error: {e}", "Error", "Error"
91
-
92
# Gradio UI: one document upload plus one question in; preview, answer,
# and supporting context out.
_inputs = [
    gr.File(label="Upload PDF, DOCX, TXT, or HTML"),
    gr.Textbox(label="Question"),
]
_outputs = [
    gr.Textbox(label="Extracted/Text Preview", lines=10, show_copy_button=True),
    gr.Textbox(label="Answer", lines=6, show_copy_button=True),
    gr.Textbox(label="Matched Context", lines=12, show_copy_button=True),
]
iface = gr.Interface(
    fn=process_and_qa,
    inputs=_inputs,
    outputs=_outputs,
    title="Book/Document QA",
    description="Upload your document, ask a question, and see the answer with cited context!"
)

if __name__ == "__main__":
    # Bind on all interfaces for containerized deployment.
    iface.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/logger.py DELETED
File without changes
app/main.py DELETED
@@ -1,12 +0,0 @@
1
"""FastAPI entry point for the RAG Book QA System."""
from fastapi import FastAPI

from app.api.routes import router

# Application instance; interactive docs served at /docs.
app = FastAPI(title="RAG Book QA System API", docs_url="/docs")
app.include_router(router)


@app.get("/health")
def health_check():
    """Liveness probe: report that the API process is up."""
    return {"status": "ok"}