samithcs committed on
Commit
a216632
·
verified ·
1 Parent(s): ce80d21

Delete app

Browse files
app/__init__.py DELETED
File without changes
app/__pycache__/__init__.cpython-313.pyc DELETED
Binary file (148 Bytes)
 
app/__pycache__/gradio_app.cpython-313.pyc DELETED
Binary file (5.27 kB)
 
app/__pycache__/logging.cpython-313.pyc DELETED
Binary file (147 Bytes)
 
app/__pycache__/main.cpython-313.pyc DELETED
Binary file (322 Bytes)
 
app/api/__init__.py DELETED
@@ -1,5 +0,0 @@
1
"""Package-level FastAPI application: builds the app and mounts the API router."""
from fastapi import FastAPI

from app.api.routes import router

# Application instance with all /upload, /ask, /feedback routes attached.
app = FastAPI()
app.include_router(router)
 
 
 
 
 
 
app/api/__pycache__/__init__.cpython-313.pyc DELETED
Binary file (152 Bytes)
 
app/api/__pycache__/routes.cpython-313.pyc DELETED
Binary file (1.94 kB)
 
app/api/routes.py DELETED
@@ -1,67 +0,0 @@
1
"""API routes for upload, question answering, and feedback collection."""
from pathlib import Path

# Single fastapi import instead of two scattered ones (Request was imported
# separately in the middle of the block before).
from fastapi import APIRouter, File, Request, UploadFile

from app.logger import logging
from pipeline.ingest.docx_parser import DOCXParser
from pipeline.ingest.html_parser import HTMLParser
from pipeline.ingest.pdf_parser import PDFParser
from pipeline.ingest.txt_parser import TXTParser
from pipeline.rag.retrieval_engine import answer_question

# Router mounted by the application (see app/main.py).
router = APIRouter()
13
-
14
@router.post("/upload")
async def upload_file(file: UploadFile = File(...)):
    """Accept a document upload, save it under data/raw/, and return a preview.

    Args:
        file: uploaded document (.pdf, .docx, .txt, .html/.htm).

    Returns:
        dict with ``filename``, a 500-char ``preview`` of the extracted text,
        and parser ``metadata`` — or ``{"error": ...}`` for unsupported types.
    """
    save_dir = Path("data/raw/")
    save_dir.mkdir(parents=True, exist_ok=True)

    # SECURITY FIX: file.filename is client-controlled; joining it directly
    # onto save_dir allowed path traversal (e.g. "../../x"). Keep only the
    # basename so the file can never escape data/raw/.
    safe_name = Path(file.filename).name
    ext = Path(safe_name).suffix.lower()
    file_path = save_dir / safe_name

    with open(file_path, "wb") as f:
        f.write(await file.read())

    # Dispatch table keeps the supported-extension list in one place.
    parser_by_ext = {
        ".pdf": PDFParser,
        ".docx": DOCXParser,
        ".txt": TXTParser,
        ".html": HTMLParser,
        ".htm": HTMLParser,
    }
    parser_cls = parser_by_ext.get(ext)
    if parser_cls is None:
        return {"error": "Unsupported file type!"}

    text, metadata = parser_cls().extract_text_and_metadata(str(file_path))
    return {"filename": file.filename, "preview": text[:500], "metadata": metadata}
37
-
38
-
39
@router.post("/ask")
async def ask_question(request: Request):
    """Answer a JSON-posted question via the RAG pipeline.

    Expects a body like ``{"question": "..."}``; returns the answer together
    with the retrieved chunks and the assembled context.
    """
    payload = await request.json()
    question = payload.get("question")
    if not question:
        return {"error": "No question provided."}

    # Call your RAG pipeline (update these params as needed!)
    result = answer_question(
        question=question,
        embed_model="all-MiniLM-L6-v2",
        store_type="faiss",
        store_kwargs={"dim": 384},
        llm_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        top_k=3,
    )
    logging.info(f"Question answered: '{question}'")
    # Forward only the fields the client consumes.
    return {key: result[key] for key in ("answer", "chunks", "context")}
60
-
61
@router.post("/feedback")
async def feedback(request: Request):
    """Append a (question, answer, rating) row to feedback.csv.

    Returns ``{"success": True}`` after the row is written.
    """
    import csv  # local import: only this endpoint needs it

    data = await request.json()
    # BUG FIX: the previous hand-rolled f-string write corrupted the CSV
    # whenever a question/answer contained a comma, quote, or newline.
    # csv.writer quotes such fields correctly; newline="" is required by
    # the csv module, and an explicit encoding avoids platform defaults.
    with open("feedback.csv", "a", newline="", encoding="utf-8") as f:
        csv.writer(f).writerow(
            [data.get("question", ""), data.get("answer", ""), data.get("rating", "")]
        )
    logging.info(f"Feedback received for: '{data.get('question','')}'")
    return {"success": True}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/api/schemas.py DELETED
File without changes
app/app.py DELETED
@@ -1,4 +0,0 @@
1
"""Thin launcher for the Gradio interface defined in gradio_app."""
import gradio_app

if __name__ == "__main__":
    # Blocks serving the UI until interrupted.
    gradio_app.iface.launch()
 
 
 
 
 
app/gradio_app.py DELETED
@@ -1,108 +0,0 @@
1
"""Gradio front end: upload a document, index it, and answer questions over it."""
# stdlib
import os
import re
from pathlib import Path

# third-party
import gradio as gr

# project pipeline
from pipeline.chunking.fixed_chunker import FixedChunker
from pipeline.embeddings.sentence_transformer_embed import embed_chunks
from pipeline.ingest.docx_parser import DOCXParser
from pipeline.ingest.html_parser import HTMLParser
from pipeline.ingest.pdf_parser import PDFParser
from pipeline.ingest.txt_parser import TXTParser
from pipeline.rag.retrieval_engine import answer_question
from pipeline.vector_store.faiss_store import FaissStore

# Where the FAISS index is persisted between uploads.
FAISS_INDEX_PATH = "data/faiss.index"
# Embedding dimensionality of all-MiniLM-L6-v2 vectors stored in the index.
EMBED_DIM = 384
17
-
18
def sanitize_filename(filename):
    """Return *filename* with every character outside [a-zA-Z0-9_.-] replaced by '_'."""
    safe = re.sub(r'[^a-zA-Z0-9_.-]', '_', filename)
    return safe
20
-
21
def process_and_qa(file, question):
    """Ingest an uploaded document, update the FAISS index, and answer *question*.

    Args:
        file: a Gradio upload — file-like (``.read``), an object with ``.data``,
            raw ``bytes``, or a path string to an existing file.
        question: natural-language question to answer over the indexed corpus.

    Returns:
        (preview, answer, context) strings; on failure all three carry an
        error description instead.
    """
    try:
        save_dir = Path("data/raw/")
        save_dir.mkdir(parents=True, exist_ok=True)
        filename = sanitize_filename(getattr(file, "name", "uploaded_file"))
        file_path = save_dir / Path(filename).name

        content = None
        if hasattr(file, "read"):
            content = file.read()
        elif hasattr(file, "data"):
            content = file.data
        elif isinstance(file, bytes):
            # NOTE(review): raw bytes carry no name, so the extension falls
            # back to "" and the upload is rejected below — callers should
            # prefer a file-like object or a path. Preserved from original.
            content = file
        elif isinstance(file, str) and os.path.exists(file):
            content = None
            file_path = file
            # BUG FIX: a plain str has no .name attribute, so filename fell
            # back to "uploaded_file" with no suffix and every path-string
            # upload was rejected as unsupported. Derive the name (and thus
            # the extension) from the path itself.
            filename = sanitize_filename(Path(file).name)
        else:
            return "Invalid file object format!", "Error", "Error"

        # Write when we actually received bytes ("is not None" so an empty
        # upload still produces a file instead of being silently skipped).
        if content is not None:
            with open(file_path, "wb") as f:
                f.write(content)

        ext = Path(filename).suffix.lower()
        if ext == ".pdf":
            parser = PDFParser()
        elif ext == ".docx":
            parser = DOCXParser()
        elif ext == ".txt":
            parser = TXTParser()
        elif ext in [".html", ".htm"]:
            parser = HTMLParser()
        else:
            return "Unsupported filetype.", "", ""

        try:
            # Parse -> chunk -> embed -> upsert into the persistent index.
            text, metadata = parser.extract_text_and_metadata(str(file_path))
            chunks = FixedChunker().chunk(text, chunk_size=512, overlap=64)
            embeddings = embed_chunks(chunks, model_name="all-MiniLM-L6-v2")
            metadatas = [{} for _ in chunks]
            store = FaissStore(dim=EMBED_DIM, index_path=FAISS_INDEX_PATH)
            if os.path.exists(FAISS_INDEX_PATH):
                store.load()
            store.add_documents(chunks, embeddings, metadatas)
            store.save()
        except Exception as e:
            return f"Failed to extract: {repr(e)}", "", ""

        qa_result = answer_question(
            question=question,
            embed_model="all-MiniLM-L6-v2",
            store_type="faiss",
            store_kwargs={"dim": EMBED_DIM, "index_path": FAISS_INDEX_PATH},
            llm_name="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
            top_k=5,
        )
        answer = qa_result["answer"]
        matched_chunks = qa_result.get("chunks", [])
        context = "\n\n---\n\n".join([c["text"] for c in matched_chunks]) if matched_chunks else "No supporting context found."
        return f"Preview (first 500 chars):\n{text[:500]}", answer, context

    except Exception as e:
        # Top-level guard: surface any unexpected failure to the UI instead
        # of crashing the Gradio worker.
        return f"Error: {e}", "Error", "Error"
91
-
92
# Gradio UI: one document upload plus one question in; preview, answer,
# and supporting context out.
_inputs = [
    gr.File(label="Upload PDF, DOCX, TXT, or HTML"),
    gr.Textbox(label="Question"),
]
_outputs = [
    gr.Textbox(label="Extracted/Text Preview", lines=10, show_copy_button=True),
    gr.Textbox(label="Answer", lines=6, show_copy_button=True),
    gr.Textbox(label="Matched Context", lines=12, show_copy_button=True),
]
iface = gr.Interface(
    fn=process_and_qa,
    inputs=_inputs,
    outputs=_outputs,
    title="Book/Document QA",
    description="Upload your document, ask a question, and see the answer with cited context!"
)

if __name__ == "__main__":
    # Bind on all interfaces for containerized deployment.
    iface.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app/logger.py DELETED
File without changes
app/main.py DELETED
@@ -1,12 +0,0 @@
1
"""FastAPI entry point for the RAG Book QA System."""
from fastapi import FastAPI

from app.api.routes import router

# Application instance; interactive docs served at /docs.
app = FastAPI(title="RAG Book QA System API", docs_url="/docs")
app.include_router(router)


@app.get("/health")
def health_check():
    """Liveness probe: report that the API process is up."""
    return {"status": "ok"}