Spaces:

Vanshcc
/

qa-rag-mysql

Runtime error

App Files Files Community

Vanshcc committed on Jan 15

Commit

674bf3b

verified ·

1 Parent(s): 9eed2af

Update main.py

Browse files

Files changed (1) hide show

main.py +31 -22

main.py CHANGED Viewed

@@ -1,9 +1,7 @@
 from fastapi import FastAPI, UploadFile, File
 import json
-import pypdf
-import io
-from db import conn, cursor
 from embeddings import semantic_chunking, embedding_model
 from retrieval import retrieve_top_chunks
 from llm import build_context_string, call_llm
@@ -16,24 +14,23 @@ def health():
 @app.post("/ingest")
 async def ingest(file: UploadFile = File(...)):
-    if file.filename.endswith(".pdf"):
-        pdf_reader = pypdf.PdfReader(io.BytesIO(await file.read()))
-        text = ""
-        for page in pdf_reader.pages:
-            text += page.extract_text() + "\n"
-    else:
-        text = (await file.read()).decode("utf-8")
     chunks = semantic_chunking(text)
     embeddings = embedding_model.encode(chunks)
-    for i, (chunk, emb) in enumerate(zip(chunks, embeddings)):
-        cursor.execute(
-            "INSERT INTO chunks (document, chunk_id, text, embedding) VALUES (%s, %s, %s, %s)",
-            (file.filename, i, chunk, json.dumps(emb.tolist()))
-        )
-    conn.commit()
     return {"chunks_ingested": len(chunks)}
 @app.post("/ask")
@@ -80,12 +77,24 @@ def ask(question: str):
 @app.post("/reset")
 def reset_db():
-    cursor.execute("TRUNCATE TABLE chunks")
-    conn.commit()
     return {"status": "Database cleared"}
 @app.get("/view")
 def view_db(limit: int = 50):
-    cursor.execute(f"SELECT id, document, chunk_id, left(text, 200) as text FROM chunks ORDER BY id DESC LIMIT {limit}")
-    rows = cursor.fetchall()
-    return [{"id": r[0], "document": r[1], "chunk_id": r[2], "text_snippet": r[3]} for r in rows]

 from fastapi import FastAPI, UploadFile, File
 import json
+from db import get_db_connection
 from embeddings import semantic_chunking, embedding_model
 from retrieval import retrieve_top_chunks
 from llm import build_context_string, call_llm
 @app.post("/ingest")
 async def ingest(file: UploadFile = File(...)):
+    """Chunk, embed, and store an uploaded UTF-8 text file.
+
+    The upload is decoded as UTF-8, split with ``semantic_chunking``,
+    embedded with ``embedding_model.encode``, and inserted into the
+    ``chunks`` table, one row per chunk. A connection is opened for this
+    request and closed before the function exits.
+
+    Returns:
+        ``{"chunks_ingested": <number of chunks produced>}``.
+
+    Raises:
+        HTTPException: status 400 when the upload is not valid UTF-8.
+    """
+    # Local import keeps this block independent of the module import line.
+    from fastapi import HTTPException
+
+    raw = await file.read()
+    try:
+        text = raw.decode("utf-8")
+    except UnicodeDecodeError as exc:
+        # Non-text uploads cannot be decoded; report a client error instead
+        # of letting the request fail with an unhandled 500.
+        raise HTTPException(
+            status_code=400,
+            detail="Uploaded file must be UTF-8 encoded text.",
+        ) from exc
     chunks = semantic_chunking(text)
     embeddings = embedding_model.encode(chunks)
+    conn = get_db_connection()
+    try:
+        # Create the cursor inside the outer try so the connection is still
+        # closed if cursor creation itself fails.
+        cursor = conn.cursor()
+        try:
+            for i, (chunk, emb) in enumerate(zip(chunks, embeddings)):
+                cursor.execute(
+                    "INSERT INTO chunks (document, chunk_id, text, embedding) VALUES (%s, %s, %s, %s)",
+                    (file.filename, i, chunk, json.dumps(emb.tolist()))
+                )
+            # Single commit after all rows so the ingest is all-or-nothing.
+            conn.commit()
+        finally:
+            cursor.close()
+    finally:
+        conn.close()
     return {"chunks_ingested": len(chunks)}
 @app.post("/ask")
 @app.post("/reset")
 def reset_db():
+    """Remove every row from the ``chunks`` table.
+
+    Opens a connection for this call, truncates the table, commits, and
+    closes the cursor and connection whether or not the statement succeeds.
+
+    Returns:
+        ``{"status": "Database cleared"}``.
+    """
+    db = get_db_connection()
+    cur = db.cursor()
+    try:
+        cur.execute("TRUNCATE TABLE chunks")
+        db.commit()
+    finally:
+        # Release the cursor first, then the connection it came from.
+        cur.close()
+        db.close()
     return {"status": "Database cleared"}
 @app.get("/view")
 def view_db(limit: int = 50):
+    """Return stored chunks ordered by descending ``id``.
+
+    Args:
+        limit: Maximum number of rows to return. Negative values are
+            treated as 0.
+
+    Returns:
+        A list of dicts with keys ``id``, ``document``, ``chunk_id`` and
+        ``text_snippet`` (the leftmost 200 characters of the chunk text).
+    """
+    # A negative LIMIT is a SQL error; clamp it instead of failing the call.
+    row_limit = max(limit, 0)
+    conn = get_db_connection()
+    cursor = conn.cursor()
+    try:
+        # Bind the limit as a query parameter rather than formatting it into
+        # the statement, matching the %s placeholders used by the INSERT.
+        cursor.execute(
+            "SELECT id, document, chunk_id, left(text, 200) as text "
+            "FROM chunks ORDER BY id DESC LIMIT %s",
+            (row_limit,),
+        )
+        rows = cursor.fetchall()
+        return [{"id": r[0], "document": r[1], "chunk_id": r[2], "text_snippet": r[3]} for r in rows]
+    finally:
+        cursor.close()
+        conn.close()