Vanshcc committed on
Commit
674bf3b
·
verified ·
1 Parent(s): 9eed2af

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +31 -22
main.py CHANGED
@@ -1,9 +1,7 @@
1
  from fastapi import FastAPI, UploadFile, File
2
  import json
3
- import pypdf
4
- import io
5
 
6
- from db import conn, cursor
7
  from embeddings import semantic_chunking, embedding_model
8
  from retrieval import retrieve_top_chunks
9
  from llm import build_context_string, call_llm
@@ -16,24 +14,23 @@ def health():
16
 
17
  @app.post("/ingest")
18
  async def ingest(file: UploadFile = File(...)):
19
- if file.filename.endswith(".pdf"):
20
- pdf_reader = pypdf.PdfReader(io.BytesIO(await file.read()))
21
- text = ""
22
- for page in pdf_reader.pages:
23
- text += page.extract_text() + "\n"
24
- else:
25
- text = (await file.read()).decode("utf-8")
26
 
27
  chunks = semantic_chunking(text)
28
  embeddings = embedding_model.encode(chunks)
29
 
30
- for i, (chunk, emb) in enumerate(zip(chunks, embeddings)):
31
- cursor.execute(
32
- "INSERT INTO chunks (document, chunk_id, text, embedding) VALUES (%s, %s, %s, %s)",
33
- (file.filename, i, chunk, json.dumps(emb.tolist()))
34
- )
35
-
36
- conn.commit()
 
 
 
 
 
37
  return {"chunks_ingested": len(chunks)}
38
 
39
  @app.post("/ask")
@@ -80,12 +77,24 @@ def ask(question: str):
80
 
81
  @app.post("/reset")
82
  def reset_db():
83
- cursor.execute("TRUNCATE TABLE chunks")
84
- conn.commit()
 
 
 
 
 
 
85
  return {"status": "Database cleared"}
86
 
87
  @app.get("/view")
88
  def view_db(limit: int = 50):
89
- cursor.execute(f"SELECT id, document, chunk_id, left(text, 200) as text FROM chunks ORDER BY id DESC LIMIT {limit}")
90
- rows = cursor.fetchall()
91
- return [{"id": r[0], "document": r[1], "chunk_id": r[2], "text_snippet": r[3]} for r in rows]
 
 
 
 
 
 
 
1
  from fastapi import FastAPI, UploadFile, File
2
  import json
 
 
3
 
4
+ from db import get_db_connection
5
  from embeddings import semantic_chunking, embedding_model
6
  from retrieval import retrieve_top_chunks
7
  from llm import build_context_string, call_llm
 
14
 
15
@app.post("/ingest")
async def ingest(file: UploadFile = File(...)):
    """Ingest an uploaded text file into the vector store.

    Reads the upload as UTF-8 text, splits it into semantic chunks, embeds
    each chunk, and stores one (document, chunk_id, text, embedding) row per
    chunk in the `chunks` table.

    Returns:
        {"chunks_ingested": <number of chunks stored>}

    Raises:
        UnicodeDecodeError: if the upload is not valid UTF-8 (surfaced as a 500).
    """
    # NOTE(review): assumes the upload is plain UTF-8 text — PDF handling was
    # removed in this revision; confirm callers only send text files.
    text = (await file.read()).decode("utf-8")

    chunks = semantic_chunking(text)
    embeddings = embedding_model.encode(chunks)

    # Build all rows up front; embeddings are serialized to JSON for storage.
    rows = [
        (file.filename, i, chunk, json.dumps(emb.tolist()))
        for i, (chunk, emb) in enumerate(zip(chunks, embeddings))
    ]

    conn = get_db_connection()
    try:
        # Cursor is created inside the try so the connection is closed even if
        # cursor() itself raises (the original leaked the connection here).
        cursor = conn.cursor()
        try:
            # One executemany instead of one execute() per chunk.
            cursor.executemany(
                "INSERT INTO chunks (document, chunk_id, text, embedding) VALUES (%s, %s, %s, %s)",
                rows,
            )
            conn.commit()
        finally:
            cursor.close()
    finally:
        conn.close()
    return {"chunks_ingested": len(chunks)}
35
 
36
  @app.post("/ask")
 
77
 
78
@app.post("/reset")
def reset_db():
    """Delete every row from the `chunks` table (TRUNCATE) and commit.

    Returns:
        {"status": "Database cleared"}
    """
    conn = get_db_connection()
    try:
        # Cursor is created inside the try so the connection is closed even if
        # cursor() itself raises (the original leaked the connection here).
        cursor = conn.cursor()
        try:
            cursor.execute("TRUNCATE TABLE chunks")
            conn.commit()
        finally:
            cursor.close()
    finally:
        conn.close()
    return {"status": "Database cleared"}
89
 
90
@app.get("/view")
def view_db(limit: int = 50):
    """Return up to `limit` of the most recently inserted chunks, newest first.

    Args:
        limit: maximum number of rows to return (default 50).

    Returns:
        A list of {"id", "document", "chunk_id", "text_snippet"} dicts, where
        text_snippet is the first 200 characters of the stored chunk text.
    """
    conn = get_db_connection()
    try:
        # Cursor is created inside the try so the connection is closed even if
        # cursor() itself raises (the original leaked the connection here).
        cursor = conn.cursor()
        try:
            # Parameterized LIMIT instead of f-string interpolation: FastAPI
            # already coerces `limit` to int, but string-built SQL is a bad
            # habit and the driver handles the placeholder safely.
            cursor.execute(
                "SELECT id, document, chunk_id, left(text, 200) AS text "
                "FROM chunks ORDER BY id DESC LIMIT %s",
                (limit,),
            )
            rows = cursor.fetchall()
            return [
                {"id": r[0], "document": r[1], "chunk_id": r[2], "text_snippet": r[3]}
                for r in rows
            ]
        finally:
            cursor.close()
    finally:
        conn.close()