deepkansara-123 committed on
Commit
e4b0154
·
verified ·
1 Parent(s): ff84f29

Upload 6 files

Browse files
Files changed (2) hide show
  1. app.py +25 -10
  2. database1.py +2 -5
app.py CHANGED
@@ -47,7 +47,7 @@ def upload_pdf(files):
47
  return f"❌ Error: {str(e)}"
48
 
49
 
50
- # ✅ Generate Q&A using filename
51
  def generate_qa(filename):
52
  try:
53
  with sqlite3.connect("my_database.db") as conn:
@@ -65,16 +65,19 @@ def generate_qa(filename):
65
  questions = qgen.generate(chunk)
66
  if not questions:
67
  continue
68
- question = questions[0]
69
- prompt = f"Context: {chunk}\n\nQuestion: {question}\n\nAnswer:"
70
- result = qa_model(prompt, max_length=256, do_sample=False)
71
- answer = result[0]["generated_text"].strip()
72
- qa_pairs.append(f"Q: {question}\nA: {answer}")
73
- return "\n\n".join(qa_pairs)
 
 
74
  except Exception as e:
75
  return f"❌ Error: {str(e)}"
76
 
77
 
 
78
  # ✅ Ask question using token (semantic similarity)
79
  def ask_question(token, question):
80
  try:
@@ -89,17 +92,29 @@ def ask_question(token, question):
89
  chunks = json.loads(row[0])
90
  processor = pdf_query()
91
  model = processor.model
92
- chunk_embeddings = model.encode(chunks)
 
 
 
 
 
93
  q_embedding = model.encode([question])
94
  scores = cosine_similarity(q_embedding, chunk_embeddings)[0]
 
95
  top_index = int(np.argmax(scores))
96
  top_score = float(scores[top_index])
97
- best_text = re.sub(r'\s+', ' ', chunks[top_index].strip())
98
 
99
  if top_score >= 0.5:
100
  return f"Q: {question}\nA: {best_text}\nScore: {round(top_score, 3)}"
101
  else:
102
- return "⚠️ No relevant answer found (score too low)."
 
 
 
 
 
 
103
  except Exception as e:
104
  return f"❌ Error: {str(e)}"
105
 
 
47
  return f"❌ Error: {str(e)}"
48
 
49
 
50
+
51
  def generate_qa(filename):
52
  try:
53
  with sqlite3.connect("my_database.db") as conn:
 
65
  questions = qgen.generate(chunk)
66
  if not questions:
67
  continue
68
+
69
+ for question in questions[:2]: # generate up to 2 Q&A per chunk
70
+ prompt = f"Context: {chunk}\n\nQuestion: {question}\n\nAnswer:"
71
+ result = qa_model(prompt, max_length=256, do_sample=False)
72
+ answer = result[0]["generated_text"].strip()
73
+ qa_pairs.append(f"Q: {question}\nA: {answer}")
74
+
75
+ return "\n\n".join(qa_pairs) if qa_pairs else "⚠️ No Q&A pairs generated."
76
  except Exception as e:
77
  return f"❌ Error: {str(e)}"
78
 
79
 
80
+
81
  # ✅ Ask question using token (semantic similarity)
82
  def ask_question(token, question):
83
  try:
 
92
  chunks = json.loads(row[0])
93
  processor = pdf_query()
94
  model = processor.model
95
+
96
+ clean_chunks = [re.sub(r'\s+', ' ', c.strip()) for c in chunks if c.strip()]
97
+ if not clean_chunks:
98
+ return "⚠️ No valid content found in PDF."
99
+
100
+ chunk_embeddings = model.encode(clean_chunks)
101
  q_embedding = model.encode([question])
102
  scores = cosine_similarity(q_embedding, chunk_embeddings)[0]
103
+
104
  top_index = int(np.argmax(scores))
105
  top_score = float(scores[top_index])
106
+ best_text = clean_chunks[top_index]
107
 
108
  if top_score >= 0.5:
109
  return f"Q: {question}\nA: {best_text}\nScore: {round(top_score, 3)}"
110
  else:
111
+ # Fallback: show top 3 answers for transparency
112
+ top_indices = np.argsort(scores)[::-1][:3]
113
+ result = f"⚠️ Low score ({round(top_score, 3)}). Showing top 3 suggestions:\n\n"
114
+ for i in top_indices:
115
+ score = round(float(scores[i]), 3)
116
+ result += f"πŸ“Œ Score: {score}\n➑️ {clean_chunks[i][:300]}...\n\n"
117
+ return result
118
  except Exception as e:
119
  return f"❌ Error: {str(e)}"
120
 
database1.py CHANGED
@@ -6,7 +6,6 @@ class create_db:
6
  conn = sqlite3.connect('my_database.db')
7
  cursor = conn.cursor()
8
 
9
- # Only store into this table
10
  cursor.execute("""
11
  CREATE TABLE IF NOT EXISTS token_data (
12
  token_id TEXT PRIMARY KEY,
@@ -24,9 +23,9 @@ class create_db:
24
  (token, chunk_json, filename, full_content)
25
  )
26
  conn.commit()
27
- print({"message": "PDF uploaded and stored successfully"})
28
  except sqlite3.IntegrityError:
29
- print({"error": "Token already exists."})
30
 
31
  conn.close()
32
 
@@ -37,6 +36,4 @@ class create_db:
37
  cursor.execute("SELECT filename FROM token_data")
38
  rows = cursor.fetchall()
39
  conn.close()
40
-
41
  return {"pdfs": [{"filename": row[0]} for row in rows]}
42
-
 
6
  conn = sqlite3.connect('my_database.db')
7
  cursor = conn.cursor()
8
 
 
9
  cursor.execute("""
10
  CREATE TABLE IF NOT EXISTS token_data (
11
  token_id TEXT PRIMARY KEY,
 
23
  (token, chunk_json, filename, full_content)
24
  )
25
  conn.commit()
26
+ print({"message": f"βœ… {filename} uploaded and stored successfully"})
27
  except sqlite3.IntegrityError:
28
+ print({"error": f"❌ Token already exists for: {filename}"})
29
 
30
  conn.close()
31
 
 
36
  cursor.execute("SELECT filename FROM token_data")
37
  rows = cursor.fetchall()
38
  conn.close()
 
39
  return {"pdfs": [{"filename": row[0]} for row in rows]}