Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -31,7 +31,7 @@ def download_pdf(url):
|
|
| 31 |
def custom_chunking(text, delimiter="\n\n"):
    """Break ``text`` into chunks wherever ``delimiter`` occurs.

    Args:
        text: The string to divide.
        delimiter: Separator marking chunk boundaries; defaults to two
            consecutive newlines.

    Returns:
        The list produced by ``text.split(delimiter)``. Empty pieces between
        adjacent delimiters are kept, not filtered out.
    """
    chunks = text.split(delimiter)
    return chunks
|
| 34 |
-
|
| 35 |
def extract_text_from_pdf(pdf_bytes, document_id):
|
| 36 |
"""Extracts text from a PDF, page by page, and then chunks each page."""
|
| 37 |
pdf_file = io.BytesIO(pdf_bytes)
|
|
@@ -147,7 +147,7 @@ def calculate_confidence(query, context, answer):
|
|
| 147 |
query_embedding = embedding_model.encode([query], convert_to_numpy=True)
|
| 148 |
context_embedding = embedding_model.encode([context], convert_to_numpy=True)
|
| 149 |
answer_embedding = embedding_model.encode([answer], convert_to_numpy=True)
|
| 150 |
-
|
| 151 |
query_context_similarity = np.dot(query_embedding, context_embedding.T).item()
|
| 152 |
context_answer_similarity = np.dot(context_embedding, answer_embedding.T).item()
|
| 153 |
confidence = (query_context_similarity + context_answer_similarity) / 2.0 # Equal weights
|
|
@@ -161,7 +161,7 @@ def generate_response(query, context):
|
|
| 161 |
- JUST PROVIDE ONLY THE ANSWER.
|
| 162 |
- Provide a elaborate, factual answer based strictly on the Context.
|
| 163 |
- Avoid generating Python code, solutions, or any irrelevant information.
|
| 164 |
-
Context: {context}
|
| 165 |
Question: {query}
|
| 166 |
Answer:"""
|
| 167 |
response = generator(prompt, max_new_tokens=500, num_return_sequences=1)[0]['generated_text']
|
|
@@ -256,4 +256,4 @@ with gr.Blocks() as demo:
|
|
| 256 |
|
| 257 |
iface.render()
|
| 258 |
|
| 259 |
-
demo.launch()
|
|
|
|
| 31 |
def custom_chunking(text, delimiter="\n\n"):
    """Break ``text`` into chunks wherever ``delimiter`` occurs.

    Args:
        text: The string to divide.
        delimiter: Separator marking chunk boundaries; defaults to two
            consecutive newlines.

    Returns:
        The list produced by ``text.split(delimiter)``. Empty pieces between
        adjacent delimiters are kept, not filtered out.
    """
    chunks = text.split(delimiter)
    return chunks
|
| 34 |
+
|
| 35 |
def extract_text_from_pdf(pdf_bytes, document_id):
|
| 36 |
"""Extracts text from a PDF, page by page, and then chunks each page."""
|
| 37 |
pdf_file = io.BytesIO(pdf_bytes)
|
|
|
|
| 147 |
query_embedding = embedding_model.encode([query], convert_to_numpy=True)
|
| 148 |
context_embedding = embedding_model.encode([context], convert_to_numpy=True)
|
| 149 |
answer_embedding = embedding_model.encode([answer], convert_to_numpy=True)
|
| 150 |
+
|
| 151 |
query_context_similarity = np.dot(query_embedding, context_embedding.T).item()
|
| 152 |
context_answer_similarity = np.dot(context_embedding, answer_embedding.T).item()
|
| 153 |
confidence = (query_context_similarity + context_answer_similarity) / 2.0 # Equal weights
|
|
|
|
| 161 |
- JUST PROVIDE ONLY THE ANSWER.
|
| 162 |
- Provide a elaborate, factual answer based strictly on the Context.
|
| 163 |
- Avoid generating Python code, solutions, or any irrelevant information.
|
| 164 |
+
Context: {context}
|
| 165 |
Question: {query}
|
| 166 |
Answer:"""
|
| 167 |
response = generator(prompt, max_new_tokens=500, num_return_sequences=1)[0]['generated_text']
|
|
|
|
| 256 |
|
| 257 |
iface.render()
|
| 258 |
|
| 259 |
+
demo.launch()
|