Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -120,7 +120,7 @@ class DOLPHIN:
|
|
| 120 |
do_sample=False,
|
| 121 |
num_beams=1,
|
| 122 |
repetition_penalty=1.1,
|
| 123 |
-
temperature=
|
| 124 |
)
|
| 125 |
|
| 126 |
sequences = self.tokenizer.batch_decode(outputs.sequences, skip_special_tokens=False)
|
|
@@ -191,7 +191,7 @@ Provide a descriptive alt text in 1-2 sentences that is informative but not over
|
|
| 191 |
max_new_tokens=256,
|
| 192 |
disable_compile=True,
|
| 193 |
do_sample=False,
|
| 194 |
-
temperature=
|
| 195 |
pad_token_id=self.processor.tokenizer.pad_token_id,
|
| 196 |
eos_token_id=self.processor.tokenizer.eos_token_id
|
| 197 |
)
|
|
@@ -252,7 +252,7 @@ Provide a descriptive alt text in 1-2 sentences that is informative but not over
|
|
| 252 |
max_new_tokens=1024,
|
| 253 |
disable_compile=True,
|
| 254 |
do_sample=False,
|
| 255 |
-
temperature=
|
| 256 |
pad_token_id=self.processor.tokenizer.pad_token_id,
|
| 257 |
eos_token_id=self.processor.tokenizer.eos_token_id
|
| 258 |
)
|
|
@@ -982,31 +982,49 @@ with gr.Blocks(
|
|
| 982 |
return history + [{"role": "user", "content": message}, {"role": "assistant", "content": "❌ Please process a PDF document first before asking questions."}]
|
| 983 |
|
| 984 |
try:
|
| 985 |
-
#
|
| 986 |
-
|
| 987 |
-
|
| 988 |
-
context = "\n\n".join(relevant_chunks)
|
| 989 |
-
# Smart truncation: aim for ~6000 chars for local model
|
| 990 |
-
if len(context) > 6000:
|
| 991 |
-
# Try to cut at sentence boundaries
|
| 992 |
-
sentences = context[:6000].split('.')
|
| 993 |
-
context = '.'.join(sentences[:-1]) + '...' if len(sentences) > 1 else context[:6000] + '...'
|
| 994 |
-
else:
|
| 995 |
-
# Fallback to truncated document if RAG fails
|
| 996 |
-
context = processed_markdown[:6000] + "..." if len(processed_markdown) > 6000 else processed_markdown
|
| 997 |
|
| 998 |
-
|
| 999 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1000 |
|
| 1001 |
Context from the document:
|
| 1002 |
{context}
|
| 1003 |
|
| 1004 |
Question: {message}
|
| 1005 |
|
| 1006 |
-
|
| 1007 |
-
|
| 1008 |
-
|
| 1009 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1010 |
return history + [{"role": "user", "content": message}, {"role": "assistant", "content": response_text}]
|
| 1011 |
|
| 1012 |
except Exception as e:
|
|
|
|
| 120 |
do_sample=False,
|
| 121 |
num_beams=1,
|
| 122 |
repetition_penalty=1.1,
|
| 123 |
+
temperature=0.2
|
| 124 |
)
|
| 125 |
|
| 126 |
sequences = self.tokenizer.batch_decode(outputs.sequences, skip_special_tokens=False)
|
|
|
|
| 191 |
max_new_tokens=256,
|
| 192 |
disable_compile=True,
|
| 193 |
do_sample=False,
|
| 194 |
+
temperature=0.2,
|
| 195 |
pad_token_id=self.processor.tokenizer.pad_token_id,
|
| 196 |
eos_token_id=self.processor.tokenizer.eos_token_id
|
| 197 |
)
|
|
|
|
| 252 |
max_new_tokens=1024,
|
| 253 |
disable_compile=True,
|
| 254 |
do_sample=False,
|
| 255 |
+
temperature=0.2,
|
| 256 |
pad_token_id=self.processor.tokenizer.pad_token_id,
|
| 257 |
eos_token_id=self.processor.tokenizer.eos_token_id
|
| 258 |
)
|
|
|
|
| 982 |
return history + [{"role": "user", "content": message}, {"role": "assistant", "content": "❌ Please process a PDF document first before asking questions."}]
|
| 983 |
|
| 984 |
try:
|
| 985 |
+
# Check if it's a simple greeting or conversational message
|
| 986 |
+
greeting_words = ['hi', 'hello', 'hey', 'good morning', 'good afternoon', 'good evening', 'thanks', 'thank you']
|
| 987 |
+
is_greeting = any(greeting.lower() in message.lower() for greeting in greeting_words)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 988 |
|
| 989 |
+
if is_greeting and len(message.split()) <= 3:
|
| 990 |
+
# Handle simple greetings without RAG
|
| 991 |
+
if 'hi' in message.lower() or 'hello' in message.lower() or 'hey' in message.lower():
|
| 992 |
+
response_text = "Hello! I'm here to help you with questions about your processed document. What would you like to know?"
|
| 993 |
+
elif 'thank' in message.lower():
|
| 994 |
+
response_text = "You're welcome! Feel free to ask me anything about the document."
|
| 995 |
+
else:
|
| 996 |
+
response_text = "Hello! How can I help you understand the document better?"
|
| 997 |
+
else:
|
| 998 |
+
# Use RAG for document-related questions
|
| 999 |
+
if document_chunks and len(document_chunks) > 0:
|
| 1000 |
+
relevant_chunks = retrieve_relevant_chunks(message, document_chunks, document_embeddings, top_k=3)
|
| 1001 |
+
context = "\n\n".join(relevant_chunks)
|
| 1002 |
+
# Smart truncation: aim for ~6000 chars for local model
|
| 1003 |
+
if len(context) > 6000:
|
| 1004 |
+
# Try to cut at sentence boundaries
|
| 1005 |
+
sentences = context[:6000].split('.')
|
| 1006 |
+
context = '.'.join(sentences[:-1]) + '...' if len(sentences) > 1 else context[:6000] + '...'
|
| 1007 |
+
else:
|
| 1008 |
+
# Fallback to truncated document if RAG fails
|
| 1009 |
+
context = processed_markdown[:6000] + "..." if len(processed_markdown) > 6000 else processed_markdown
|
| 1010 |
+
|
| 1011 |
+
# Create prompt for Gemma 3n
|
| 1012 |
+
prompt = f"""You are a helpful assistant that answers questions about documents. Answer concisely and directly based on the provided context. If the context doesn't contain relevant information, say so briefly and offer to help with other questions about the document.
|
| 1013 |
|
| 1014 |
Context from the document:
|
| 1015 |
{context}
|
| 1016 |
|
| 1017 |
Question: {message}
|
| 1018 |
|
| 1019 |
+
Answer:"""
|
| 1020 |
+
|
| 1021 |
+
# Generate response using local Gemma 3n
|
| 1022 |
+
response_text = gemma_model.chat(prompt)
|
| 1023 |
+
|
| 1024 |
+
# Clean up repetitive text and Korean characters
|
| 1025 |
+
response_text = response_text.split('답변:')[0].strip() # Remove Korean repetitions
|
| 1026 |
+
response_text = response_text.split('Answer:')[-1].strip() # Clean prompt artifacts
|
| 1027 |
+
|
| 1028 |
return history + [{"role": "user", "content": message}, {"role": "assistant", "content": response_text}]
|
| 1029 |
|
| 1030 |
except Exception as e:
|