refactor(app.py): :sparkles: Improve response handling and streamline configuration for model invocation
app.py
CHANGED
@@ -78,13 +78,15 @@ def call_model(state: MessagesState, system_prompt: str):
         pad_token_id=tokenizer.eos_token_id
     )

-    # …
-    …
-    …
-    …
+    # Get just the new tokens (excluding the input prompt tokens)
+    input_length = inputs.shape[1]
+    generated_tokens = outputs[0][input_length:]
+
+    # Decode only the new tokens to get just the assistant's response
+    assistant_response = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

     # Convert the response to LangChain format
-    ai_message = AIMessage(content=…)
+    ai_message = AIMessage(content=assistant_response)
     return {"messages": state["messages"] + [ai_message]}

 # Define the graph
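The slicing added above relies on a transformers convention: model.generate returns the prompt tokens followed by the newly generated ones in a single sequence. A minimal, self-contained sketch of the same pattern, assuming a Hugging Face causal LM; the model name and prompt are illustrative, not taken from app.py:

from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("distilgpt2")  # placeholder model
model = AutoModelForCausalLM.from_pretrained("distilgpt2")

inputs = tokenizer("Hello, how are you?", return_tensors="pt").input_ids

outputs = model.generate(
    inputs,
    max_new_tokens=40,
    pad_token_id=tokenizer.eos_token_id,  # same setting as in the diff
)

# generate() echoes the prompt before the new tokens, so dropping the
# first input_length positions leaves only the assistant's reply.
input_length = inputs.shape[1]
generated_tokens = outputs[0][input_length:]
assistant_response = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
print(assistant_response)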
@@ -92,7 +94,6 @@ workflow = StateGraph(state_schema=MessagesState)

 # Define the node in the graph
 workflow.add_edge(START, "model")
-workflow.add_node("model", call_model)

 # Add memory
 memory = MemorySaver()
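Note that workflow.add_edge(START, "model") only compiles if a node named "model" is registered via add_node somewhere, so the deletion above suggests this call was a duplicate, with the surviving registration elsewhere in app.py. A minimal sketch of the wiring this section assumes, with a stub standing in for the real call_model:

from langgraph.graph import StateGraph, MessagesState, START
from langgraph.checkpoint.memory import MemorySaver

def call_model(state: MessagesState):
    # Stub for app.py's call_model; the real node runs the language model.
    return {"messages": state["messages"]}

workflow = StateGraph(state_schema=MessagesState)
workflow.add_node("model", call_model)  # the node must be registered once
workflow.add_edge(START, "model")       # entry edge into that node

memory = MemorySaver()
graph_app = workflow.compile(checkpointer=memory)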
@@ -149,10 +150,16 @@ async def generate(request: QueryRequest):
     input_messages = [HumanMessage(content=request.query)]

     # Invoke the graph with custom system prompt
+    # Combine config parameters into a single dictionary
+    combined_config = {
+        **config,
+        "model": {"system_prompt": request.system_prompt}
+    }
+
+    # Invoke the graph with proper argument count
     output = graph_app.invoke(
         {"messages": input_messages},
-        …,
-        {"model": {"system_prompt": request.system_prompt}}
+        combined_config
     )

     # Get the model response
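The fix here is plain dictionary unpacking: **config copies the base entries and the "model" key is added beside them, so graph_app.invoke receives its two expected arguments (input and config) rather than three. A small illustration; the base config value is an assumption, following the configurable/thread_id shape LangGraph checkpointers conventionally use:

config = {"configurable": {"thread_id": "1"}}    # assumed base config
system_prompt = "You are a helpful assistant."   # stands in for request.system_prompt

combined_config = {
    **config,                                    # keep the base keys as-is
    "model": {"system_prompt": system_prompt},   # add the per-call prompt
}

print(combined_config)
# {'configurable': {'thread_id': '1'}, 'model': {'system_prompt': 'You are a helpful assistant.'}}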
@@ -189,11 +196,16 @@ async def summarize(request: SummaryRequest):
     # Create the input message
     input_messages = [HumanMessage(content=request.text)]

-    # …
+    # Combine config parameters into a single dictionary
+    combined_config = {
+        **config,
+        "model": {"system_prompt": summary_system_prompt}
+    }
+
+    # Invoke the graph with proper argument count
     output = graph_app.invoke(
         {"messages": input_messages},
-        …,
-        {"model": {"system_prompt": summary_system_prompt}}
+        combined_config
     )

     # Get the model response
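The summarize handler mirrors generate, differing only in the system prompt that gets merged in. For reference, hypothetical request models consistent with the fields the two handlers read (request.query, request.system_prompt, request.text); the actual definitions live elsewhere in app.py and may carry defaults or extra fields:

from pydantic import BaseModel

class QueryRequest(BaseModel):
    # Consumed by generate(); fields match request.query / request.system_prompt.
    query: str
    system_prompt: str

class SummaryRequest(BaseModel):
    # Consumed by summarize(); field matches request.text.
    text: str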