generateChatResponseAsync method
Implementation
Stream<ModelResponse> generateChatResponseAsync() async* {
  debugPrint('InferenceChat: Starting async stream generation');
  final buffer = StringBuffer();

  // Smart function-handling mode: continuously scan the stream for JSON patterns.
  String funcBuffer = '';

  debugPrint('InferenceChat: Starting to iterate over native tokens...');
  final originalStream = session.getResponseAsync().map((token) => TextResponse(token));

  // Apply the thinking filter if needed, using ModelThinkingFilter.
  final Stream<ModelResponse> filteredStream = isThinking
      ? ModelThinkingFilter.filterThinkingStream(originalStream, modelType: modelType)
      : originalStream;
  await for (final response in filteredStream) {
    if (response is TextResponse) {
      final token = response.token;
      debugPrint('InferenceChat: Received filtered token: "$token"');
      // Track whether this token should be added to the buffer (default: true).
      bool shouldAddToBuffer = true;

      // Continuous scanning for function calls in the text, for models like DeepSeek.
      if (tools.isNotEmpty && supportsFunctionCalls) {
        // Check whether we are currently buffering potential JSON.
        if (funcBuffer.isNotEmpty) {
          // Already buffering: append the token and check for completion.
          funcBuffer += token;
          debugPrint('InferenceChat: Buffering token: "$token", total: ${funcBuffer.length} chars');

          // Check whether we now have a complete JSON object.
          if (FunctionCallParser.isJsonComplete(funcBuffer)) {
            // First, try to extract a message from any JSON with a `message` field.
            try {
              final jsonData = jsonDecode(funcBuffer);
              if (jsonData is Map<String, dynamic> && jsonData.containsKey('message')) {
                // Found JSON with a `message` field: extract and emit the message.
                final message = jsonData['message'] as String;
                debugPrint('InferenceChat: Extracted message from JSON: "$message"');
                yield TextResponse(message);
                funcBuffer = '';
                shouldAddToBuffer = false; // Don't add JSON tokens to the main buffer.
                continue;
              }
            } catch (e) {
              debugPrint('InferenceChat: Failed to parse JSON for message extraction: $e');
            }

            // No `message` field found: try parsing the buffer as a function call.
            final functionCall = FunctionCallParser.parse(funcBuffer);
            if (functionCall != null) {
              debugPrint('InferenceChat: Found function call in complete buffer!');
              yield functionCall;
              funcBuffer = '';
              shouldAddToBuffer = false; // Don't add function-call tokens to the main buffer.
              continue;
            } else {
              // Complete JSON, but not a valid function call: emit as text and clear the buffer.
              debugPrint('InferenceChat: Invalid JSON, emitting as text');
              yield TextResponse(funcBuffer);
              funcBuffer = '';
              shouldAddToBuffer = false;
              continue;
            }
          }

          // If the buffer grows too long without completing, flush it as text.
          if (funcBuffer.length > _maxFunctionBufferLength) {
            debugPrint('InferenceChat: Buffer too long without completion, flushing as text');
            yield TextResponse(funcBuffer);
            funcBuffer = '';
            shouldAddToBuffer = false;
            continue;
          }

          // Still buffering; don't emit yet.
          shouldAddToBuffer = false;
        } else {
          // Not currently buffering: check whether this token starts JSON.
          if (token.contains('{') || token.contains('```')) {
            debugPrint('InferenceChat: Found potential JSON start in token: "$token"');
            funcBuffer = token;
            shouldAddToBuffer = false; // Keep it out of the main buffer until we know whether it is JSON.
          } else {
            // Normal text token: emit immediately.
            debugPrint('InferenceChat: Emitting text token: "$token"');
            yield response;
            shouldAddToBuffer = true; // Add to the main buffer for history.
          }
        }
      } else {
        // No function processing is happening: emit the token directly.
        debugPrint('InferenceChat: No function processing, emitting token as text: "$token"');
        yield response;
        shouldAddToBuffer = true; // Add to the main buffer for history.
      }

      // Add the token to the main buffer only if it belongs in the final message.
      if (shouldAddToBuffer) {
        buffer.write(token);
      }
    } else {
      // Pass non-text responses (such as ThinkingResponse) straight through.
      yield response;
    }
  }
  debugPrint('InferenceChat: Native token stream ended');
  final response = buffer.toString();
  debugPrint('InferenceChat: Complete response accumulated: "$response"');

  // Handle end of stream: process any remaining buffered text.
  if (funcBuffer.isNotEmpty) {
    debugPrint('InferenceChat: Processing remaining buffer at end of stream: ${funcBuffer.length} chars');
    // First, try to extract a message from the JSON if it has a `message` field.
    if (FunctionCallParser.isJsonComplete(funcBuffer)) {
      try {
        final jsonData = jsonDecode(funcBuffer);
        if (jsonData is Map<String, dynamic> && jsonData.containsKey('message')) {
          final message = jsonData['message'] as String;
          debugPrint('InferenceChat: Extracted message from end-of-stream JSON: "$message"');
          yield TextResponse(message);
        } else {
          // Otherwise, try to parse the buffer as a function call.
          final functionCall = FunctionCallParser.parse(funcBuffer);
          if (functionCall != null) {
            debugPrint('InferenceChat: Function call found at end of stream');
            yield functionCall;
          } else {
            yield TextResponse(funcBuffer);
          }
        }
      } catch (e) {
        debugPrint('InferenceChat: Failed to parse end-of-stream JSON: $e');
        yield TextResponse(funcBuffer);
      }
    } else {
      debugPrint('InferenceChat: No complete JSON at end of stream, emitting remaining as text');
      yield TextResponse(funcBuffer);
    }
  }
  try {
    debugPrint('InferenceChat: Calculating response tokens...');
    final responseTokens = await session.sizeInTokens(response);
    debugPrint('InferenceChat: Response tokens: $responseTokens');
    _currentTokens += responseTokens;
    debugPrint('InferenceChat: Current total tokens: $_currentTokens');
    if (_currentTokens >= (maxTokens - tokenBuffer)) {
      debugPrint('InferenceChat: Token limit reached, recreating session...');
      await _recreateSessionWithReducedChunks();
      debugPrint('InferenceChat: Session recreated successfully');
    }
  } catch (e) {
    debugPrint('InferenceChat: Error during token calculation: $e');
  }

  try {
    debugPrint('InferenceChat: Adding message to history...');
    final chatMessage = Message(text: response, isUser: false);
    debugPrint('InferenceChat: Created message object: ${chatMessage.text}');
    _fullHistory.add(chatMessage);
    debugPrint('InferenceChat: Added to full history');
    _modelHistory.add(chatMessage);
    debugPrint('InferenceChat: Added to model history');
    debugPrint('InferenceChat: Message added to history successfully');
  } catch (e) {
    debugPrint('InferenceChat: Error adding message to history: $e');
    rethrow;
  }

  debugPrint('InferenceChat: generateChatResponseAsync completed successfully');
}
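
Taken together, the loop routes each complete JSON buffer one of three ways. The function-call shape below is illustrative only; the exact schema FunctionCallParser.parse accepts is not documented on this page:

- A JSON object with a message field, such as {"message": "Hello!"}, is unwrapped and yielded as TextResponse('Hello!').
- A JSON object the parser recognizes as a function call (for illustration, something like {"name": "getWeather", "parameters": {"city": "Kyiv"}}) is yielded as the parsed function-call response.
- Everything else, including complete but unrecognized JSON and any buffer that exceeds _maxFunctionBufferLength before completing, is flushed back into the stream as a plain TextResponse.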
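
The loop defers the decision of when funcBuffer becomes parseable to FunctionCallParser.isJsonComplete, whose implementation is not shown here. As a rough mental model only, not the package's actual code, such a check can be done with brace balancing that ignores braces inside string literals:

// Hypothetical stand-in for FunctionCallParser.isJsonComplete, for intuition only.
// Returns true once at least one object has opened and every '{' is matched by '}'.
bool isJsonCompleteSketch(String text) {
  var depth = 0;
  var sawObject = false;
  var inString = false;
  var escaped = false;
  for (final char in text.split('')) {
    if (escaped) {
      escaped = false; // The previous character was a backslash inside a string.
    } else if (inString) {
      if (char == '\\') {
        escaped = true;
      } else if (char == '"') {
        inString = false;
      }
    } else if (char == '"') {
      inString = true;
    } else if (char == '{') {
      depth++;
      sawObject = true;
    } else if (char == '}') {
      depth--;
      if (depth < 0) return false; // Malformed: more closers than openers.
    }
  }
  return sawObject && depth == 0 && !inString;
}

Note that the loop also starts buffering on ``` code fences, so the real parser presumably strips fences before checking; the sketch ignores that detail.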
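
A minimal consumption sketch, assuming chat is an already-configured InferenceChat. The non-text branch is illustrative; match on the concrete ModelResponse subtypes (for example ThinkingResponse or the parsed function-call type) that your version exposes:

Future<void> printChatResponse(InferenceChat chat) async {
  final answer = StringBuffer();
  await for (final response in chat.generateChatResponseAsync()) {
    if (response is TextResponse) {
      // Plain text, including any `message` field unwrapped from JSON.
      answer.write(response.token);
    } else {
      // A ThinkingResponse or a parsed function call; handle per subtype.
      debugPrint('non-text response: $response');
    }
  }
  debugPrint('final answer: $answer');
}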