generateChatResponseAsync method

Stream<ModelResponse> generateChatResponseAsync()
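
Streams the model's reply as a sequence of ModelResponse events: plain text arrives token by token as TextResponse, reasoning traces (when isThinking is enabled) arrive as ThinkingResponse, and a detected tool invocation is yielded as a parsed function-call response. A minimal consumption sketch, assuming a chat already exists with a user message queued; the helper name printReply is illustrative, and the concrete function-call subtype varies by version:

import 'dart:io';
// (plus the package import that provides InferenceChat and the response types)

// Sketch only: assumes `chat` already has the user's message queued
// (e.g. via addQueryChunk, per the surrounding package API).
Future<void> printReply(InferenceChat chat) async {
  await for (final response in chat.generateChatResponseAsync()) {
    if (response is TextResponse) {
      stdout.write(response.token); // incremental text as it is generated
    } else if (response is ThinkingResponse) {
      // Reasoning content, already separated from the visible text stream.
    } else {
      // A parsed function call; dispatch it to your tool handler.
    }
  }
}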

Implementation

Stream<ModelResponse> generateChatResponseAsync() async* {
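  // Overall flow: raw native tokens -> optional thinking filter ->
  // function-call scanning (only when tools are registered) -> emission,
  // with plain text accumulated in `buffer` for chat history.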
  debugPrint('InferenceChat: Starting async stream generation');
  final buffer = StringBuffer();

  // Function-call detection: continuously scan the token stream for JSON patterns
  String funcBuffer = '';

  debugPrint('InferenceChat: Starting to iterate over native tokens...');

  final originalStream = session.getResponseAsync().map((token) => TextResponse(token));

  // Separate thinking content from text via ModelThinkingFilter when isThinking is enabled
  final Stream<ModelResponse> filteredStream =
      isThinking ? ModelThinkingFilter.filterThinkingStream(originalStream, modelType: modelType) : originalStream;

  await for (final response in filteredStream) {
    if (response is TextResponse) {
      final token = response.token;
      debugPrint('InferenceChat: Received filtered token: "$token"');

      // Track whether this token should be added to the main text buffer (defaults to true)
      bool shouldAddToBuffer = true;

      // Continuous scanning for function calls in text - for models like DeepSeek
      if (tools.isNotEmpty && supportsFunctionCalls) {
        // Check if we're currently buffering potential JSON
        if (funcBuffer.isNotEmpty) {
          // We're already buffering - add token and check for completion
          funcBuffer += token;
          debugPrint('InferenceChat: Buffering token: "$token", total: ${funcBuffer.length} chars');

          // Check whether we now have a complete JSON object
          if (FunctionCallParser.isJsonComplete(funcBuffer)) {
            // First, try to extract a message from any JSON that carries a 'message' field
            try {
              final jsonData = jsonDecode(funcBuffer);
              if (jsonData is Map<String, dynamic> && jsonData.containsKey('message')) {
                // Found JSON with a 'message' field - extract the message and emit it as text
                final message = jsonData['message'] as String;
                debugPrint('InferenceChat: Extracted message from JSON: "$message"');
                yield TextResponse(message);
                funcBuffer = '';
                shouldAddToBuffer = false; // Don't add JSON tokens to buffer
                continue;
              }
            } catch (e) {
              debugPrint('InferenceChat: Failed to parse JSON for message extraction: $e');
            }

            // If there is no message field, try parsing the buffer as a function call
            final functionCall = FunctionCallParser.parse(funcBuffer);
            if (functionCall != null) {
              debugPrint('InferenceChat: Found function call in complete buffer!');
              yield functionCall;
              funcBuffer = '';
              shouldAddToBuffer = false; // Don't add function call tokens to buffer
              continue;
            } else {
              // Not valid JSON - emit it as plain text and clear the buffer
              debugPrint('InferenceChat: Invalid JSON, emitting as text');
              yield TextResponse(funcBuffer);
              funcBuffer = '';
              shouldAddToBuffer = false;
              continue;
            }
          }

          // If the buffer grows too long without completing, flush it as text
          if (funcBuffer.length > _maxFunctionBufferLength) {
            debugPrint('InferenceChat: Buffer too long without completion, flushing as text');
            yield TextResponse(funcBuffer);
            funcBuffer = '';
            shouldAddToBuffer = false;
            continue;
          }

          // Still buffering, don't emit yet
          shouldAddToBuffer = false;
        } else {
          // Not currently buffering - check if this token starts JSON
          if (token.contains('{') || token.contains('```')) {
            debugPrint('InferenceChat: Found potential JSON start in token: "$token"');
            funcBuffer = token;
            shouldAddToBuffer = false; // Don't add to the main buffer until we know whether it's JSON
          } else {
            // Normal text token - emit immediately
            debugPrint('InferenceChat: Emitting text token: "$token"');
            yield response;
            shouldAddToBuffer = true; // Add to main buffer for history
          }
        }
      } else {
        // No function processing happening - emit token directly
        debugPrint('InferenceChat: No function processing, emitting token as text: "$token"');
        yield response;
        shouldAddToBuffer = true; // Add to main buffer for history
      }

      // Add the token to the main buffer only if it belongs in the final message
      if (shouldAddToBuffer) {
        buffer.write(token);
      }
    } else {
      // Pass non-text responses (e.g. ThinkingResponse) through unchanged
      yield response;
    }
  }

  debugPrint('InferenceChat: Native token stream ended');
  final response = buffer.toString();
  debugPrint('InferenceChat: Complete response accumulated: "$response"');

  // Handle end of stream - process any remaining buffer
  if (funcBuffer.isNotEmpty) {
    debugPrint('InferenceChat: Processing remaining buffer at end of stream: ${funcBuffer.length} chars');

    // First, try to extract a message if the end-of-stream JSON carries a 'message' field
    if (FunctionCallParser.isJsonComplete(funcBuffer)) {
      try {
        final jsonData = jsonDecode(funcBuffer);
        if (jsonData is Map<String, dynamic> && jsonData.containsKey('message')) {
          final message = jsonData['message'] as String;
          debugPrint('InferenceChat: Extracted message from end-of-stream JSON: "$message"');
          yield TextResponse(message);
        } else {
          // No message field - try parsing the buffer as a function call
          final functionCall = FunctionCallParser.parse(funcBuffer);
          if (functionCall != null) {
            debugPrint('InferenceChat: Function call found at end of stream');
            yield functionCall;
          } else {
            yield TextResponse(funcBuffer);
          }
        }
      } catch (e) {
        debugPrint('InferenceChat: Failed to parse end-of-stream JSON: $e');
        yield TextResponse(funcBuffer);
      }
    } else {
      debugPrint('InferenceChat: No complete JSON at end of stream, emitting remaining as text');
      yield TextResponse(funcBuffer);
    }
  }

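  // Token accounting: extend the running count and proactively recreate the
  // session before maxTokens is reached, keeping `tokenBuffer` tokens of headroom.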
  try {
    debugPrint('InferenceChat: Calculating response tokens...');
    final responseTokens = await session.sizeInTokens(response);
    debugPrint('InferenceChat: Response tokens: $responseTokens');
    _currentTokens += responseTokens;
    debugPrint('InferenceChat: Current total tokens: $_currentTokens');

    if (_currentTokens >= (maxTokens - tokenBuffer)) {
      debugPrint('InferenceChat: Token limit reached, recreating session...');
      await _recreateSessionWithReducedChunks();
      debugPrint('InferenceChat: Session recreated successfully');
    }
  } catch (e) {
    debugPrint('InferenceChat: Error during token calculation: $e');
  }

  try {
    debugPrint('InferenceChat: Adding message to history...');
    final chatMessage = Message(text: response, isUser: false);
    debugPrint('InferenceChat: Created message object: ${chatMessage.text}');
    _fullHistory.add(chatMessage);
    debugPrint('InferenceChat: Added to full history');
    _modelHistory.add(chatMessage);
    debugPrint('InferenceChat: Added to model history');
    debugPrint('InferenceChat: Message added to history successfully');
  } catch (e) {
    debugPrint('InferenceChat: Error adding message to history: $e');
    rethrow;
  }

  debugPrint('InferenceChat: generateChatResponseAsync completed successfully');
}
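
The scanner above defers to FunctionCallParser.isJsonComplete to decide when the buffered characters form a parseable JSON object. A common way to implement such a check is brace balancing that ignores braces inside string literals; the sketch below illustrates that idea only and is not the package's actual implementation (for one, it ignores the markdown fences the scanner also watches for):

/// Illustrative completeness check: true once at least one `{` has been seen
/// and every brace opened outside a string literal has been closed.
/// A sketch only - not FunctionCallParser's real logic.
bool isJsonCompleteSketch(String input) {
  var depth = 0;
  var sawObject = false;
  var inString = false;
  var escaped = false;
  for (final rune in input.runes) {
    final ch = String.fromCharCode(rune);
    if (escaped) {
      escaped = false; // the character after a backslash is consumed verbatim
    } else if (inString) {
      if (ch == r'\') {
        escaped = true;
      } else if (ch == '"') {
        inString = false;
      }
    } else if (ch == '"') {
      inString = true;
    } else if (ch == '{') {
      depth++;
      sawObject = true;
    } else if (ch == '}') {
      depth--;
    }
  }
  return sawObject && depth == 0 && !inString;
}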