Skip to content

azad.prompts.dialects.xml.parser Module

azad.prompts.dialects.xml.parser

Improved XML parser for real-time streaming content parsing.

Attributes

Classes

XMLDialectConfig

Bases: DialectConfig

Configuration for the XML parser.

Attributes
parameter_buffer_threshold class-attribute instance-attribute
parameter_buffer_threshold: int = Field(default=25, description='Emit parameter chunks immediately')
text_buffer_threshold class-attribute instance-attribute
text_buffer_threshold: int = Field(default=25, description='Min characters for text chunks')
min_emit_interval_ms class-attribute instance-attribute
min_emit_interval_ms: int = Field(default=20, description='No delay between emissions')
max_parameter_buffer_size class-attribute instance-attribute
max_parameter_buffer_size: int = Field(default=50, description='Max buffer size before forced emission')
max_tag_buffer_size class-attribute instance-attribute
max_tag_buffer_size: int = Field(default=200, description='Max size for a tag buffer before treating as content')
prefix class-attribute instance-attribute
prefix: str = Field(default='', description='Prefix for XML tags')

ParserContext dataclass

ParserContext(id: str, ts: float, tool_name: str, current_param: Optional[str] = None, tool_params: Dict[str, str] = dict(), param_buffer: Dict[str, str] = dict(), param_parts: Dict[str, List[str]] = dict(), param_last_emit: Dict[str, float] = dict(), nestingLevel: int = 1, paramNestingLevel: Dict[str, int] = dict())

Context object to track the current state of a tool being parsed.

Attributes
current_param class-attribute instance-attribute
current_param: Optional[str] = None
tool_params class-attribute instance-attribute
tool_params: Dict[str, str] = field(default_factory=dict)
param_buffer class-attribute instance-attribute
param_buffer: Dict[str, str] = field(default_factory=dict)
param_parts class-attribute instance-attribute
param_parts: Dict[str, List[str]] = field(default_factory=dict)
param_last_emit class-attribute instance-attribute
param_last_emit: Dict[str, float] = field(default_factory=dict)
paramNestingLevel class-attribute instance-attribute
paramNestingLevel: Dict[str, int] = field(default_factory=dict)

XMLDialectParser

XMLDialectParser(tool_schemas: Dict[str, Dict[str, Any]], config: XMLDialectConfig)

Bases: DialectParser

Improved XML dialect parser for real-time streaming content parsing.

Initialize the XML parser with tool schemas and configuration.

Source code in azad/prompts/dialects/xml/parser.py
def __init__(self, tool_schemas: Dict[str, Dict[str, Any]], config: XMLDialectConfig):
    """Create a parser bound to the given tool schemas and configuration.

    Args:
        tool_schemas: Mapping stored unchanged on ``self.tool_schemas``.
        config: Parser settings stored unchanged on ``self.config``.
    """
    self.config = config
    self.tool_schemas = tool_schemas
    # reset() assigns every buffer and flag its starting value, so a new
    # parser begins in the same state as one that was explicitly reset.
    self.reset()
Attributes
cdata_detection_buffer class-attribute instance-attribute
cdata_detection_buffer: str = Field(default='', exclude=True)
tag_buffer class-attribute instance-attribute
tag_buffer: str = Field(default='', exclude=True)
in_tag class-attribute instance-attribute
in_tag: bool = Field(default=False, exclude=True)
text_buffer class-attribute instance-attribute
text_buffer: str = Field(default='', exclude=True)
sent_text_start class-attribute instance-attribute
sent_text_start: bool = Field(default=False, exclude=True)
current_context class-attribute instance-attribute
current_context: Optional[ParserContext] = Field(default=None, exclude=True)
in_cdata class-attribute instance-attribute
in_cdata: bool = Field(default=False, exclude=True)
cdata_buffer class-attribute instance-attribute
cdata_buffer: str = Field(default='', exclude=True)
tool_schemas class-attribute instance-attribute
tool_schemas: Dict[str, Dict[str, Any]] = tool_schemas
Classes
Functions
feed
feed(data: bytes) -> List[AINetworkEventUnion]

Process incoming data and emit appropriate events.

Source code in azad/prompts/dialects/xml/parser.py
def feed(self, data: bytes) -> List[AINetworkEventUnion]:
    """Decode one chunk of bytes, parse it, and return the events it produced.

    The chunk is decoded as UTF-8 and handed to ``_process_char`` one
    character at a time. While a tool parameter is open, a time-based flush
    check runs after every character. Whatever is left in ``text_buffer``
    once the chunk is consumed is emitted as a text chunk (preceded by a
    text-start event if none has been sent yet) and the buffer is cleared.

    Args:
        data: Raw UTF-8 encoded bytes for this chunk.

    Returns:
        The events generated while processing this chunk, in order.

    Raises:
        UnicodeDecodeError: If ``data`` is not valid UTF-8 on its own. Each
            call decodes independently, so a multi-byte character split
            across two calls is rejected.
    """
    collected: List[AINetworkEventUnion] = []

    for ch in data.decode('utf-8'):
        collected += self._process_char(ch)

        # Only an open parameter can have buffered content worth flushing on a timer.
        ctx = self.current_context
        if ctx and ctx.current_param:
            collected += self._check_time_based_flush()

    pending_text = self.text_buffer
    if not pending_text:
        return collected

    # Text is never held back across calls: everything buffered goes out now.
    if not self.sent_text_start:
        collected.append(AINetworkEventTextStart())
        self.sent_text_start = True
    collected.append(AINetworkEventTextChunk(content=pending_text))
    self.text_buffer = ""

    return collected
end_parsing
end_parsing() -> List[AINetworkEventUnion]

End parsing and handle any unclosed elements.

Source code in azad/prompts/dialects/xml/parser.py
def end_parsing(self) -> List[AINetworkEventUnion]:
    """Finish the stream: flush buffers and close anything still open.

    Steps, in order:

    1. Any partially read tag left in ``tag_buffer`` is treated as literal
       content: appended to the open parameter if there is one, otherwise
       appended to ``text_buffer``.
    2. Runs of five or more newlines in ``text_buffer`` are reduced to four,
       the text is emitted, and the text stream is ended. A text stream
       that was started but has nothing buffered is still ended.
    3. If a tool context is still open, its open parameter (if any) is
       flushed and ended, all collected parameter values are passed through
       ``_sanitize_cdata``, and the parameters-complete and tool-ready
       events are emitted. The context is then cleared.

    Returns:
        The events generated while closing the stream, in order.
    """
    import re

    events: List[AINetworkEventUnion] = []

    # --- 1. Leftover tag fragment becomes plain content -------------------
    pending = self.tag_buffer
    if pending:
        ctx = self.current_context
        if ctx and ctx.current_param:
            key = ctx.current_param
            ctx.param_buffer[key] = ctx.param_buffer.get(key, "") + pending
            parts = ctx.param_parts.setdefault(key, [""])
            if not parts:
                parts.append("")
            parts[-1] += pending
        else:
            # NOTE(review): this branch also runs when a tool context is open
            # but no parameter is; the fragment is appended to the text
            # buffer in that case too, not discarded.
            self.text_buffer += pending
        self.tag_buffer = ""

    # --- 2. Flush text -----------------------------------------------------
    if self.text_buffer:
        # Cap long blank runs: five or more newlines become exactly four.
        self.text_buffer = re.sub(r'\n{5,}', '\n\n\n\n', self.text_buffer)

    if self.text_buffer:
        if not self.sent_text_start:
            events.append(AINetworkEventTextStart())
            self.sent_text_start = True

        events.append(AINetworkEventTextChunk(content=self.text_buffer))
        events.append(AINetworkEventTextEnd())
        self.text_buffer = ""
        self.sent_text_start = False
    elif self.sent_text_start:
        # A started text stream must be closed even with nothing left to send.
        events.append(AINetworkEventTextEnd())
        self.sent_text_start = False

    # --- 3. Close an unfinished tool call ----------------------------------
    ctx = self.current_context
    if not ctx:
        return events

    open_param = ctx.current_param
    if open_param:
        # Emit the last, not yet emitted part of the open parameter.
        parts = ctx.param_parts.get(open_param)
        if parts and parts[-1]:
            # Sanitized twice, as a deliberate second pass over the result.
            tail = self._sanitize_cdata(self._sanitize_cdata(parts[-1]))
            events.append(AINetworkEventParameterChunk(
                parameter=open_param,
                content=tail,
                tool_call_id=ctx.id
            ))

        # Record the full parameter value, also sanitized twice.
        if open_param in ctx.param_buffer:
            full_value = self._sanitize_cdata(ctx.param_buffer[open_param])
            ctx.tool_params[open_param] = self._sanitize_cdata(full_value)

        events.append(AINetworkEventParameterEnd(parameter=open_param))

    # The thinking tool always carries a "content" argument, even if empty.
    if ctx.tool_name == "thinking":
        ctx.tool_params.setdefault("content", "")

    # Final sanitizing pass over every collected argument.
    for name in list(ctx.tool_params):
        ctx.tool_params[name] = self._sanitize_cdata(ctx.tool_params[name])

    events.append(AINetworkEventParametersComplete(tool_call_id=ctx.id))
    events.append(AINetworkEventToolReady(
        tool_name=ctx.tool_name,
        tool_call_id=ctx.id,
        args=ctx.tool_params
    ))

    self.current_context = None

    return events
reset
reset() -> None

Reset the parser state.

Source code in azad/prompts/dialects/xml/parser.py
def reset(self) -> None:
    """Return every piece of parser state to its initial value.

    String buffers become empty, boolean flags become ``False``, and the
    current tool context is dropped.
    """
    initial_state = (
        ("cdata_detection_buffer", ""),
        ("tag_buffer", ""),
        ("in_tag", False),
        ("text_buffer", ""),
        ("sent_text_start", False),
        ("current_context", None),
        ("in_cdata", False),
        ("cdata_buffer", ""),
    )
    for attribute, value in initial_state:
        setattr(self, attribute, value)
feed_tool_call_delta
feed_tool_call_delta(tool_call: ChatCompletionDeltaToolCall) -> list[AINetworkEventUnion]
Source code in azad/prompts/base_dialect.py
def feed_tool_call_delta(self, tool_call: litellm.types.utils.ChatCompletionDeltaToolCall) -> list[AINetworkEventUnion]:
    """Placeholder for handling a tool-call delta.

    This implementation ignores ``tool_call`` and performs no parsing.

    Returns:
        The ``NotImplemented`` singleton, not a list of events.
    """
    # NOTE(review): returning NotImplemented (rather than raising
    # NotImplementedError) means callers receive a non-list value despite the
    # annotated return type -- confirm whether any caller checks for it
    # before changing this.
    return NotImplemented

Functions

debug

debug(msg)

Print debug information if DEBUG is True.

Source code in azad/prompts/dialects/xml/parser.py
def debug(msg):
    """Write ``msg`` to stdout with a ``DEBUG: `` prefix when DEBUG is truthy.

    Does nothing when the module-level ``DEBUG`` flag is falsy.
    """
    if not DEBUG:
        return
    print(f"DEBUG: {msg}")

fix_thinking_tool_schemas

fix_thinking_tool_schemas(tool_schemas: Dict[str, Any]) -> Dict[str, Any]

Ensure thinking tool has content as a parameter.

Source code in azad/prompts/dialects/xml/parser.py
def fix_thinking_tool_schemas(tool_schemas: Dict[str, Any]) -> Dict[str, Any]:
    """Make sure the "thinking" tool schema lists "content" as a parameter.

    The mapping is modified in place and the same object is returned. Schemas
    without a "thinking" entry are returned untouched.

    Args:
        tool_schemas: Mapping of tool name to that tool's schema.

    Returns:
        The ``tool_schemas`` object that was passed in.
    """
    if "thinking" not in tool_schemas:
        return tool_schemas

    thinking_schema = tool_schemas["thinking"]
    if "parameters" not in thinking_schema:
        # No parameter list at all: create one holding just "content".
        thinking_schema["parameters"] = ["content"]
        return tool_schemas

    declared = thinking_schema["parameters"]
    if "content" not in declared:
        declared.append("content")
    return tool_schemas