import asyncio
import logging
import re
import sys
import traceback
import warnings
from kafka.coordinator.assignors.roundrobin import RoundRobinPartitionAssignor
from aiokafka.abc import ConsumerRebalanceListener
from aiokafka.client import AIOKafkaClient
from aiokafka.errors import (
TopicAuthorizationFailedError, OffsetOutOfRangeError,
ConsumerStoppedError, IllegalOperation, UnsupportedVersionError,
IllegalStateError, NoOffsetForPartitionError, RecordTooLargeError
)
from aiokafka.structs import TopicPartition
from aiokafka.util import (
commit_structure_validate, get_running_loop
)
from aiokafka import __version__
from .fetcher import Fetcher, OffsetResetStrategy
from .group_coordinator import GroupCoordinator, NoGroupCoordinator
from .subscription_state import SubscriptionState
log = logging.getLogger(__name__)
[docs]class AIOKafkaConsumer:
"""
A client that consumes records from a Kafka cluster.
The consumer will transparently handle the failure of servers in the Kafka
cluster, and adapt as topic-partitions are created or migrate between
brokers.
It also interacts with the assigned Kafka Group Coordinator node to allow
multiple consumers to load balance consumption of topics (feature of Kafka
>= 0.9.0.0).
.. _kip-62:
https://cwiki.apache.org/confluence/display/KAFKA/KIP-62%3A+Allow+consumer+to+send+heartbeats+from+a+background+thread
Arguments:
*topics (list(str)): optional list of topics to subscribe to. If not set,
call :meth:`.subscribe` or :meth:`.assign` before consuming records.
Passing topics directly is the same as calling the :meth:`.subscribe` API.
bootstrap_servers (str, list(str)): a ``host[:port]`` string (or list of
``host[:port]`` strings) that the consumer should contact to bootstrap
initial cluster metadata.
This does not have to be the full node list.
It just needs to have at least one broker that will respond to a
Metadata API Request. Default port is 9092. If no servers are
specified, will default to ``localhost:9092``.
client_id (str): a name for this client. This string is passed in
each request to servers and can be used to identify specific
server-side log entries that correspond to this client. Also
submitted to :class:`~.consumer.group_coordinator.GroupCoordinator`
for logging with respect to consumer group administration. Default:
``aiokafka-{version}``
group_id (str or None): name of the consumer group to join for dynamic
partition assignment (if enabled), and to use for fetching and
committing offsets. If None, auto-partition assignment (via
group coordinator) and offset commits are disabled.
Default: None
key_deserializer (Callable): Any callable that takes a
raw message key and returns a deserialized key.
value_deserializer (Callable, Optional): Any callable that takes a
raw message value and returns a deserialized value.
fetch_min_bytes (int): Minimum amount of data the server should
return for a fetch request, otherwise wait up to
`fetch_max_wait_ms` for more data to accumulate. Default: 1.
fetch_max_bytes (int): The maximum amount of data the server should
return for a fetch request. This is not an absolute maximum, if
the first message in the first non-empty partition of the fetch
is larger than this value, the message will still be returned
to ensure that the consumer can make progress. NOTE: consumer
performs fetches to multiple brokers in parallel so memory
usage will depend on the number of brokers containing
partitions for the topic.
Supported Kafka version >= 0.10.1.0. Default: 52428800 (50 Mb).
fetch_max_wait_ms (int): The maximum amount of time in milliseconds
the server will block before answering the fetch request if
there isn't sufficient data to immediately satisfy the
requirement given by fetch_min_bytes. Default: 500.
max_partition_fetch_bytes (int): The maximum amount of data
per-partition the server will return. The maximum total memory
used for a request ``= #partitions * max_partition_fetch_bytes``.
This size must be at least as large as the maximum message size
the server allows or else it is possible for the producer to
send messages larger than the consumer can fetch. If that
happens, the consumer can get stuck trying to fetch a large
message on a certain partition. Default: 1048576.
max_poll_records (int): The maximum number of records returned in a
single call to :meth:`.getmany`. Defaults ``None``, no limit.
request_timeout_ms (int): Client request timeout in milliseconds.
Default: 40000.
retry_backoff_ms (int): Milliseconds to backoff when retrying on
errors. Default: 100.
auto_offset_reset (str): A policy for resetting offsets on
:exc:`.OffsetOutOfRangeError` errors: ``earliest`` will move to the oldest
available message, ``latest`` will move to the most recent, and
``none`` will raise an exception so you can handle this case.
Default: ``latest``.
enable_auto_commit (bool): If true the consumer's offset will be
periodically committed in the background. Default: True.
auto_commit_interval_ms (int): milliseconds between automatic
offset commits, if enable_auto_commit is True. Default: 5000.
check_crcs (bool): Automatically check the CRC32 of the records
consumed. This ensures no on-the-wire or on-disk corruption to
the messages occurred. This check adds some overhead, so it may
be disabled in cases seeking extreme performance. Default: True
metadata_max_age_ms (int): The period of time in milliseconds after
which we force a refresh of metadata even if we haven't seen any
partition leadership changes to proactively discover any new
brokers or partitions. Default: 300000
partition_assignment_strategy (list): List of objects to use to
distribute partition ownership amongst consumer instances when
group management is used. This preference is implicit in the order
of the strategies in the list. When assignment strategy changes:
to support a change to the assignment strategy, new versions must
enable support both for the old assignment strategy and the new
one. The coordinator will choose the old assignment strategy until
all members have been updated. Then it will choose the new
strategy. Default: [:class:`.RoundRobinPartitionAssignor`]
max_poll_interval_ms (int): Maximum allowed time between calls to
consume messages (e.g., :meth:`.getmany`). If this interval
is exceeded the consumer is considered failed and the group will
rebalance in order to reassign the partitions to another consumer
group member. If API methods block waiting for messages, that time
does not count against this timeout. See `KIP-62`_ for more
information. Default 300000
rebalance_timeout_ms (int): The maximum time server will wait for this
consumer to rejoin the group in a case of rebalance. In Java client
this behaviour is bound to `max.poll.interval.ms` configuration,
but as ``aiokafka`` will rejoin the group in the background, we
decouple this setting to allow finer tuning by users that use
:class:`.ConsumerRebalanceListener` to delay rebalancing. Defaults
to ``session_timeout_ms``
session_timeout_ms (int): Client group session and failure detection
timeout. The consumer sends periodic heartbeats
(`heartbeat.interval.ms`) to indicate its liveness to the broker.
If no heartbeats are received by the broker for a group member within
the session timeout, the broker will remove the consumer from the
group and trigger a rebalance. The allowed range is configured with
the **broker** configuration properties
`group.min.session.timeout.ms` and `group.max.session.timeout.ms`.
Default: 10000
heartbeat_interval_ms (int): The expected time in milliseconds
between heartbeats to the consumer coordinator when using
Kafka's group management feature. Heartbeats are used to ensure
that the consumer's session stays active and to facilitate
rebalancing when new consumers join or leave the group. The
value must be set lower than `session_timeout_ms`, but typically
should be set no higher than 1/3 of that value. It can be
adjusted even lower to control the expected time for normal
rebalances. Default: 3000
consumer_timeout_ms (int): maximum wait timeout for background fetching
routine. Mostly defines how fast the system will see rebalance and
request new data for new partitions. Default: 200
api_version (str): specify which kafka API version to use.
:class:`AIOKafkaConsumer` supports Kafka API versions >=0.9 only.
If set to ``auto``, will attempt to infer the broker version by
probing various APIs. Default: ``auto``
security_protocol (str): Protocol used to communicate with brokers.
Valid values are: ``PLAINTEXT``, ``SSL``. Default: ``PLAINTEXT``.
ssl_context (ssl.SSLContext): pre-configured :class:`~ssl.SSLContext`
for wrapping socket connections. Directly passed into asyncio's
:meth:`~asyncio.loop.create_connection`. For more information see
:ref:`ssl_auth`. Default: None.
exclude_internal_topics (bool): Whether records from internal topics
(such as offsets) should be exposed to the consumer. If set to True
the only way to receive records from an internal topic is
subscribing to it. Requires 0.10+ Default: True
connections_max_idle_ms (int): Close idle connections after the number
of milliseconds specified by this config. Specifying `None` will
disable idle checks. Default: 540000 (9 minutes).
isolation_level (str): Controls how to read messages written
transactionally.
If set to ``read_committed``, :meth:`.getmany` will only return
transactional messages which have been committed.
If set to ``read_uncommitted`` (the default), :meth:`.getmany` will
return all messages, even transactional messages which have been
aborted.
Non-transactional messages will be returned unconditionally in
either mode.
Messages will always be returned in offset order. Hence, in
`read_committed` mode, :meth:`.getmany` will only return
messages up to the last stable offset (LSO), which is one less
than the offset of the first open transaction. In particular any
messages appearing after messages belonging to ongoing transactions
will be withheld until the relevant transaction has been completed.
As a result, `read_committed` consumers will not be able to read up
to the high watermark when there are in flight transactions.
Further, when in `read_committed` the seek_to_end method will
return the LSO. See method docs below. Default: ``read_uncommitted``
sasl_mechanism (str): Authentication mechanism when security_protocol
is configured for ``SASL_PLAINTEXT`` or ``SASL_SSL``. Valid values are:
``PLAIN``, ``GSSAPI``, ``SCRAM-SHA-256``, ``SCRAM-SHA-512``,
``OAUTHBEARER``.
Default: ``PLAIN``
sasl_plain_username (str): username for SASL ``PLAIN`` authentication.
Default: None
sasl_plain_password (str): password for SASL ``PLAIN`` authentication.
Default: None
sasl_oauth_token_provider (~aiokafka.abc.AbstractTokenProvider):
OAuthBearer token provider instance. (See :mod:`kafka.oauth.abstract`).
Default: None
Note:
Many configuration parameters are taken from Java Client:
https://kafka.apache.org/documentation.html#newconsumerconfigs
"""
# Class-level defaults read by ``__del__``. ``_closed`` stays ``None`` until
# ``__init__`` sets it to ``False``, and ``__del__`` only reports instances
# whose flag is exactly ``False``.
_closed = None # Serves as an uninitialized flag for __del__
# Replaced in ``__init__`` by the creation stack when the loop is in debug mode.
_source_traceback = None
def __init__(self, *topics, loop=None,
             bootstrap_servers='localhost',
             client_id='aiokafka-' + __version__,
             group_id=None,
             key_deserializer=None, value_deserializer=None,
             fetch_max_wait_ms=500,
             fetch_max_bytes=52428800,
             fetch_min_bytes=1,
             max_partition_fetch_bytes=1 * 1024 * 1024,
             request_timeout_ms=40 * 1000,
             retry_backoff_ms=100,
             auto_offset_reset='latest',
             enable_auto_commit=True,
             auto_commit_interval_ms=5000,
             check_crcs=True,
             metadata_max_age_ms=5 * 60 * 1000,
             partition_assignment_strategy=(RoundRobinPartitionAssignor,),
             max_poll_interval_ms=300000,
             rebalance_timeout_ms=None,
             session_timeout_ms=10000,
             heartbeat_interval_ms=3000,
             consumer_timeout_ms=200,
             max_poll_records=None,
             ssl_context=None,
             security_protocol='PLAINTEXT',
             api_version='auto',
             exclude_internal_topics=True,
             connections_max_idle_ms=540000,
             isolation_level="read_uncommitted",
             sasl_mechanism="PLAIN",
             sasl_plain_password=None,
             sasl_plain_username=None,
             sasl_kerberos_service_name='kafka',
             sasl_kerberos_domain_name=None,
             sasl_oauth_token_provider=None):
    """Validate the arguments, build the client and store the settings.

    The fetcher and the coordinator are left as ``None`` here; they are
    created later by :meth:`start`. Topics given positionally are
    validated and subscribed to immediately.

    Raises:
        ValueError: if ``max_poll_records`` is neither ``None`` nor an
            integer >= 1, or if the positional topics are rejected by
            ``_validate_topics``.
    """
    if loop is not None:
        warnings.warn("The loop argument is deprecated since 0.7.1, "
                      "and scheduled for removal in 0.8.0",
                      DeprecationWarning, stacklevel=2)
    else:
        loop = get_running_loop()

    if max_poll_records is not None:
        is_positive_int = (
            isinstance(max_poll_records, int) and max_poll_records >= 1)
        if not is_positive_int:
            raise ValueError("`max_poll_records` should be positive Integer")

    # The rebalance timeout falls back to the session timeout.
    if rebalance_timeout_ms is None:
        rebalance_timeout_ms = session_timeout_ms

    self._client = AIOKafkaClient(
        loop=loop,
        bootstrap_servers=bootstrap_servers,
        client_id=client_id,
        metadata_max_age_ms=metadata_max_age_ms,
        request_timeout_ms=request_timeout_ms,
        retry_backoff_ms=retry_backoff_ms,
        api_version=api_version,
        ssl_context=ssl_context,
        security_protocol=security_protocol,
        connections_max_idle_ms=connections_max_idle_ms,
        sasl_mechanism=sasl_mechanism,
        sasl_plain_username=sasl_plain_username,
        sasl_plain_password=sasl_plain_password,
        sasl_kerberos_service_name=sasl_kerberos_service_name,
        sasl_kerberos_domain_name=sasl_kerberos_domain_name,
        sasl_oauth_token_provider=sasl_oauth_token_provider,
    )

    # Group membership and offset-commit settings.
    self._group_id = group_id
    self._partition_assignment_strategy = partition_assignment_strategy
    self._heartbeat_interval_ms = heartbeat_interval_ms
    self._session_timeout_ms = session_timeout_ms
    self._rebalance_timeout_ms = rebalance_timeout_ms
    self._max_poll_interval_ms = max_poll_interval_ms
    self._enable_auto_commit = enable_auto_commit
    self._auto_commit_interval_ms = auto_commit_interval_ms
    self._exclude_internal_topics = exclude_internal_topics

    # Fetching and deserialization settings.
    self._key_deserializer = key_deserializer
    self._value_deserializer = value_deserializer
    self._fetch_min_bytes = fetch_min_bytes
    self._fetch_max_bytes = fetch_max_bytes
    self._fetch_max_wait_ms = fetch_max_wait_ms
    self._max_partition_fetch_bytes = max_partition_fetch_bytes
    self._max_poll_records = max_poll_records
    self._check_crcs = check_crcs
    self._auto_offset_reset = auto_offset_reset
    self._isolation_level = isolation_level

    # Timing settings; the consumer timeout is kept in seconds.
    self._request_timeout_ms = request_timeout_ms
    self._retry_backoff_ms = retry_backoff_ms
    self._consumer_timeout = consumer_timeout_ms / 1000

    self._subscription = SubscriptionState(loop=loop)
    self._fetcher = None
    self._coordinator = None
    self._loop = loop

    if loop.get_debug():
        # Remember where the consumer was created, for ``__del__`` reports.
        self._source_traceback = traceback.extract_stack(sys._getframe(1))
    self._closed = False

    if topics:
        topic_set = self._validate_topics(topics)
        self._client.set_topics(topic_set)
        self._subscription.subscribe(topics=topic_set)
def __del__(self, _warnings=warnings):
if self._closed is False:
_warnings.warn(f"Unclosed AIOKafkaConsumer {self!r}",
ResourceWarning,
source=self)
context = {'consumer': self,
'message': 'Unclosed AIOKafkaConsumer'}
if self._source_traceback is not None:
context['source_traceback'] = self._source_traceback
self._loop.call_exception_handler(context)
async def start(self):
    """Connect to the Kafka cluster and make the consumer ready for use.

    This will:

    * Bootstrap the client (load cluster metadata)
    * Wait for metadata of the already subscribed topics (possible topic
      autocreation)
    * Create the fetcher
    * Join the group if ``group_id`` was provided; otherwise set up a
      coordinator that only reassigns partitions on metadata changes

    Raises:
        ValueError: if the broker API version is older than 0.9
        UnsupportedVersionError: if ``isolation_level`` is
            ``read_committed`` and the broker is older than 0.11
    """
    assert self._loop is get_running_loop(), (
        "Please create objects with the same loop as running with"
    )
    assert self._fetcher is None, "Did you call `start` twice?"

    client = self._client
    await client.bootstrap()
    await self._wait_topics()

    if client.api_version < (0, 9):
        raise ValueError(
            f"Unsupported Kafka version: {self._client.api_version}"
        )

    wants_read_committed = self._isolation_level == "read_committed"
    if wants_read_committed and client.api_version < (0, 11):
        raise UnsupportedVersionError(
            "`read_committed` isolation_level available only for Brokers "
            "0.11 and above")

    self._fetcher = Fetcher(
        client, self._subscription,
        key_deserializer=self._key_deserializer,
        value_deserializer=self._value_deserializer,
        fetch_min_bytes=self._fetch_min_bytes,
        fetch_max_bytes=self._fetch_max_bytes,
        fetch_max_wait_ms=self._fetch_max_wait_ms,
        max_partition_fetch_bytes=self._max_partition_fetch_bytes,
        check_crcs=self._check_crcs,
        fetcher_timeout=self._consumer_timeout,
        retry_backoff_ms=self._retry_backoff_ms,
        auto_offset_reset=self._auto_offset_reset,
        isolation_level=self._isolation_level)

    if self._group_id is None:
        # No consumer group: a simple coordinator handles reassignment
        # on metadata changes.
        self._coordinator = NoGroupCoordinator(
            client, self._subscription,
            exclude_internal_topics=self._exclude_internal_topics)
        if (self._subscription.subscription is not None
                and self._subscription.partitions_auto_assigned()):
            # Topics came from the constructor or from `subscribe`
            # called before `start`.
            await client.force_metadata_update()
            self._coordinator.assign_all_partitions(check_unknown=True)
        return

    # Consumer group configured: the group coordinator performs the
    # automatic partition assignment.
    self._coordinator = GroupCoordinator(
        client, self._subscription,
        group_id=self._group_id,
        heartbeat_interval_ms=self._heartbeat_interval_ms,
        session_timeout_ms=self._session_timeout_ms,
        retry_backoff_ms=self._retry_backoff_ms,
        enable_auto_commit=self._enable_auto_commit,
        auto_commit_interval_ms=self._auto_commit_interval_ms,
        assignors=self._partition_assignment_strategy,
        exclude_internal_topics=self._exclude_internal_topics,
        rebalance_timeout_ms=self._rebalance_timeout_ms,
        max_poll_interval_ms=self._max_poll_interval_ms
    )
    if self._subscription.subscription is None:
        return
    if self._subscription.partitions_auto_assigned():
        # Topics came from the constructor or from `subscribe` called
        # before `start`.
        await self._subscription.wait_for_assignment()
    else:
        # `assign` was called before `start`; the refresh task could not
        # be started then because the coordinator did not exist yet.
        self._coordinator.start_commit_offsets_refresh_task(
            self._subscription.subscription.assignment)
async def _wait_topics(self):
if self._subscription.subscription is not None:
for topic in self._subscription.subscription.topics:
await self._client._wait_on_metadata(topic)
def _validate_topics(self, topics):
if not isinstance(topics, (tuple, set, list)):
raise ValueError("Topics should be list of strings")
return set(topics)
[docs] def assign(self, partitions):
"""Manually assign a list of :class:`.TopicPartition` to this consumer.
This interface does not support incremental assignment and will
replace the previous assignment (if there was one).
Arguments:
partitions (list(TopicPartition)): assignment for this instance.
Raises:
IllegalStateError: if consumer has already called :meth:`subscribe`
Warning:
It is not possible to use both manual partition assignment with
:meth:`assign` and group assignment with :meth:`subscribe`.
Note:
Manual topic assignment through this method does not use the
consumer's group management functionality. As such, there will be
**no rebalance operation triggered** when group membership or
cluster and topic metadata change.
"""
self._subscription.assign_from_user(partitions)
self._client.set_topics([tp.topic for tp in partitions])
# If called before `start` we will delegate this to `start` call
if self._coordinator is not None:
if self._group_id is not None:
# refresh commit positions for all assigned partitions
assignment = self._subscription.subscription.assignment
self._coordinator.start_commit_offsets_refresh_task(assignment)
[docs] def assignment(self):
""" Get the set of partitions currently assigned to this consumer.
If partitions were directly assigned using :meth:`assign`, then this will
simply return the same partitions that were previously assigned.
If topics were subscribed using :meth:`subscribe`, then this will give
the set of topic partitions currently assigned to the consumer (which
may be empty if the assignment hasn't happened yet or if the partitions
are in the process of being reassigned).
Returns:
set(TopicPartition): the set of partitions currently assigned to
this consumer
"""
return self._subscription.assigned_partitions()
async def stop(self):
    """Close the consumer, while waiting for finalizers:

    * Commit last consumed message if autocommit enabled
    * Leave group if used Consumer Groups

    Calling this on an already stopped consumer is a no-op. If closing
    one component raises, the remaining components are still closed
    before the exception propagates.
    """
    if self._closed:
        return
    log.debug("Closing the KafkaConsumer.")
    self._closed = True
    # ``_closed`` is already set, so a second ``stop()`` would return early.
    # ``finally`` therefore has to guarantee that a failure while closing
    # the coordinator or the fetcher does not leave the fetcher or the
    # client open with no way to close them.
    try:
        if self._coordinator:
            await self._coordinator.close()
    finally:
        try:
            if self._fetcher:
                await self._fetcher.close()
        finally:
            await self._client.close()
    log.debug("The KafkaConsumer has closed.")
async def commit(self, offsets=None):
    """Commit offsets to Kafka.

    This commits offsets only to Kafka. The offsets committed using this
    API will be used on the first fetch after every rebalance and also on
    startup. As such, if you need to store offsets in anything other than
    Kafka, this API should not be used.

    Currently only supports kafka-topic offset storage (not Zookeeper).

    When explicitly passing `offsets` use either the offset of the next
    record, or a tuple of that offset and metadata::

        tp = TopicPartition(msg.topic, msg.partition)
        metadata = "Some utf-8 metadata"
        # Either the offset alone
        await consumer.commit({tp: msg.offset + 1})
        # Or the offset together with metadata
        await consumer.commit({tp: (msg.offset + 1, metadata)})

    .. note:: If you want *fire and forget* commit, like
        :meth:`~kafka.KafkaConsumer.commit_async` in `kafka-python`_, just
        run it in a task. Something like::

            fut = loop.create_task(consumer.commit())
            fut.add_done_callback(on_commit_done)

    Arguments:
        offsets (dict, Optional): A mapping from :class:`.TopicPartition`
            to ``(offset, metadata)`` to commit with the configured
            ``group_id``. Defaults to current consumed offsets for all
            subscribed partitions.

    Raises:
        ~aiokafka.errors.CommitFailedError: If membership already changed
            on broker.
        ~aiokafka.errors.IllegalOperation: If used with
            ``group_id == None``.
        ~aiokafka.errors.IllegalStateError: If partitions not assigned.
        ~aiokafka.errors.KafkaError: If commit failed on broker side. This
            could be due to invalid offset, too long metadata,
            authorization failure, etc.
        ValueError: If offsets is of wrong format.

    .. versionchanged:: 0.4.0
        Changed :exc:`AssertionError` to
        :exc:`~aiokafka.errors.IllegalStateError` in case of unassigned
        partition.

    .. versionchanged:: 0.4.0
        Will now raise :exc:`~aiokafka.errors.CommitFailedError` in case
        membership changed, as (possibly) this partition is handled by
        another consumer.
    """
    if self._group_id is None:
        raise IllegalOperation("Requires group_id")

    active_subscription = self._subscription.subscription
    if active_subscription is None:
        raise IllegalStateError("Not subscribed to any topics")

    current = active_subscription.assignment
    if current is None:
        raise IllegalStateError("No partitions assigned")

    if offsets is not None:
        to_commit = commit_structure_validate(offsets)
        # Explicit offsets may only refer to partitions we currently own.
        for tp in to_commit:
            if tp in current.tps:
                continue
            raise IllegalStateError(
                f"Partition {tp} is not assigned")
    else:
        to_commit = current.all_consumed_offsets()

    await self._coordinator.commit_offsets(current, to_commit)
async def committed(self, partition):
    """Get the last committed offset for the given partition (whether the
    commit happened by this process or another).

    This offset will be used as the position for the consumer in the
    event of a failure.

    This call will block to do a remote call to get the latest offset, as
    those are not cached by consumer (Transactional Producer can change
    them without Consumer knowledge as of Kafka 0.11.0).

    Arguments:
        partition (TopicPartition): the partition to check

    Returns:
        The last committed offset, or None if there was no prior commit.

    Raises:
        IllegalOperation: If used with ``group_id == None``
    """
    if self._group_id is None:
        raise IllegalOperation("Requires group_id")

    fetched = await self._coordinator.fetch_committed_offsets(
        [partition])
    if partition not in fetched:
        return None

    last_offset = fetched[partition].offset
    # An offset of -1 is reported to the caller as "no prior commit".
    return None if last_offset == -1 else last_offset
[docs] async def topics(self):
""" Get all topics the user is authorized to view.
Returns:
set: topics
"""
cluster = await self._client.fetch_all_metadata()
return cluster.topics()
[docs] def partitions_for_topic(self, topic):
""" Get metadata about the partitions for a given topic.
This method will return `None` if Consumer does not already have
metadata for this topic.
Arguments:
topic (str): topic to check
Returns:
set: partition ids
"""
return self._client.cluster.partitions_for_topic(topic)
async def position(self, partition):
    """Get the offset of the *next record* that will be fetched (if a
    record with that offset exists on broker).

    If the partition has no valid position yet, this waits for one and
    re-checks the assignment on every pass.

    Arguments:
        partition (TopicPartition): partition to check

    Returns:
        int: offset

    Raises:
        IllegalStateError: partition is not assigned

    .. versionchanged:: 0.4.0
        Changed :exc:`AssertionError` to
        :exc:`~aiokafka.errors.IllegalStateError` in case of unassigned
        partition
    """
    while True:
        if not self._subscription.is_assigned(partition):
            raise IllegalStateError(
                f'Partition {partition} is not assigned')

        current = self._subscription.subscription.assignment
        state = current.state_value(partition)
        if state.has_valid_position:
            return state.position

        self._coordinator.check_errors()
        # Wake up when a position arrives or the assignment goes away,
        # but never wait longer than the request timeout.
        await asyncio.wait(
            [state.wait_for_position(), current.unassign_future],
            timeout=self._request_timeout_ms / 1000,
            return_when=asyncio.FIRST_COMPLETED
        )
        if state.has_valid_position:
            return state.position

        # Still no position: make sure the subscription is alive, wait
        # for a new assignment if needed, then start over.
        if self._subscription.subscription is None:
            raise IllegalStateError(
                f'Partition {partition} is not assigned')
        if self._subscription.subscription.assignment is None:
            self._coordinator.check_errors()
            await self._subscription.wait_for_assignment()
[docs] def highwater(self, partition):
""" Last known highwater offset for a partition.
A highwater offset is the offset that will be assigned to the next
message that is produced. It may be useful for calculating lag, by
comparing with the reported position. Note that both position and
highwater refer to the *next* offset – i.e., highwater offset is one
greater than the newest available message.
Highwater offsets are returned as part of ``FetchResponse``, so will
not be available if messages for this partition were not requested yet.
Arguments:
partition (TopicPartition): partition to check
Returns:
int or None: offset if available
"""
assert self._subscription.is_assigned(partition), \
'Partition is not assigned'
assignment = self._subscription.subscription.assignment
return assignment.state_value(partition).highwater
[docs] def last_stable_offset(self, partition):
""" Returns the Last Stable Offset of a topic. It will be the last
offset up to which point all transactions were completed. Only
available in with isolation_level `read_committed`, in
`read_uncommitted` will always return -1. Will return None for older
Brokers.
As with :meth:`highwater` will not be available until some messages are
consumed.
Arguments:
partition (TopicPartition): partition to check
Returns:
int or None: offset if available
"""
assert self._subscription.is_assigned(partition), \
'Partition is not assigned'
assignment = self._subscription.subscription.assignment
return assignment.state_value(partition).lso
def last_poll_timestamp(self, partition):
    """ Return the timestamp (in ms) of the last poll of this partition.

    This is the last time :meth:`highwater` and
    :meth:`last_stable_offset` were updated. It does not imply that new
    messages were received at that moment.

    As with :meth:`highwater`, the value is not available until some
    messages are consumed.

    Arguments:
        partition (TopicPartition): partition to check

    Returns:
        int or None: timestamp if available

    Raises:
        AssertionError: if the partition is not currently assigned
    """
    subscription_state = self._subscription
    assert subscription_state.is_assigned(partition), \
        'Partition is not assigned'
    tp_state = subscription_state.subscription.assignment.state_value(
        partition)
    return tp_state.timestamp
def seek(self, partition, offset):
    """ Manually set the fetch offset for a :class:`.TopicPartition`.

    The given offset replaces the one the consumer would use on the next
    :meth:`getmany`/:meth:`getone` call. When invoked several times for
    the same partition, the most recent offset wins.

    Note:
        You may lose data if this API is arbitrarily used in the middle
        of consumption to reset the fetch offsets. Use it either on
        rebalance listeners or after all pending messages are processed.

    Arguments:
        partition (TopicPartition): partition for seek operation
        offset (int): message offset in partition; must be an integer
            greater than or equal to zero

    Raises:
        ValueError: if offset is not an integer or is negative
        IllegalStateError: partition is not currently assigned

    .. versionchanged:: 0.4.0

        Changed :exc:`AssertionError` to
        :exc:`~aiokafka.errors.IllegalStateError` and :exc:`ValueError` in
        respective cases.
    """
    offset_is_valid = isinstance(offset, int) and offset >= 0
    if not offset_is_valid:
        raise ValueError("Offset must be a positive integer")

    log.debug("Seeking to offset %s for partition %s", offset, partition)
    self._fetcher.seek_to(partition, offset)
async def seek_to_beginning(self, *partitions):
    """ Seek to the oldest available offset for partitions.

    Arguments:
        *partitions: Optionally provide specific :class:`.TopicPartition`,
            otherwise default to all assigned partitions.

    Returns:
        bool: whether the offset reset had completed by the time the wait
        finished (it may end early on timeout or on unassignment).

    Raises:
        IllegalStateError: If any partition is not currently assigned
        TypeError: If partitions are not instances of :class:`.TopicPartition`
        AssertionError: If no partitions were given and none are assigned

    .. versionadded:: 0.3.0
    """
    for candidate in partitions:
        if not isinstance(candidate, TopicPartition):
            raise TypeError('partitions must be TopicPartition instances')

    if partitions:
        not_assigned = (
            set(partitions) - self._subscription.assigned_partitions()
        )
        if not_assigned:
            raise IllegalStateError(
                f"Partitions {not_assigned} are not assigned")
    else:
        # No explicit selection: operate on everything currently assigned.
        partitions = self._subscription.assigned_partitions()
        assert partitions, 'No partitions are currently assigned'

    for tp in partitions:
        log.debug("Seeking to beginning of partition %s", tp)

    reset_done = self._fetcher.request_offset_reset(
        partitions, OffsetResetStrategy.EARLIEST)
    unassigned = self._subscription.subscription.assignment.unassign_future
    # Stop waiting as soon as the reset finishes, the assignment goes away
    # or the request timeout elapses, whichever happens first.
    await asyncio.wait(
        [reset_done, unassigned],
        timeout=self._request_timeout_ms / 1000,
        return_when=asyncio.FIRST_COMPLETED,
    )
    self._coordinator.check_errors()
    return reset_done.done()
async def seek_to_end(self, *partitions):
    """ Seek to the most recent available offset for partitions.

    Arguments:
        *partitions: Optionally provide specific :class:`.TopicPartition`,
            otherwise default to all assigned partitions.

    Returns:
        bool: whether the offset reset had completed by the time the wait
        finished (it may end early on timeout or on unassignment).

    Raises:
        IllegalStateError: If any partition is not currently assigned
        TypeError: If partitions are not instances of :class:`.TopicPartition`
        AssertionError: If no partitions were given and none are assigned

    .. versionadded:: 0.3.0
    """
    for candidate in partitions:
        if not isinstance(candidate, TopicPartition):
            raise TypeError('partitions must be TopicPartition instances')

    if partitions:
        not_assigned = (
            set(partitions) - self._subscription.assigned_partitions()
        )
        if not_assigned:
            raise IllegalStateError(
                f"Partitions {not_assigned} are not assigned")
    else:
        # No explicit selection: operate on everything currently assigned.
        partitions = self._subscription.assigned_partitions()
        assert partitions, 'No partitions are currently assigned'

    for tp in partitions:
        log.debug("Seeking to end of partition %s", tp)

    reset_done = self._fetcher.request_offset_reset(
        partitions, OffsetResetStrategy.LATEST)
    unassigned = self._subscription.subscription.assignment.unassign_future
    # Stop waiting as soon as the reset finishes, the assignment goes away
    # or the request timeout elapses, whichever happens first.
    await asyncio.wait(
        [reset_done, unassigned],
        timeout=self._request_timeout_ms / 1000,
        return_when=asyncio.FIRST_COMPLETED,
    )
    self._coordinator.check_errors()
    return reset_done.done()
async def seek_to_committed(self, *partitions):
    """ Seek to the committed offset for partitions.

    Each partition whose committed offset is greater than zero is
    repositioned to that offset; partitions without such an offset are
    reported in the result but left where they are.

    Arguments:
        *partitions: Optionally provide specific :class:`.TopicPartition`,
            otherwise default to all assigned partitions.

    Returns:
        dict(TopicPartition, int): mapping
        of the currently committed offsets.

    Raises:
        IllegalStateError: If any partition is not currently assigned
        IllegalOperation: If used with ``group_id == None``
        TypeError: If partitions are not instances of :class:`.TopicPartition`
        AssertionError: If no partitions were given and none are assigned

    .. versionchanged:: 0.3.0

        Changed :exc:`AssertionError` to
        :exc:`~aiokafka.errors.IllegalStateError` in case of unassigned
        partition
    """
    for candidate in partitions:
        if not isinstance(candidate, TopicPartition):
            raise TypeError('partitions must be TopicPartition instances')

    if partitions:
        not_assigned = (
            set(partitions) - self._subscription.assigned_partitions()
        )
        if not_assigned:
            raise IllegalStateError(
                f"Partitions {not_assigned} are not assigned")
    else:
        # No explicit selection: operate on everything currently assigned.
        partitions = self._subscription.assigned_partitions()
        assert partitions, 'No partitions are currently assigned'

    result = {}
    for tp in partitions:
        committed = await self.committed(tp)
        result[tp] = committed
        log.debug("Seeking to committed of partition %s %s", tp, committed)
        if not committed or committed <= 0:
            # Nothing usable was committed for this partition.
            continue
        self._fetcher.seek_to(tp, committed)
    return result
async def offsets_for_times(self, timestamps):
    """
    Look up the offsets for the given partitions by timestamp. The returned
    offset for each partition is the earliest offset whose timestamp is
    greater than or equal to the given timestamp in the corresponding
    partition.

    The consumer does not have to be assigned the partitions.

    If the message format version in a partition is before 0.10.0, i.e.
    the messages do not have timestamps, ``None`` will be returned for that
    partition.

    Note:
        This method may block indefinitely if the partition does not exist.

    Arguments:
        timestamps (dict(TopicPartition, int)): mapping from partition
            to the timestamp to look up. Unit should be milliseconds since
            beginning of the epoch (midnight Jan 1, 1970 (UTC)). The
            mapping is not modified; values are converted with ``int()``
            into a private copy before the lookup.

    Returns:
        dict(TopicPartition, OffsetAndTimestamp): mapping from
        partition to the timestamp and offset of the first message with
        timestamp greater than or equal to the target timestamp.

    Raises:
        ValueError: If the target timestamp is negative
        UnsupportedVersionError: If the broker does not support looking
            up the offsets by timestamp.
        KafkaTimeoutError: If fetch failed in `request_timeout_ms`

    .. versionadded:: 0.3.0
    """
    if self._client.api_version <= (0, 10, 0):
        raise UnsupportedVersionError(
            "offsets_for_times API not supported"
            f" for cluster version {self._client.api_version}"
        )

    # Normalise into a fresh dict so the caller's mapping is never
    # rewritten, not even partially when a later entry fails validation.
    normalized = {}
    for tp, ts in timestamps.items():
        as_int = int(ts)
        if ts < 0:
            raise ValueError(
                f"The target time for partition {tp} is {ts}."
                " The target time cannot be negative."
            )
        normalized[tp] = as_int

    return await self._fetcher.get_offsets_by_times(
        normalized, self._request_timeout_ms)
async def beginning_offsets(self, partitions):
    """ Get the first offset for the given partitions.

    The current consumer position of the partitions is left untouched.

    Note:
        This method may block indefinitely if the partition does not exist.

    Arguments:
        partitions (list[TopicPartition]): List of :class:`.TopicPartition`
            instances to fetch offsets for.

    Returns:
        dict [TopicPartition, int]: mapping of partition to earliest
        available offset.

    Raises:
        UnsupportedVersionError: If the broker does not support looking
            up the offsets by timestamp.
        KafkaTimeoutError: If fetch failed in `request_timeout_ms`.

    .. versionadded:: 0.3.0
    """
    broker_version = self._client.api_version
    if broker_version <= (0, 10, 0):
        raise UnsupportedVersionError(
            "offsets_for_times API not supported"
            f" for cluster version {self._client.api_version}"
        )
    return await self._fetcher.beginning_offsets(
        partitions, self._request_timeout_ms)
async def end_offsets(self, partitions):
    """ Get the last offset for the given partitions.

    The last offset of a partition is the offset of the upcoming message,
    i.e. the offset of the last available message + 1. The current
    consumer position of the partitions is left untouched.

    Note:
        This method may block indefinitely if the partition does not exist.

    Arguments:
        partitions (list[TopicPartition]): List of :class:`.TopicPartition`
            instances to fetch offsets for.

    Returns:
        dict [TopicPartition, int]: mapping of partition to last
        available offset + 1.

    Raises:
        UnsupportedVersionError: If the broker does not support looking
            up the offsets by timestamp.
        KafkaTimeoutError: If fetch failed in ``request_timeout_ms``

    .. versionadded:: 0.3.0
    """
    broker_version = self._client.api_version
    if broker_version <= (0, 10, 0):
        raise UnsupportedVersionError(
            "offsets_for_times API not supported"
            f" for cluster version {self._client.api_version}"
        )
    return await self._fetcher.end_offsets(
        partitions, self._request_timeout_ms)
def subscribe(self, topics=(), pattern=None, listener=None):
    """ Subscribe to a list of topics, or a topic regex pattern.

    Partitions will be dynamically assigned via a group coordinator.
    Topic subscriptions are not incremental: this list will replace the
    current assignment (if there is one).

    This method is incompatible with :meth:`assign`.

    Arguments:
        topics (list): List of topics for subscription.
        pattern (str): Pattern to match available topics. You must provide
            either topics or pattern, but not both. An empty pattern
            counts as "not provided".
        listener (ConsumerRebalanceListener): Optionally include listener
            callback, which will be called before and after each rebalance
            operation.
            As part of group management, the consumer will keep track of
            the list of consumers that belong to a particular group and
            will trigger a rebalance operation if one of the following
            events trigger:

            * Number of partitions change for any of the subscribed topics
            * Topic is created or deleted
            * An existing member of the consumer group dies
            * A new member is added to the consumer group

            When any of these events are triggered, the provided listener
            will be invoked first to indicate that the consumer's
            assignment has been revoked, and then again when the new
            assignment has been received. Note that this listener will
            immediately override any listener set in a previous call
            to subscribe. It is guaranteed, however, that the partitions
            revoked/assigned
            through this interface are from topics subscribed in this call.

    Raises:
        IllegalStateError: if called after previously calling :meth:`assign`
        ValueError: if neither topics or pattern is provided or both
            are provided, or if the pattern is not a valid regular
            expression (the original :exc:`re.error` is attached as the
            cause)
        TypeError: if listener is not a :class:`.ConsumerRebalanceListener`
    """
    if not (topics or pattern):
        raise ValueError(
            "You should provide either `topics` or `pattern`")
    if topics and pattern:
        raise ValueError(
            "You can't provide both `topics` and `pattern`")
    if listener is not None and \
            not isinstance(listener, ConsumerRebalanceListener):
        raise TypeError(
            "listener should be an instance of ConsumerRebalanceListener")

    # Branch on truthiness, exactly like the validation above. Testing
    # ``pattern is not None`` here would let ``topics=[...], pattern=""``
    # through the checks and then subscribe to a match-everything pattern
    # while silently ignoring the requested topics.
    if pattern:
        try:
            pattern = re.compile(pattern)
        except re.error as err:
            raise ValueError(
                f"{pattern!r} is not a valid pattern: {err}") from err
        self._subscription.subscribe_pattern(
            pattern=pattern, listener=listener)
        # NOTE: set_topics will trigger a rebalance, so the coordinator
        #       will get the initial subscription shortly by
        #       ``metadata_changed`` handler.
        self._client.set_topics([])
        log.info("Subscribed to topic pattern: %s", pattern)
    else:
        # The checks above guarantee ``topics`` is non-empty here.
        topics = self._validate_topics(topics)
        self._subscription.subscribe(
            topics=topics, listener=listener)
        self._client.set_topics(self._subscription.subscription.topics)
        if self._group_id is None:
            # We have reset the assignment, but client.set_topics will
            # not always do a metadata update. We force it to do it even
            # if metadata did not change. This will trigger a reassignment
            # on NoGroupCoordinator, but only if snapshot did not change,
            # thus we reset it too.
            self._client.force_metadata_update()
            if self._coordinator is not None:
                self._coordinator._metadata_snapshot = {}
        log.info("Subscribed to topic(s): %s", topics)
def subscription(self):
    """ Get the current topics subscription.

    Returns:
        frozenset(str): a set of topics
    """
    subscribed_topics = self._subscription.topics
    return subscribed_topics
def unsubscribe(self):
    """ Unsubscribe from all topics and clear all assigned partitions.

    When the consumer has a ``group_id`` the coordinator is also asked to
    leave the group; the client's topic list is emptied in either case.
    """
    self._subscription.unsubscribe()
    uses_group = self._group_id is not None
    if uses_group:
        self._coordinator.maybe_leave_group()
    self._client.set_topics([])
    log.info(
        "Unsubscribed all topics or patterns and assigned partitions")
async def getone(self, *partitions):
    """
    Get one message from Kafka.
    If no new messages prefetched, this method will wait for it.

    Arguments:
        partitions (list(TopicPartition)): Optional list of partitions to
            return from. If no partitions specified then returned message
            will be from any partition, which consumer is subscribed to.

    Returns:
        ~aiokafka.structs.ConsumerRecord: the message

    Will return instance of

    .. code:: python

        collections.namedtuple(
            "ConsumerRecord",
            ["topic", "partition", "offset", "key", "value"])

    Raises:
        ConsumerStoppedError: if the consumer has already been closed
        AssertionError: if an argument is not a :class:`.TopicPartition`

    Example usage:

    .. code:: python

        while True:
            message = await consumer.getone()
            topic = message.topic
            partition = message.partition
            # Process message
            print(message.offset, message.key, message.value)
    """
    assert all(isinstance(tp, TopicPartition) for tp in partitions)
    if self._closed:
        raise ConsumerStoppedError()

    # Surface any pending coordination error before waiting for data.
    self._coordinator.check_errors()

    with self._subscription.fetch_context():
        return await self._fetcher.next_record(partitions)
async def getmany(self, *partitions, timeout_ms=0, max_records=None):
    """Get messages from assigned topics / partitions.

    Prefetched messages are returned in batches by topic-partition.
    If messages is not available in the prefetched buffer this method waits
    `timeout_ms` milliseconds.

    Arguments:
        partitions (list[TopicPartition]): The partitions that need
            fetching message. If no one partition specified then all
            subscribed partitions will be used
        timeout_ms (int, Optional): milliseconds spent waiting if
            data is not available in the buffer. If 0, returns immediately
            with any records that are available currently in the buffer,
            else returns empty. Must not be negative. Default: 0
        max_records (int, Optional): upper bound on the number of records
            requested from the fetcher. When omitted, the consumer's
            configured ``max_poll_records`` value is used instead.

    Returns:
        dict(TopicPartition, list[ConsumerRecord]): topic to list of
        records since the last fetch for the subscribed list of topics and
        partitions

    Raises:
        ConsumerStoppedError: if the consumer has already been closed
        ValueError: if `max_records` is given but is not an integer >= 1
        AssertionError: if an argument is not a :class:`.TopicPartition`

    Example usage:

    .. code:: python

        data = await consumer.getmany()
        for tp, messages in data.items():
            topic = tp.topic
            partition = tp.partition
            for message in messages:
                # Process message
                print(message.offset, message.key, message.value)
    """
    assert all(isinstance(tp, TopicPartition) for tp in partitions)
    if self._closed:
        raise ConsumerStoppedError()

    if max_records is not None:
        if not isinstance(max_records, int) or max_records < 1:
            raise ValueError("`max_records` must be a positive Integer")

    # Surface any pending coordination error before waiting for data.
    self._coordinator.check_errors()

    timeout_seconds = timeout_ms / 1000
    with self._subscription.fetch_context():
        return await self._fetcher.fetched_records(
            partitions, timeout_seconds,
            max_records=max_records or self._max_poll_records)
def pause(self, *partitions):
    """Suspend fetching from the requested partitions.

    Future calls to :meth:`.getmany` will not return any records from these
    partitions until they have been resumed using :meth:`.resume`.

    Note: This method does not affect partition subscription.
    In particular, it does not cause a group rebalance when automatic
    assignment is used.

    Arguments:
        *partitions (list[TopicPartition]): Partitions to pause.

    Raises:
        TypeError: if any argument is not a :class:`.TopicPartition`
    """
    # Validate every argument up front so nothing is paused on bad input.
    if any(not isinstance(tp, TopicPartition) for tp in partitions):
        raise TypeError('partitions must be TopicPartition namedtuples')

    subscription_state = self._subscription if partitions else None
    for tp in partitions:
        log.debug("Pausing partition %s", tp)
        subscription_state.pause(tp)
def paused(self):
    """Get the partitions that were previously paused using
    :meth:`.pause`.

    Returns:
        set[TopicPartition]: partitions
    """
    paused_now = self._subscription.paused_partitions()
    return paused_now
def resume(self, *partitions):
    """Resume fetching from the specified (paused) partitions.

    Arguments:
        *partitions (list[TopicPartition]): Partitions to resume.

    Raises:
        TypeError: if any argument is not a :class:`.TopicPartition`
    """
    # Validate every argument up front so nothing is resumed on bad input.
    if any(not isinstance(tp, TopicPartition) for tp in partitions):
        raise TypeError('partitions must be TopicPartition namedtuples')

    subscription_state = self._subscription if partitions else None
    for tp in partitions:
        log.debug("Resuming partition %s", tp)
        subscription_state.resume(tp)
def __aiter__(self):
    """Return the consumer itself as its own asynchronous iterator.

    Raises:
        ConsumerStoppedError: if the consumer has already been closed
    """
    if not self._closed:
        return self
    raise ConsumerStoppedError()
async def __anext__(self):
    """Asyncio iterator interface for consumer.

    Returns the next record obtained from :meth:`getone`.

    Note:
        A ``ConsumerStoppedError`` ends the iteration
        (``StopAsyncIteration``). ``TopicAuthorizationFailedError``,
        ``OffsetOutOfRangeError`` and ``NoOffsetForPartitionError`` are
        re-raised to the caller. A ``RecordTooLargeError`` is logged and
        the iterator moves on to the next record. Any other exception
        raised by :meth:`getone` is not handled here and propagates.
    """
    while True:
        try:
            return await self.getone()
        except ConsumerStoppedError:
            raise StopAsyncIteration
        except (TopicAuthorizationFailedError,
                OffsetOutOfRangeError,
                NoOffsetForPartitionError):
            # Bare ``raise`` keeps the original traceback intact.
            raise
        except RecordTooLargeError as err:
            # The placeholder needs its argument, otherwise a literal
            # "%s" ends up in the log message.
            log.exception("error in consumer iterator: %s", err)
async def __aenter__(self):
    """Start the consumer when entering ``async with`` and return it."""
    startup = self.start()
    await startup
    return self
async def __aexit__(self, exc_type, exc, tb):
    """Stop the consumer when leaving ``async with``.

    The exception details are ignored and nothing is returned, so an
    exception raised inside the block is not suppressed.
    """
    shutdown = self.stop()
    await shutdown