Skip to content
Snippets Groups Projects
Commit 0a398936 authored by Dean Jukes's avatar Dean Jukes
Browse files

venv

parents
No related branches found
No related tags found
No related merge requests found
Showing
with 229 additions and 0 deletions
File added
File added
This diff is collapsed.
This diff is collapsed.
File added
from __future__ import absolute_import
import abc
import logging
log = logging.getLogger(__name__)
class AbstractPartitionAssignor(object):
"""
Abstract assignor implementation which does some common grunt work (in particular collecting
partition counts which are always needed in assignors).
"""
@abc.abstractproperty
def name(self):
""".name should be a string identifying the assignor"""
pass
@abc.abstractmethod
def assign(self, cluster, members):
"""Perform group assignment given cluster metadata and member subscriptions
Arguments:
cluster (ClusterMetadata): metadata for use in assignment
members (dict of {member_id: MemberMetadata}): decoded metadata for
each member in the group.
Returns:
dict: {member_id: MemberAssignment}
"""
pass
@abc.abstractmethod
def metadata(self, topics):
"""Generate ProtocolMetadata to be submitted via JoinGroupRequest.
Arguments:
topics (set): a member's subscribed topics
Returns:
MemberMetadata struct
"""
pass
@abc.abstractmethod
def on_assignment(self, assignment):
"""Callback that runs on each assignment.
This method can be used to update internal state, if any, of the
partition assignor.
Arguments:
assignment (MemberAssignment): the member's assignment
"""
pass
from __future__ import absolute_import
import collections
import logging
from kafka.vendor import six
from kafka.coordinator.assignors.abstract import AbstractPartitionAssignor
from kafka.coordinator.protocol import ConsumerProtocolMemberMetadata, ConsumerProtocolMemberAssignment
log = logging.getLogger(__name__)
class RangePartitionAssignor(AbstractPartitionAssignor):
"""
The range assignor works on a per-topic basis. For each topic, we lay out
the available partitions in numeric order and the consumers in
lexicographic order. We then divide the number of partitions by the total
number of consumers to determine the number of partitions to assign to each
consumer. If it does not evenly divide, then the first few consumers will
have one extra partition.
For example, suppose there are two consumers C0 and C1, two topics t0 and
t1, and each topic has 3 partitions, resulting in partitions t0p0, t0p1,
t0p2, t1p0, t1p1, and t1p2.
The assignment will be:
C0: [t0p0, t0p1, t1p0, t1p1]
C1: [t0p2, t1p2]
"""
name = 'range'
version = 0
@classmethod
def assign(cls, cluster, member_metadata):
consumers_per_topic = collections.defaultdict(list)
for member, metadata in six.iteritems(member_metadata):
for topic in metadata.subscription:
consumers_per_topic[topic].append(member)
# construct {member_id: {topic: [partition, ...]}}
assignment = collections.defaultdict(dict)
for topic, consumers_for_topic in six.iteritems(consumers_per_topic):
partitions = cluster.partitions_for_topic(topic)
if partitions is None:
log.warning('No partition metadata for topic %s', topic)
continue
partitions = sorted(partitions)
consumers_for_topic.sort()
partitions_per_consumer = len(partitions) // len(consumers_for_topic)
consumers_with_extra = len(partitions) % len(consumers_for_topic)
for i, member in enumerate(consumers_for_topic):
start = partitions_per_consumer * i
start += min(i, consumers_with_extra)
length = partitions_per_consumer
if not i + 1 > consumers_with_extra:
length += 1
assignment[member][topic] = partitions[start:start+length]
protocol_assignment = {}
for member_id in member_metadata:
protocol_assignment[member_id] = ConsumerProtocolMemberAssignment(
cls.version,
sorted(assignment[member_id].items()),
b'')
return protocol_assignment
@classmethod
def metadata(cls, topics):
return ConsumerProtocolMemberMetadata(cls.version, list(topics), b'')
@classmethod
def on_assignment(cls, assignment):
pass
from __future__ import absolute_import
import collections
import itertools
import logging
from kafka.vendor import six
from kafka.coordinator.assignors.abstract import AbstractPartitionAssignor
from kafka.coordinator.protocol import ConsumerProtocolMemberMetadata, ConsumerProtocolMemberAssignment
from kafka.structs import TopicPartition
log = logging.getLogger(__name__)
class RoundRobinPartitionAssignor(AbstractPartitionAssignor):
"""
The roundrobin assignor lays out all the available partitions and all the
available consumers. It then proceeds to do a roundrobin assignment from
partition to consumer. If the subscriptions of all consumer instances are
identical, then the partitions will be uniformly distributed. (i.e., the
partition ownership counts will be within a delta of exactly one across all
consumers.)
For example, suppose there are two consumers C0 and C1, two topics t0 and
t1, and each topic has 3 partitions, resulting in partitions t0p0, t0p1,
t0p2, t1p0, t1p1, and t1p2.
The assignment will be:
C0: [t0p0, t0p2, t1p1]
C1: [t0p1, t1p0, t1p2]
When subscriptions differ across consumer instances, the assignment process
still considers each consumer instance in round robin fashion but skips
over an instance if it is not subscribed to the topic. Unlike the case when
subscriptions are identical, this can result in imbalanced assignments.
For example, suppose we have three consumers C0, C1, C2, and three topics
t0, t1, t2, with unbalanced partitions t0p0, t1p0, t1p1, t2p0, t2p1, t2p2,
where C0 is subscribed to t0; C1 is subscribed to t0, t1; and C2 is
subscribed to t0, t1, t2.
The assignment will be:
C0: [t0p0]
C1: [t1p0]
C2: [t1p1, t2p0, t2p1, t2p2]
"""
name = 'roundrobin'
version = 0
@classmethod
def assign(cls, cluster, member_metadata):
all_topics = set()
for metadata in six.itervalues(member_metadata):
all_topics.update(metadata.subscription)
all_topic_partitions = []
for topic in all_topics:
partitions = cluster.partitions_for_topic(topic)
if partitions is None:
log.warning('No partition metadata for topic %s', topic)
continue
for partition in partitions:
all_topic_partitions.append(TopicPartition(topic, partition))
all_topic_partitions.sort()
# construct {member_id: {topic: [partition, ...]}}
assignment = collections.defaultdict(lambda: collections.defaultdict(list))
member_iter = itertools.cycle(sorted(member_metadata.keys()))
for partition in all_topic_partitions:
member_id = next(member_iter)
# Because we constructed all_topic_partitions from the set of
# member subscribed topics, we should be safe assuming that
# each topic in all_topic_partitions is in at least one member
# subscription; otherwise this could yield an infinite loop
while partition.topic not in member_metadata[member_id].subscription:
member_id = next(member_iter)
assignment[member_id][partition.topic].append(partition.partition)
protocol_assignment = {}
for member_id in member_metadata:
protocol_assignment[member_id] = ConsumerProtocolMemberAssignment(
cls.version,
sorted(assignment[member_id].items()),
b'')
return protocol_assignment
@classmethod
def metadata(cls, topics):
return ConsumerProtocolMemberMetadata(cls.version, list(topics), b'')
@classmethod
def on_assignment(cls, assignment):
pass
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment