Add HashiCorp Nomad provider (#483)
* provider: adding Nomad provider * updating CONTRIBUTING.md with Nomad provider * updated README.md by adding the Nomad provider * fix typo * adding nomad/api and nomad/testutil deps * adding Nomad binary dependency for provider tests * fixed the nomad binary download command step and added tolerations to the nomad provider. * adding nomad provider demo gif * adding my name to authors * adding two missing go-rootcerts files after dep ensure * delete pod comment
This commit is contained in:
committed by
Robbie Zhang
parent
5796be449b
commit
a46e1dd2ce
354
vendor/github.com/hashicorp/raft/LICENSE
generated
vendored
Normal file
354
vendor/github.com/hashicorp/raft/LICENSE
generated
vendored
Normal file
@@ -0,0 +1,354 @@
|
||||
Mozilla Public License, version 2.0
|
||||
|
||||
1. Definitions
|
||||
|
||||
1.1. “Contributor”
|
||||
|
||||
means each individual or legal entity that creates, contributes to the
|
||||
creation of, or owns Covered Software.
|
||||
|
||||
1.2. “Contributor Version”
|
||||
|
||||
means the combination of the Contributions of others (if any) used by a
|
||||
Contributor and that particular Contributor’s Contribution.
|
||||
|
||||
1.3. “Contribution”
|
||||
|
||||
means Covered Software of a particular Contributor.
|
||||
|
||||
1.4. “Covered Software”
|
||||
|
||||
means Source Code Form to which the initial Contributor has attached the
|
||||
notice in Exhibit A, the Executable Form of such Source Code Form, and
|
||||
Modifications of such Source Code Form, in each case including portions
|
||||
thereof.
|
||||
|
||||
1.5. “Incompatible With Secondary Licenses”
|
||||
means
|
||||
|
||||
a. that the initial Contributor has attached the notice described in
|
||||
Exhibit B to the Covered Software; or
|
||||
|
||||
b. that the Covered Software was made available under the terms of version
|
||||
1.1 or earlier of the License, but not also under the terms of a
|
||||
Secondary License.
|
||||
|
||||
1.6. “Executable Form”
|
||||
|
||||
means any form of the work other than Source Code Form.
|
||||
|
||||
1.7. “Larger Work”
|
||||
|
||||
means a work that combines Covered Software with other material, in a separate
|
||||
file or files, that is not Covered Software.
|
||||
|
||||
1.8. “License”
|
||||
|
||||
means this document.
|
||||
|
||||
1.9. “Licensable”
|
||||
|
||||
means having the right to grant, to the maximum extent possible, whether at the
|
||||
time of the initial grant or subsequently, any and all of the rights conveyed by
|
||||
this License.
|
||||
|
||||
1.10. “Modifications”
|
||||
|
||||
means any of the following:
|
||||
|
||||
a. any file in Source Code Form that results from an addition to, deletion
|
||||
from, or modification of the contents of Covered Software; or
|
||||
|
||||
b. any new file in Source Code Form that contains any Covered Software.
|
||||
|
||||
1.11. “Patent Claims” of a Contributor
|
||||
|
||||
means any patent claim(s), including without limitation, method, process,
|
||||
and apparatus claims, in any patent Licensable by such Contributor that
|
||||
would be infringed, but for the grant of the License, by the making,
|
||||
using, selling, offering for sale, having made, import, or transfer of
|
||||
either its Contributions or its Contributor Version.
|
||||
|
||||
1.12. “Secondary License”
|
||||
|
||||
means either the GNU General Public License, Version 2.0, the GNU Lesser
|
||||
General Public License, Version 2.1, the GNU Affero General Public
|
||||
License, Version 3.0, or any later versions of those licenses.
|
||||
|
||||
1.13. “Source Code Form”
|
||||
|
||||
means the form of the work preferred for making modifications.
|
||||
|
||||
1.14. “You” (or “Your”)
|
||||
|
||||
means an individual or a legal entity exercising rights under this
|
||||
License. For legal entities, “You” includes any entity that controls, is
|
||||
controlled by, or is under common control with You. For purposes of this
|
||||
definition, “control” means (a) the power, direct or indirect, to cause
|
||||
the direction or management of such entity, whether by contract or
|
||||
otherwise, or (b) ownership of more than fifty percent (50%) of the
|
||||
outstanding shares or beneficial ownership of such entity.
|
||||
|
||||
|
||||
2. License Grants and Conditions
|
||||
|
||||
2.1. Grants
|
||||
|
||||
Each Contributor hereby grants You a world-wide, royalty-free,
|
||||
non-exclusive license:
|
||||
|
||||
a. under intellectual property rights (other than patent or trademark)
|
||||
Licensable by such Contributor to use, reproduce, make available,
|
||||
modify, display, perform, distribute, and otherwise exploit its
|
||||
Contributions, either on an unmodified basis, with Modifications, or as
|
||||
part of a Larger Work; and
|
||||
|
||||
b. under Patent Claims of such Contributor to make, use, sell, offer for
|
||||
sale, have made, import, and otherwise transfer either its Contributions
|
||||
or its Contributor Version.
|
||||
|
||||
2.2. Effective Date
|
||||
|
||||
The licenses granted in Section 2.1 with respect to any Contribution become
|
||||
effective for each Contribution on the date the Contributor first distributes
|
||||
such Contribution.
|
||||
|
||||
2.3. Limitations on Grant Scope
|
||||
|
||||
The licenses granted in this Section 2 are the only rights granted under this
|
||||
License. No additional rights or licenses will be implied from the distribution
|
||||
or licensing of Covered Software under this License. Notwithstanding Section
|
||||
2.1(b) above, no patent license is granted by a Contributor:
|
||||
|
||||
a. for any code that a Contributor has removed from Covered Software; or
|
||||
|
||||
b. for infringements caused by: (i) Your and any other third party’s
|
||||
modifications of Covered Software, or (ii) the combination of its
|
||||
Contributions with other software (except as part of its Contributor
|
||||
Version); or
|
||||
|
||||
c. under Patent Claims infringed by Covered Software in the absence of its
|
||||
Contributions.
|
||||
|
||||
This License does not grant any rights in the trademarks, service marks, or
|
||||
logos of any Contributor (except as may be necessary to comply with the
|
||||
notice requirements in Section 3.4).
|
||||
|
||||
2.4. Subsequent Licenses
|
||||
|
||||
No Contributor makes additional grants as a result of Your choice to
|
||||
distribute the Covered Software under a subsequent version of this License
|
||||
(see Section 10.2) or under the terms of a Secondary License (if permitted
|
||||
under the terms of Section 3.3).
|
||||
|
||||
2.5. Representation
|
||||
|
||||
Each Contributor represents that the Contributor believes its Contributions
|
||||
are its original creation(s) or it has sufficient rights to grant the
|
||||
rights to its Contributions conveyed by this License.
|
||||
|
||||
2.6. Fair Use
|
||||
|
||||
This License is not intended to limit any rights You have under applicable
|
||||
copyright doctrines of fair use, fair dealing, or other equivalents.
|
||||
|
||||
2.7. Conditions
|
||||
|
||||
Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in
|
||||
Section 2.1.
|
||||
|
||||
|
||||
3. Responsibilities
|
||||
|
||||
3.1. Distribution of Source Form
|
||||
|
||||
All distribution of Covered Software in Source Code Form, including any
|
||||
Modifications that You create or to which You contribute, must be under the
|
||||
terms of this License. You must inform recipients that the Source Code Form
|
||||
of the Covered Software is governed by the terms of this License, and how
|
||||
they can obtain a copy of this License. You may not attempt to alter or
|
||||
restrict the recipients’ rights in the Source Code Form.
|
||||
|
||||
3.2. Distribution of Executable Form
|
||||
|
||||
If You distribute Covered Software in Executable Form then:
|
||||
|
||||
a. such Covered Software must also be made available in Source Code Form,
|
||||
as described in Section 3.1, and You must inform recipients of the
|
||||
Executable Form how they can obtain a copy of such Source Code Form by
|
||||
reasonable means in a timely manner, at a charge no more than the cost
|
||||
of distribution to the recipient; and
|
||||
|
||||
b. You may distribute such Executable Form under the terms of this License,
|
||||
or sublicense it under different terms, provided that the license for
|
||||
the Executable Form does not attempt to limit or alter the recipients’
|
||||
rights in the Source Code Form under this License.
|
||||
|
||||
3.3. Distribution of a Larger Work
|
||||
|
||||
You may create and distribute a Larger Work under terms of Your choice,
|
||||
provided that You also comply with the requirements of this License for the
|
||||
Covered Software. If the Larger Work is a combination of Covered Software
|
||||
with a work governed by one or more Secondary Licenses, and the Covered
|
||||
Software is not Incompatible With Secondary Licenses, this License permits
|
||||
You to additionally distribute such Covered Software under the terms of
|
||||
such Secondary License(s), so that the recipient of the Larger Work may, at
|
||||
their option, further distribute the Covered Software under the terms of
|
||||
either this License or such Secondary License(s).
|
||||
|
||||
3.4. Notices
|
||||
|
||||
You may not remove or alter the substance of any license notices (including
|
||||
copyright notices, patent notices, disclaimers of warranty, or limitations
|
||||
of liability) contained within the Source Code Form of the Covered
|
||||
Software, except that You may alter any license notices to the extent
|
||||
required to remedy known factual inaccuracies.
|
||||
|
||||
3.5. Application of Additional Terms
|
||||
|
||||
You may choose to offer, and to charge a fee for, warranty, support,
|
||||
indemnity or liability obligations to one or more recipients of Covered
|
||||
Software. However, You may do so only on Your own behalf, and not on behalf
|
||||
of any Contributor. You must make it absolutely clear that any such
|
||||
warranty, support, indemnity, or liability obligation is offered by You
|
||||
alone, and You hereby agree to indemnify every Contributor for any
|
||||
liability incurred by such Contributor as a result of warranty, support,
|
||||
indemnity or liability terms You offer. You may include additional
|
||||
disclaimers of warranty and limitations of liability specific to any
|
||||
jurisdiction.
|
||||
|
||||
4. Inability to Comply Due to Statute or Regulation
|
||||
|
||||
If it is impossible for You to comply with any of the terms of this License
|
||||
with respect to some or all of the Covered Software due to statute, judicial
|
||||
order, or regulation then You must: (a) comply with the terms of this License
|
||||
to the maximum extent possible; and (b) describe the limitations and the code
|
||||
they affect. Such description must be placed in a text file included with all
|
||||
distributions of the Covered Software under this License. Except to the
|
||||
extent prohibited by statute or regulation, such description must be
|
||||
sufficiently detailed for a recipient of ordinary skill to be able to
|
||||
understand it.
|
||||
|
||||
5. Termination
|
||||
|
||||
5.1. The rights granted under this License will terminate automatically if You
|
||||
fail to comply with any of its terms. However, if You become compliant,
|
||||
then the rights granted under this License from a particular Contributor
|
||||
are reinstated (a) provisionally, unless and until such Contributor
|
||||
explicitly and finally terminates Your grants, and (b) on an ongoing basis,
|
||||
if such Contributor fails to notify You of the non-compliance by some
|
||||
reasonable means prior to 60 days after You have come back into compliance.
|
||||
Moreover, Your grants from a particular Contributor are reinstated on an
|
||||
ongoing basis if such Contributor notifies You of the non-compliance by
|
||||
some reasonable means, this is the first time You have received notice of
|
||||
non-compliance with this License from such Contributor, and You become
|
||||
compliant prior to 30 days after Your receipt of the notice.
|
||||
|
||||
5.2. If You initiate litigation against any entity by asserting a patent
|
||||
infringement claim (excluding declaratory judgment actions, counter-claims,
|
||||
and cross-claims) alleging that a Contributor Version directly or
|
||||
indirectly infringes any patent, then the rights granted to You by any and
|
||||
all Contributors for the Covered Software under Section 2.1 of this License
|
||||
shall terminate.
|
||||
|
||||
5.3. In the event of termination under Sections 5.1 or 5.2 above, all end user
|
||||
license agreements (excluding distributors and resellers) which have been
|
||||
validly granted by You or Your distributors under this License prior to
|
||||
termination shall survive termination.
|
||||
|
||||
6. Disclaimer of Warranty
|
||||
|
||||
Covered Software is provided under this License on an “as is” basis, without
|
||||
warranty of any kind, either expressed, implied, or statutory, including,
|
||||
without limitation, warranties that the Covered Software is free of defects,
|
||||
merchantable, fit for a particular purpose or non-infringing. The entire
|
||||
risk as to the quality and performance of the Covered Software is with You.
|
||||
Should any Covered Software prove defective in any respect, You (not any
|
||||
Contributor) assume the cost of any necessary servicing, repair, or
|
||||
correction. This disclaimer of warranty constitutes an essential part of this
|
||||
License. No use of any Covered Software is authorized under this License
|
||||
except under this disclaimer.
|
||||
|
||||
7. Limitation of Liability
|
||||
|
||||
Under no circumstances and under no legal theory, whether tort (including
|
||||
negligence), contract, or otherwise, shall any Contributor, or anyone who
|
||||
distributes Covered Software as permitted above, be liable to You for any
|
||||
direct, indirect, special, incidental, or consequential damages of any
|
||||
character including, without limitation, damages for lost profits, loss of
|
||||
goodwill, work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses, even if such party shall have been
|
||||
informed of the possibility of such damages. This limitation of liability
|
||||
shall not apply to liability for death or personal injury resulting from such
|
||||
party’s negligence to the extent applicable law prohibits such limitation.
|
||||
Some jurisdictions do not allow the exclusion or limitation of incidental or
|
||||
consequential damages, so this exclusion and limitation may not apply to You.
|
||||
|
||||
8. Litigation
|
||||
|
||||
Any litigation relating to this License may be brought only in the courts of
|
||||
a jurisdiction where the defendant maintains its principal place of business
|
||||
and such litigation shall be governed by laws of that jurisdiction, without
|
||||
reference to its conflict-of-law provisions. Nothing in this Section shall
|
||||
prevent a party’s ability to bring cross-claims or counter-claims.
|
||||
|
||||
9. Miscellaneous
|
||||
|
||||
This License represents the complete agreement concerning the subject matter
|
||||
hereof. If any provision of this License is held to be unenforceable, such
|
||||
provision shall be reformed only to the extent necessary to make it
|
||||
enforceable. Any law or regulation which provides that the language of a
|
||||
contract shall be construed against the drafter shall not be used to construe
|
||||
this License against a Contributor.
|
||||
|
||||
|
||||
10. Versions of the License
|
||||
|
||||
10.1. New Versions
|
||||
|
||||
Mozilla Foundation is the license steward. Except as provided in Section
|
||||
10.3, no one other than the license steward has the right to modify or
|
||||
publish new versions of this License. Each version will be given a
|
||||
distinguishing version number.
|
||||
|
||||
10.2. Effect of New Versions
|
||||
|
||||
You may distribute the Covered Software under the terms of the version of
|
||||
the License under which You originally received the Covered Software, or
|
||||
under the terms of any subsequent version published by the license
|
||||
steward.
|
||||
|
||||
10.3. Modified Versions
|
||||
|
||||
If you create software not governed by this License, and you want to
|
||||
create a new license for such software, you may create and use a modified
|
||||
version of this License if you rename the license and remove any
|
||||
references to the name of the license steward (except to note that such
|
||||
modified license differs from this License).
|
||||
|
||||
10.4. Distributing Source Code Form that is Incompatible With Secondary Licenses
|
||||
If You choose to distribute Source Code Form that is Incompatible With
|
||||
Secondary Licenses under the terms of this version of the License, the
|
||||
notice described in Exhibit B of this License must be attached.
|
||||
|
||||
Exhibit A - Source Code Form License Notice
|
||||
|
||||
This Source Code Form is subject to the
|
||||
terms of the Mozilla Public License, v.
|
||||
2.0. If a copy of the MPL was not
|
||||
distributed with this file, You can
|
||||
obtain one at
|
||||
http://mozilla.org/MPL/2.0/.
|
||||
|
||||
If it is not possible or desirable to put the notice in a particular file, then
|
||||
You may include the notice in a location (such as a LICENSE file in a relevant
|
||||
directory) where a recipient would be likely to look for such a notice.
|
||||
|
||||
You may add additional accurate notices of copyright ownership.
|
||||
|
||||
Exhibit B - “Incompatible With Secondary Licenses” Notice
|
||||
|
||||
This Source Code Form is “Incompatible
|
||||
With Secondary Licenses”, as defined by
|
||||
the Mozilla Public License, v. 2.0.
|
||||
|
||||
1008
vendor/github.com/hashicorp/raft/api.go
generated
vendored
Normal file
1008
vendor/github.com/hashicorp/raft/api.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
151
vendor/github.com/hashicorp/raft/commands.go
generated
vendored
Normal file
151
vendor/github.com/hashicorp/raft/commands.go
generated
vendored
Normal file
@@ -0,0 +1,151 @@
|
||||
package raft
|
||||
|
||||
// RPCHeader is a common sub-structure used to pass along protocol version and
|
||||
// other information about the cluster. For older Raft implementations before
|
||||
// versioning was added this will default to a zero-valued structure when read
|
||||
// by newer Raft versions.
|
||||
type RPCHeader struct {
|
||||
// ProtocolVersion is the version of the protocol the sender is
|
||||
// speaking.
|
||||
ProtocolVersion ProtocolVersion
|
||||
}
|
||||
|
||||
// WithRPCHeader is an interface that exposes the RPC header.
|
||||
type WithRPCHeader interface {
|
||||
GetRPCHeader() RPCHeader
|
||||
}
|
||||
|
||||
// AppendEntriesRequest is the command used to append entries to the
|
||||
// replicated log.
|
||||
type AppendEntriesRequest struct {
|
||||
RPCHeader
|
||||
|
||||
// Provide the current term and leader
|
||||
Term uint64
|
||||
Leader []byte
|
||||
|
||||
// Provide the previous entries for integrity checking
|
||||
PrevLogEntry uint64
|
||||
PrevLogTerm uint64
|
||||
|
||||
// New entries to commit
|
||||
Entries []*Log
|
||||
|
||||
// Commit index on the leader
|
||||
LeaderCommitIndex uint64
|
||||
}
|
||||
|
||||
// See WithRPCHeader.
|
||||
func (r *AppendEntriesRequest) GetRPCHeader() RPCHeader {
|
||||
return r.RPCHeader
|
||||
}
|
||||
|
||||
// AppendEntriesResponse is the response returned from an
|
||||
// AppendEntriesRequest.
|
||||
type AppendEntriesResponse struct {
|
||||
RPCHeader
|
||||
|
||||
// Newer term if leader is out of date
|
||||
Term uint64
|
||||
|
||||
// Last Log is a hint to help accelerate rebuilding slow nodes
|
||||
LastLog uint64
|
||||
|
||||
// We may not succeed if we have a conflicting entry
|
||||
Success bool
|
||||
|
||||
// There are scenarios where this request didn't succeed
|
||||
// but there's no need to wait/back-off the next attempt.
|
||||
NoRetryBackoff bool
|
||||
}
|
||||
|
||||
// See WithRPCHeader.
|
||||
func (r *AppendEntriesResponse) GetRPCHeader() RPCHeader {
|
||||
return r.RPCHeader
|
||||
}
|
||||
|
||||
// RequestVoteRequest is the command used by a candidate to ask a Raft peer
|
||||
// for a vote in an election.
|
||||
type RequestVoteRequest struct {
|
||||
RPCHeader
|
||||
|
||||
// Provide the term and our id
|
||||
Term uint64
|
||||
Candidate []byte
|
||||
|
||||
// Used to ensure safety
|
||||
LastLogIndex uint64
|
||||
LastLogTerm uint64
|
||||
}
|
||||
|
||||
// See WithRPCHeader.
|
||||
func (r *RequestVoteRequest) GetRPCHeader() RPCHeader {
|
||||
return r.RPCHeader
|
||||
}
|
||||
|
||||
// RequestVoteResponse is the response returned from a RequestVoteRequest.
|
||||
type RequestVoteResponse struct {
|
||||
RPCHeader
|
||||
|
||||
// Newer term if leader is out of date.
|
||||
Term uint64
|
||||
|
||||
// Peers is deprecated, but required by servers that only understand
|
||||
// protocol version 0. This is not populated in protocol version 2
|
||||
// and later.
|
||||
Peers []byte
|
||||
|
||||
// Is the vote granted.
|
||||
Granted bool
|
||||
}
|
||||
|
||||
// See WithRPCHeader.
|
||||
func (r *RequestVoteResponse) GetRPCHeader() RPCHeader {
|
||||
return r.RPCHeader
|
||||
}
|
||||
|
||||
// InstallSnapshotRequest is the command sent to a Raft peer to bootstrap its
|
||||
// log (and state machine) from a snapshot on another peer.
|
||||
type InstallSnapshotRequest struct {
|
||||
RPCHeader
|
||||
SnapshotVersion SnapshotVersion
|
||||
|
||||
Term uint64
|
||||
Leader []byte
|
||||
|
||||
// These are the last index/term included in the snapshot
|
||||
LastLogIndex uint64
|
||||
LastLogTerm uint64
|
||||
|
||||
// Peer Set in the snapshot. This is deprecated in favor of Configuration
|
||||
// but remains here in case we receive an InstallSnapshot from a leader
|
||||
// that's running old code.
|
||||
Peers []byte
|
||||
|
||||
// Cluster membership.
|
||||
Configuration []byte
|
||||
// Log index where 'Configuration' entry was originally written.
|
||||
ConfigurationIndex uint64
|
||||
|
||||
// Size of the snapshot
|
||||
Size int64
|
||||
}
|
||||
|
||||
// See WithRPCHeader.
|
||||
func (r *InstallSnapshotRequest) GetRPCHeader() RPCHeader {
|
||||
return r.RPCHeader
|
||||
}
|
||||
|
||||
// InstallSnapshotResponse is the response returned from an
|
||||
// InstallSnapshotRequest.
|
||||
type InstallSnapshotResponse struct {
|
||||
RPCHeader
|
||||
|
||||
Term uint64
|
||||
Success bool
|
||||
}
|
||||
|
||||
// See WithRPCHeader.
|
||||
func (r *InstallSnapshotResponse) GetRPCHeader() RPCHeader {
|
||||
return r.RPCHeader
|
||||
}
|
||||
101
vendor/github.com/hashicorp/raft/commitment.go
generated
vendored
Normal file
101
vendor/github.com/hashicorp/raft/commitment.go
generated
vendored
Normal file
@@ -0,0 +1,101 @@
|
||||
package raft
|
||||
|
||||
import (
|
||||
"sort"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// Commitment is used to advance the leader's commit index. The leader and
|
||||
// replication goroutines report in newly written entries with Match(), and
|
||||
// this notifies on commitCh when the commit index has advanced.
|
||||
type commitment struct {
|
||||
// protectes matchIndexes and commitIndex
|
||||
sync.Mutex
|
||||
// notified when commitIndex increases
|
||||
commitCh chan struct{}
|
||||
// voter ID to log index: the server stores up through this log entry
|
||||
matchIndexes map[ServerID]uint64
|
||||
// a quorum stores up through this log entry. monotonically increases.
|
||||
commitIndex uint64
|
||||
// the first index of this leader's term: this needs to be replicated to a
|
||||
// majority of the cluster before this leader may mark anything committed
|
||||
// (per Raft's commitment rule)
|
||||
startIndex uint64
|
||||
}
|
||||
|
||||
// newCommitment returns an commitment struct that notifies the provided
|
||||
// channel when log entries have been committed. A new commitment struct is
|
||||
// created each time this server becomes leader for a particular term.
|
||||
// 'configuration' is the servers in the cluster.
|
||||
// 'startIndex' is the first index created in this term (see
|
||||
// its description above).
|
||||
func newCommitment(commitCh chan struct{}, configuration Configuration, startIndex uint64) *commitment {
|
||||
matchIndexes := make(map[ServerID]uint64)
|
||||
for _, server := range configuration.Servers {
|
||||
if server.Suffrage == Voter {
|
||||
matchIndexes[server.ID] = 0
|
||||
}
|
||||
}
|
||||
return &commitment{
|
||||
commitCh: commitCh,
|
||||
matchIndexes: matchIndexes,
|
||||
commitIndex: 0,
|
||||
startIndex: startIndex,
|
||||
}
|
||||
}
|
||||
|
||||
// Called when a new cluster membership configuration is created: it will be
|
||||
// used to determine commitment from now on. 'configuration' is the servers in
|
||||
// the cluster.
|
||||
func (c *commitment) setConfiguration(configuration Configuration) {
|
||||
c.Lock()
|
||||
defer c.Unlock()
|
||||
oldMatchIndexes := c.matchIndexes
|
||||
c.matchIndexes = make(map[ServerID]uint64)
|
||||
for _, server := range configuration.Servers {
|
||||
if server.Suffrage == Voter {
|
||||
c.matchIndexes[server.ID] = oldMatchIndexes[server.ID] // defaults to 0
|
||||
}
|
||||
}
|
||||
c.recalculate()
|
||||
}
|
||||
|
||||
// Called by leader after commitCh is notified
|
||||
func (c *commitment) getCommitIndex() uint64 {
|
||||
c.Lock()
|
||||
defer c.Unlock()
|
||||
return c.commitIndex
|
||||
}
|
||||
|
||||
// Match is called once a server completes writing entries to disk: either the
|
||||
// leader has written the new entry or a follower has replied to an
|
||||
// AppendEntries RPC. The given server's disk agrees with this server's log up
|
||||
// through the given index.
|
||||
func (c *commitment) match(server ServerID, matchIndex uint64) {
|
||||
c.Lock()
|
||||
defer c.Unlock()
|
||||
if prev, hasVote := c.matchIndexes[server]; hasVote && matchIndex > prev {
|
||||
c.matchIndexes[server] = matchIndex
|
||||
c.recalculate()
|
||||
}
|
||||
}
|
||||
|
||||
// Internal helper to calculate new commitIndex from matchIndexes.
|
||||
// Must be called with lock held.
|
||||
func (c *commitment) recalculate() {
|
||||
if len(c.matchIndexes) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
matched := make([]uint64, 0, len(c.matchIndexes))
|
||||
for _, idx := range c.matchIndexes {
|
||||
matched = append(matched, idx)
|
||||
}
|
||||
sort.Sort(uint64Slice(matched))
|
||||
quorumMatchIndex := matched[(len(matched)-1)/2]
|
||||
|
||||
if quorumMatchIndex > c.commitIndex && quorumMatchIndex >= c.startIndex {
|
||||
c.commitIndex = quorumMatchIndex
|
||||
asyncNotifyCh(c.commitCh)
|
||||
}
|
||||
}
|
||||
258
vendor/github.com/hashicorp/raft/config.go
generated
vendored
Normal file
258
vendor/github.com/hashicorp/raft/config.go
generated
vendored
Normal file
@@ -0,0 +1,258 @@
|
||||
package raft
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"time"
|
||||
)
|
||||
|
||||
// These are the versions of the protocol (which includes RPC messages as
|
||||
// well as Raft-specific log entries) that this server can _understand_. Use
|
||||
// the ProtocolVersion member of the Config object to control the version of
|
||||
// the protocol to use when _speaking_ to other servers. Note that depending on
|
||||
// the protocol version being spoken, some otherwise understood RPC messages
|
||||
// may be refused. See dispositionRPC for details of this logic.
|
||||
//
|
||||
// There are notes about the upgrade path in the description of the versions
|
||||
// below. If you are starting a fresh cluster then there's no reason not to
|
||||
// jump right to the latest protocol version. If you need to interoperate with
|
||||
// older, version 0 Raft servers you'll need to drive the cluster through the
|
||||
// different versions in order.
|
||||
//
|
||||
// The version details are complicated, but here's a summary of what's required
|
||||
// to get from a version 0 cluster to version 3:
|
||||
//
|
||||
// 1. In version N of your app that starts using the new Raft library with
|
||||
// versioning, set ProtocolVersion to 1.
|
||||
// 2. Make version N+1 of your app require version N as a prerequisite (all
|
||||
// servers must be upgraded). For version N+1 of your app set ProtocolVersion
|
||||
// to 2.
|
||||
// 3. Similarly, make version N+2 of your app require version N+1 as a
|
||||
// prerequisite. For version N+2 of your app, set ProtocolVersion to 3.
|
||||
//
|
||||
// During this upgrade, older cluster members will still have Server IDs equal
|
||||
// to their network addresses. To upgrade an older member and give it an ID, it
|
||||
// needs to leave the cluster and re-enter:
|
||||
//
|
||||
// 1. Remove the server from the cluster with RemoveServer, using its network
|
||||
// address as its ServerID.
|
||||
// 2. Update the server's config to a better ID (restarting the server).
|
||||
// 3. Add the server back to the cluster with AddVoter, using its new ID.
|
||||
//
|
||||
// You can do this during the rolling upgrade from N+1 to N+2 of your app, or
|
||||
// as a rolling change at any time after the upgrade.
|
||||
//
|
||||
// Version History
|
||||
//
|
||||
// 0: Original Raft library before versioning was added. Servers running this
|
||||
// version of the Raft library use AddPeerDeprecated/RemovePeerDeprecated
|
||||
// for all configuration changes, and have no support for LogConfiguration.
|
||||
// 1: First versioned protocol, used to interoperate with old servers, and begin
|
||||
// the migration path to newer versions of the protocol. Under this version
|
||||
// all configuration changes are propagated using the now-deprecated
|
||||
// RemovePeerDeprecated Raft log entry. This means that server IDs are always
|
||||
// set to be the same as the server addresses (since the old log entry type
|
||||
// cannot transmit an ID), and only AddPeer/RemovePeer APIs are supported.
|
||||
// Servers running this version of the protocol can understand the new
|
||||
// LogConfiguration Raft log entry but will never generate one so they can
|
||||
// remain compatible with version 0 Raft servers in the cluster.
|
||||
// 2: Transitional protocol used when migrating an existing cluster to the new
|
||||
// server ID system. Server IDs are still set to be the same as server
|
||||
// addresses, but all configuration changes are propagated using the new
|
||||
// LogConfiguration Raft log entry type, which can carry full ID information.
|
||||
// This version supports the old AddPeer/RemovePeer APIs as well as the new
|
||||
// ID-based AddVoter/RemoveServer APIs which should be used when adding
|
||||
// version 3 servers to the cluster later. This version sheds all
|
||||
// interoperability with version 0 servers, but can interoperate with newer
|
||||
// Raft servers running with protocol version 1 since they can understand the
|
||||
// new LogConfiguration Raft log entry, and this version can still understand
|
||||
// their RemovePeerDeprecated Raft log entries. We need this protocol version
|
||||
// as an intermediate step between 1 and 3 so that servers will propagate the
|
||||
// ID information that will come from newly-added (or -rolled) servers using
|
||||
// protocol version 3, but since they are still using their address-based IDs
|
||||
// from the previous step they will still be able to track commitments and
|
||||
// their own voting status properly. If we skipped this step, servers would
|
||||
// be started with their new IDs, but they wouldn't see themselves in the old
|
||||
// address-based configuration, so none of the servers would think they had a
|
||||
// vote.
|
||||
// 3: Protocol adding full support for server IDs and new ID-based server APIs
|
||||
// (AddVoter, AddNonvoter, etc.), old AddPeer/RemovePeer APIs are no longer
|
||||
// supported. Version 2 servers should be swapped out by removing them from
|
||||
// the cluster one-by-one and re-adding them with updated configuration for
|
||||
// this protocol version, along with their server ID. The remove/add cycle
|
||||
// is required to populate their server ID. Note that removing must be done
|
||||
// by ID, which will be the old server's address.
|
||||
type ProtocolVersion int
|
||||
|
||||
const (
|
||||
ProtocolVersionMin ProtocolVersion = 0
|
||||
ProtocolVersionMax = 3
|
||||
)
|
||||
|
||||
// SnapshotVersion identifies a snapshot format that this server can
// _understand_. For now the server is assumed to always generate the latest
// version; a configurable version may be introduced later.
//
// Version History
//
// 0: Snapshots from the original, unversioned Raft library. Their peers
//    portion uses the legacy encoding, which needs decodePeers to parse.
//    Only the unversioned Raft library should ever produce this version.
// 1: Format carrying a full configuration structure together with its log
//    index, including server IDs and non-voting server modes. The legacy
//    peers structure is still written alongside to ease upgrades, although
//    servers that understand version 1 never read it. It has to stay in this
//    version because the original library enforced no versioning; it can be
//    deprecated in the next snapshot version.
type SnapshotVersion int

// Bounds on the snapshot versions that this server can understand.
const (
	// SnapshotVersionMin is the lowest snapshot version understood.
	SnapshotVersionMin SnapshotVersion = 0
	// SnapshotVersionMax is the highest snapshot version understood. It is
	// declared without an explicit type, so it is an untyped constant.
	SnapshotVersionMax = 1
)
|
||||
|
||||
// Config provides any necessary configuration for the Raft server.
|
||||
type Config struct {
|
||||
// ProtocolVersion allows a Raft server to inter-operate with older
|
||||
// Raft servers running an older version of the code. This is used to
|
||||
// version the wire protocol as well as Raft-specific log entries that
|
||||
// the server uses when _speaking_ to other servers. There is currently
|
||||
// no auto-negotiation of versions so all servers must be manually
|
||||
// configured with compatible versions. See ProtocolVersionMin and
|
||||
// ProtocolVersionMax for the versions of the protocol that this server
|
||||
// can _understand_.
|
||||
ProtocolVersion ProtocolVersion
|
||||
|
||||
// HeartbeatTimeout specifies the time in follower state without
|
||||
// a leader before we attempt an election.
|
||||
HeartbeatTimeout time.Duration
|
||||
|
||||
// ElectionTimeout specifies the time in candidate state without
|
||||
// a leader before we attempt an election.
|
||||
ElectionTimeout time.Duration
|
||||
|
||||
// CommitTimeout controls the time without an Apply() operation
|
||||
// before we heartbeat to ensure a timely commit. Due to random
|
||||
// staggering, may be delayed as much as 2x this value.
|
||||
CommitTimeout time.Duration
|
||||
|
||||
// MaxAppendEntries controls the maximum number of append entries
|
||||
// to send at once. We want to strike a balance between efficiency
|
||||
// and avoiding waste if the follower is going to reject because of
|
||||
// an inconsistent log.
|
||||
MaxAppendEntries int
|
||||
|
||||
// If we are a member of a cluster, and RemovePeer is invoked for the
|
||||
// local node, then we forget all peers and transition into the follower state.
|
||||
// If ShutdownOnRemove is is set, we additional shutdown Raft. Otherwise,
|
||||
// we can become a leader of a cluster containing only this node.
|
||||
ShutdownOnRemove bool
|
||||
|
||||
// TrailingLogs controls how many logs we leave after a snapshot. This is
|
||||
// used so that we can quickly replay logs on a follower instead of being
|
||||
// forced to send an entire snapshot.
|
||||
TrailingLogs uint64
|
||||
|
||||
// SnapshotInterval controls how often we check if we should perform a snapshot.
|
||||
// We randomly stagger between this value and 2x this value to avoid the entire
|
||||
// cluster from performing a snapshot at once.
|
||||
SnapshotInterval time.Duration
|
||||
|
||||
// SnapshotThreshold controls how many outstanding logs there must be before
|
||||
// we perform a snapshot. This is to prevent excessive snapshots when we can
|
||||
// just replay a small set of logs.
|
||||
SnapshotThreshold uint64
|
||||
|
||||
// LeaderLeaseTimeout is used to control how long the "lease" lasts
|
||||
// for being the leader without being able to contact a quorum
|
||||
// of nodes. If we reach this interval without contact, we will
|
||||
// step down as leader.
|
||||
LeaderLeaseTimeout time.Duration
|
||||
|
||||
// StartAsLeader forces Raft to start in the leader state. This should
|
||||
// never be used except for testing purposes, as it can cause a split-brain.
|
||||
StartAsLeader bool
|
||||
|
||||
// The unique ID for this server across all time. When running with
|
||||
// ProtocolVersion < 3, you must set this to be the same as the network
|
||||
// address of your transport.
|
||||
LocalID ServerID
|
||||
|
||||
// NotifyCh is used to provide a channel that will be notified of leadership
|
||||
// changes. Raft will block writing to this channel, so it should either be
|
||||
// buffered or aggressively consumed.
|
||||
NotifyCh chan<- bool
|
||||
|
||||
// LogOutput is used as a sink for logs, unless Logger is specified.
|
||||
// Defaults to os.Stderr.
|
||||
LogOutput io.Writer
|
||||
|
||||
// Logger is a user-provided logger. If nil, a logger writing to LogOutput
|
||||
// is used.
|
||||
Logger *log.Logger
|
||||
}
|
||||
|
||||
// DefaultConfig returns a Config with usable defaults.
|
||||
func DefaultConfig() *Config {
|
||||
return &Config{
|
||||
ProtocolVersion: ProtocolVersionMax,
|
||||
HeartbeatTimeout: 1000 * time.Millisecond,
|
||||
ElectionTimeout: 1000 * time.Millisecond,
|
||||
CommitTimeout: 50 * time.Millisecond,
|
||||
MaxAppendEntries: 64,
|
||||
ShutdownOnRemove: true,
|
||||
TrailingLogs: 10240,
|
||||
SnapshotInterval: 120 * time.Second,
|
||||
SnapshotThreshold: 8192,
|
||||
LeaderLeaseTimeout: 500 * time.Millisecond,
|
||||
}
|
||||
}
|
||||
|
||||
// ValidateConfig is used to validate a sane configuration
|
||||
func ValidateConfig(config *Config) error {
|
||||
// We don't actually support running as 0 in the library any more, but
|
||||
// we do understand it.
|
||||
protocolMin := ProtocolVersionMin
|
||||
if protocolMin == 0 {
|
||||
protocolMin = 1
|
||||
}
|
||||
if config.ProtocolVersion < protocolMin ||
|
||||
config.ProtocolVersion > ProtocolVersionMax {
|
||||
return fmt.Errorf("Protocol version %d must be >= %d and <= %d",
|
||||
config.ProtocolVersion, protocolMin, ProtocolVersionMax)
|
||||
}
|
||||
if len(config.LocalID) == 0 {
|
||||
return fmt.Errorf("LocalID cannot be empty")
|
||||
}
|
||||
if config.HeartbeatTimeout < 5*time.Millisecond {
|
||||
return fmt.Errorf("Heartbeat timeout is too low")
|
||||
}
|
||||
if config.ElectionTimeout < 5*time.Millisecond {
|
||||
return fmt.Errorf("Election timeout is too low")
|
||||
}
|
||||
if config.CommitTimeout < time.Millisecond {
|
||||
return fmt.Errorf("Commit timeout is too low")
|
||||
}
|
||||
if config.MaxAppendEntries <= 0 {
|
||||
return fmt.Errorf("MaxAppendEntries must be positive")
|
||||
}
|
||||
if config.MaxAppendEntries > 1024 {
|
||||
return fmt.Errorf("MaxAppendEntries is too large")
|
||||
}
|
||||
if config.SnapshotInterval < 5*time.Millisecond {
|
||||
return fmt.Errorf("Snapshot interval is too low")
|
||||
}
|
||||
if config.LeaderLeaseTimeout < 5*time.Millisecond {
|
||||
return fmt.Errorf("Leader lease timeout is too low")
|
||||
}
|
||||
if config.LeaderLeaseTimeout > config.HeartbeatTimeout {
|
||||
return fmt.Errorf("Leader lease timeout cannot be larger than heartbeat timeout")
|
||||
}
|
||||
if config.ElectionTimeout < config.HeartbeatTimeout {
|
||||
return fmt.Errorf("Election timeout must be equal or greater than Heartbeat Timeout")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
343
vendor/github.com/hashicorp/raft/configuration.go
generated
vendored
Normal file
343
vendor/github.com/hashicorp/raft/configuration.go
generated
vendored
Normal file
@@ -0,0 +1,343 @@
|
||||
package raft
|
||||
|
||||
import "fmt"
|
||||
|
||||
// ServerSuffrage describes whether a Server in a Configuration gets a vote.
type ServerSuffrage int

// Note: Don't renumber these, since the numbers are written into the log.
const (
	// Voter is a server whose vote counts in elections and whose match index
	// is used when advancing the leader's commit index.
	Voter ServerSuffrage = iota
	// Nonvoter is a server that receives log entries but is ignored for
	// election and commitment purposes.
	Nonvoter
	// Staging is a server that behaves like a nonvoter with one exception:
	// once it has received enough log entries to be sufficiently caught up
	// with the leader's log, the leader will invoke a membership change to
	// turn the Staging server into a Voter.
	Staging
)

// String returns the name of the suffrage value, or the generic
// "ServerSuffrage" for a value that is not one of the declared constants.
func (s ServerSuffrage) String() string {
	names := [...]string{
		Voter:    "Voter",
		Nonvoter: "Nonvoter",
		Staging:  "Staging",
	}
	// The underlying type is a signed int, so both ends need guarding.
	if s >= 0 && int(s) < len(names) {
		return names[s]
	}
	return "ServerSuffrage"
}
|
||||
|
||||
// ServerID is a string that uniquely identifies a server for all time.
type ServerID string
|
||||
|
||||
// ServerAddress is the network address at which a transport can contact a
// server.
type ServerAddress string
|
||||
|
||||
// Server tracks the information about a single server in a configuration.
|
||||
type Server struct {
|
||||
// Suffrage determines whether the server gets a vote.
|
||||
Suffrage ServerSuffrage
|
||||
// ID is a unique string identifying this server for all time.
|
||||
ID ServerID
|
||||
// Address is its network address that a transport can contact.
|
||||
Address ServerAddress
|
||||
}
|
||||
|
||||
// Configuration tracks which servers are in the cluster, and whether they have
|
||||
// votes. This should include the local server, if it's a member of the cluster.
|
||||
// The servers are listed no particular order, but each should only appear once.
|
||||
// These entries are appended to the log during membership changes.
|
||||
type Configuration struct {
|
||||
Servers []Server
|
||||
}
|
||||
|
||||
// Clone makes a deep copy of a Configuration.
|
||||
func (c *Configuration) Clone() (copy Configuration) {
|
||||
copy.Servers = append(copy.Servers, c.Servers...)
|
||||
return
|
||||
}
|
||||
|
||||
// ConfigurationChangeCommand enumerates the different ways the cluster
// configuration can be changed.
type ConfigurationChangeCommand uint8

const (
	// AddStaging makes a server Staging unless it's a Voter.
	AddStaging ConfigurationChangeCommand = iota
	// AddNonvoter makes a server Nonvoter unless it's Staging or Voter.
	AddNonvoter
	// DemoteVoter makes a server Nonvoter unless it's absent.
	DemoteVoter
	// RemoveServer removes a server entirely from the cluster membership.
	RemoveServer
	// Promote is created automatically by a leader; it turns a Staging
	// server into a Voter.
	Promote
)

// String returns the name of the command, or the generic
// "ConfigurationChangeCommand" for a value that is not one of the declared
// constants.
func (c ConfigurationChangeCommand) String() string {
	names := [...]string{
		AddStaging:   "AddStaging",
		AddNonvoter:  "AddNonvoter",
		DemoteVoter:  "DemoteVoter",
		RemoveServer: "RemoveServer",
		Promote:      "Promote",
	}
	if int(c) < len(names) {
		return names[c]
	}
	return "ConfigurationChangeCommand"
}
|
||||
|
||||
// configurationChangeRequest describes a change that a leader would like to
|
||||
// make to its current configuration. It's used only within a single server
|
||||
// (never serialized into the log), as part of `configurationChangeFuture`.
|
||||
type configurationChangeRequest struct {
|
||||
command ConfigurationChangeCommand
|
||||
serverID ServerID
|
||||
serverAddress ServerAddress // only present for AddStaging, AddNonvoter
|
||||
// prevIndex, if nonzero, is the index of the only configuration upon which
|
||||
// this change may be applied; if another configuration entry has been
|
||||
// added in the meantime, this request will fail.
|
||||
prevIndex uint64
|
||||
}
|
||||
|
||||
// configurations is state tracked on every server about its Configurations.
|
||||
// Note that, per Diego's dissertation, there can be at most one uncommitted
|
||||
// configuration at a time (the next configuration may not be created until the
|
||||
// prior one has been committed).
|
||||
//
|
||||
// One downside to storing just two configurations is that if you try to take a
|
||||
// snahpsot when your state machine hasn't yet applied the committedIndex, we
|
||||
// have no record of the configuration that would logically fit into that
|
||||
// snapshot. We disallow snapshots in that case now. An alternative approach,
|
||||
// which LogCabin uses, is to track every configuration change in the
|
||||
// log.
|
||||
type configurations struct {
|
||||
// committed is the latest configuration in the log/snapshot that has been
|
||||
// committed (the one with the largest index).
|
||||
committed Configuration
|
||||
// committedIndex is the log index where 'committed' was written.
|
||||
committedIndex uint64
|
||||
// latest is the latest configuration in the log/snapshot (may be committed
|
||||
// or uncommitted)
|
||||
latest Configuration
|
||||
// latestIndex is the log index where 'latest' was written.
|
||||
latestIndex uint64
|
||||
}
|
||||
|
||||
// Clone makes a deep copy of a configurations object.
|
||||
func (c *configurations) Clone() (copy configurations) {
|
||||
copy.committed = c.committed.Clone()
|
||||
copy.committedIndex = c.committedIndex
|
||||
copy.latest = c.latest.Clone()
|
||||
copy.latestIndex = c.latestIndex
|
||||
return
|
||||
}
|
||||
|
||||
// hasVote returns true if the server identified by 'id' is a Voter in the
|
||||
// provided Configuration.
|
||||
func hasVote(configuration Configuration, id ServerID) bool {
|
||||
for _, server := range configuration.Servers {
|
||||
if server.ID == id {
|
||||
return server.Suffrage == Voter
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// checkConfiguration tests a cluster membership configuration for common
|
||||
// errors.
|
||||
func checkConfiguration(configuration Configuration) error {
|
||||
idSet := make(map[ServerID]bool)
|
||||
addressSet := make(map[ServerAddress]bool)
|
||||
var voters int
|
||||
for _, server := range configuration.Servers {
|
||||
if server.ID == "" {
|
||||
return fmt.Errorf("Empty ID in configuration: %v", configuration)
|
||||
}
|
||||
if server.Address == "" {
|
||||
return fmt.Errorf("Empty address in configuration: %v", server)
|
||||
}
|
||||
if idSet[server.ID] {
|
||||
return fmt.Errorf("Found duplicate ID in configuration: %v", server.ID)
|
||||
}
|
||||
idSet[server.ID] = true
|
||||
if addressSet[server.Address] {
|
||||
return fmt.Errorf("Found duplicate address in configuration: %v", server.Address)
|
||||
}
|
||||
addressSet[server.Address] = true
|
||||
if server.Suffrage == Voter {
|
||||
voters++
|
||||
}
|
||||
}
|
||||
if voters == 0 {
|
||||
return fmt.Errorf("Need at least one voter in configuration: %v", configuration)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// nextConfiguration generates a new Configuration from the current one and a
|
||||
// configuration change request. It's split from appendConfigurationEntry so
|
||||
// that it can be unit tested easily.
|
||||
func nextConfiguration(current Configuration, currentIndex uint64, change configurationChangeRequest) (Configuration, error) {
|
||||
if change.prevIndex > 0 && change.prevIndex != currentIndex {
|
||||
return Configuration{}, fmt.Errorf("Configuration changed since %v (latest is %v)", change.prevIndex, currentIndex)
|
||||
}
|
||||
|
||||
configuration := current.Clone()
|
||||
switch change.command {
|
||||
case AddStaging:
|
||||
// TODO: barf on new address?
|
||||
newServer := Server{
|
||||
// TODO: This should add the server as Staging, to be automatically
|
||||
// promoted to Voter later. However, the promoton to Voter is not yet
|
||||
// implemented, and doing so is not trivial with the way the leader loop
|
||||
// coordinates with the replication goroutines today. So, for now, the
|
||||
// server will have a vote right away, and the Promote case below is
|
||||
// unused.
|
||||
Suffrage: Voter,
|
||||
ID: change.serverID,
|
||||
Address: change.serverAddress,
|
||||
}
|
||||
found := false
|
||||
for i, server := range configuration.Servers {
|
||||
if server.ID == change.serverID {
|
||||
if server.Suffrage == Voter {
|
||||
configuration.Servers[i].Address = change.serverAddress
|
||||
} else {
|
||||
configuration.Servers[i] = newServer
|
||||
}
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
configuration.Servers = append(configuration.Servers, newServer)
|
||||
}
|
||||
case AddNonvoter:
|
||||
newServer := Server{
|
||||
Suffrage: Nonvoter,
|
||||
ID: change.serverID,
|
||||
Address: change.serverAddress,
|
||||
}
|
||||
found := false
|
||||
for i, server := range configuration.Servers {
|
||||
if server.ID == change.serverID {
|
||||
if server.Suffrage != Nonvoter {
|
||||
configuration.Servers[i].Address = change.serverAddress
|
||||
} else {
|
||||
configuration.Servers[i] = newServer
|
||||
}
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
configuration.Servers = append(configuration.Servers, newServer)
|
||||
}
|
||||
case DemoteVoter:
|
||||
for i, server := range configuration.Servers {
|
||||
if server.ID == change.serverID {
|
||||
configuration.Servers[i].Suffrage = Nonvoter
|
||||
break
|
||||
}
|
||||
}
|
||||
case RemoveServer:
|
||||
for i, server := range configuration.Servers {
|
||||
if server.ID == change.serverID {
|
||||
configuration.Servers = append(configuration.Servers[:i], configuration.Servers[i+1:]...)
|
||||
break
|
||||
}
|
||||
}
|
||||
case Promote:
|
||||
for i, server := range configuration.Servers {
|
||||
if server.ID == change.serverID && server.Suffrage == Staging {
|
||||
configuration.Servers[i].Suffrage = Voter
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Make sure we didn't do something bad like remove the last voter
|
||||
if err := checkConfiguration(configuration); err != nil {
|
||||
return Configuration{}, err
|
||||
}
|
||||
|
||||
return configuration, nil
|
||||
}
|
||||
|
||||
// encodePeers is used to serialize a Configuration into the old peers format.
|
||||
// This is here for backwards compatibility when operating with a mix of old
|
||||
// servers and should be removed once we deprecate support for protocol version 1.
|
||||
func encodePeers(configuration Configuration, trans Transport) []byte {
|
||||
// Gather up all the voters, other suffrage types are not supported by
|
||||
// this data format.
|
||||
var encPeers [][]byte
|
||||
for _, server := range configuration.Servers {
|
||||
if server.Suffrage == Voter {
|
||||
encPeers = append(encPeers, trans.EncodePeer(server.ID, server.Address))
|
||||
}
|
||||
}
|
||||
|
||||
// Encode the entire array.
|
||||
buf, err := encodeMsgPack(encPeers)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("failed to encode peers: %v", err))
|
||||
}
|
||||
|
||||
return buf.Bytes()
|
||||
}
|
||||
|
||||
// decodePeers is used to deserialize an old list of peers into a Configuration.
|
||||
// This is here for backwards compatibility with old log entries and snapshots;
|
||||
// it should be removed eventually.
|
||||
func decodePeers(buf []byte, trans Transport) Configuration {
|
||||
// Decode the buffer first.
|
||||
var encPeers [][]byte
|
||||
if err := decodeMsgPack(buf, &encPeers); err != nil {
|
||||
panic(fmt.Errorf("failed to decode peers: %v", err))
|
||||
}
|
||||
|
||||
// Deserialize each peer.
|
||||
var servers []Server
|
||||
for _, enc := range encPeers {
|
||||
p := trans.DecodePeer(enc)
|
||||
servers = append(servers, Server{
|
||||
Suffrage: Voter,
|
||||
ID: ServerID(p),
|
||||
Address: ServerAddress(p),
|
||||
})
|
||||
}
|
||||
|
||||
return Configuration{
|
||||
Servers: servers,
|
||||
}
|
||||
}
|
||||
|
||||
// encodeConfiguration serializes a Configuration using MsgPack, or panics on
|
||||
// errors.
|
||||
func encodeConfiguration(configuration Configuration) []byte {
|
||||
buf, err := encodeMsgPack(configuration)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("failed to encode configuration: %v", err))
|
||||
}
|
||||
return buf.Bytes()
|
||||
}
|
||||
|
||||
// decodeConfiguration deserializes a Configuration using MsgPack, or panics on
|
||||
// errors.
|
||||
func decodeConfiguration(buf []byte) Configuration {
|
||||
var configuration Configuration
|
||||
if err := decodeMsgPack(buf, &configuration); err != nil {
|
||||
panic(fmt.Errorf("failed to decode configuration: %v", err))
|
||||
}
|
||||
return configuration
|
||||
}
|
||||
49
vendor/github.com/hashicorp/raft/discard_snapshot.go
generated
vendored
Normal file
49
vendor/github.com/hashicorp/raft/discard_snapshot.go
generated
vendored
Normal file
@@ -0,0 +1,49 @@
|
||||
package raft
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
)
|
||||
|
||||
// DiscardSnapshotStore lets snapshots succeed while always throwing the
// snapshot away. That is useful when the log should be truncated but no
// snapshot should be retained. It must never be used in production and is
// suitable for testing only.
type DiscardSnapshotStore struct{}

// DiscardSnapshotSink is the sink type used together with
// DiscardSnapshotStore; it carries no state.
type DiscardSnapshotSink struct{}

// NewDiscardSnapshotStore creates a new DiscardSnapshotStore.
func NewDiscardSnapshotStore() *DiscardSnapshotStore {
	store := new(DiscardSnapshotStore)
	return store
}
|
||||
|
||||
func (d *DiscardSnapshotStore) Create(version SnapshotVersion, index, term uint64,
|
||||
configuration Configuration, configurationIndex uint64, trans Transport) (SnapshotSink, error) {
|
||||
return &DiscardSnapshotSink{}, nil
|
||||
}
|
||||
|
||||
func (d *DiscardSnapshotStore) List() ([]*SnapshotMeta, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (d *DiscardSnapshotStore) Open(id string) (*SnapshotMeta, io.ReadCloser, error) {
|
||||
return nil, nil, fmt.Errorf("open is not supported")
|
||||
}
|
||||
|
||||
func (d *DiscardSnapshotSink) Write(b []byte) (int, error) {
|
||||
return len(b), nil
|
||||
}
|
||||
|
||||
func (d *DiscardSnapshotSink) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *DiscardSnapshotSink) ID() string {
|
||||
return "discard"
|
||||
}
|
||||
|
||||
func (d *DiscardSnapshotSink) Cancel() error {
|
||||
return nil
|
||||
}
|
||||
528
vendor/github.com/hashicorp/raft/file_snapshot.go
generated
vendored
Normal file
528
vendor/github.com/hashicorp/raft/file_snapshot.go
generated
vendored
Normal file
@@ -0,0 +1,528 @@
|
||||
package raft
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"hash"
|
||||
"hash/crc64"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// File and directory names used by the file-based snapshot store.
const (
	// testPath is the name of the probe file created by the permissions test.
	testPath = "permTest"
	// snapPath is the sub-directory of the base directory holding snapshots.
	snapPath = "snapshots"
	// metaFilePath is the name of the JSON metadata file inside a snapshot
	// directory.
	metaFilePath = "meta.json"
	// stateFilePath is the name of the state file inside a snapshot
	// directory.
	stateFilePath = "state.bin"
	// tmpSuffix marks the directory of a temporary snapshot.
	tmpSuffix = ".tmp"
)
|
||||
|
||||
// FileSnapshotStore is a SnapshotStore implementation that keeps snapshots
// on the local disk.
type FileSnapshotStore struct {
	// path is the directory that holds the snapshot directories.
	path string
	// retain is the number of snapshots to retain.
	retain int
	// logger receives the store's log output.
	logger *log.Logger
}
|
||||
|
||||
// snapMetaSlice is a slice of snapshot metadata pointers; getSnapshots hands
// it to sort.Sort to order the snapshots it found.
type snapMetaSlice []*fileSnapshotMeta
|
||||
|
||||
// FileSnapshotSink implements SnapshotSink with a file.
|
||||
type FileSnapshotSink struct {
|
||||
store *FileSnapshotStore
|
||||
logger *log.Logger
|
||||
dir string
|
||||
parentDir string
|
||||
meta fileSnapshotMeta
|
||||
|
||||
stateFile *os.File
|
||||
stateHash hash.Hash64
|
||||
buffered *bufio.Writer
|
||||
|
||||
closed bool
|
||||
}
|
||||
|
||||
// fileSnapshotMeta is stored on disk. We also put a CRC
|
||||
// on disk so that we can verify the snapshot.
|
||||
type fileSnapshotMeta struct {
|
||||
SnapshotMeta
|
||||
CRC []byte
|
||||
}
|
||||
|
||||
// bufferedFile is what callers get back when a snapshot is opened. It pairs
// a buffered reader with the underlying file so that reads are buffered
// while the file can still be closed.
type bufferedFile struct {
	bh *bufio.Reader
	fh *os.File
}

// Read reads through the buffered reader.
func (b *bufferedFile) Read(p []byte) (int, error) {
	return b.bh.Read(p)
}

// Close closes the underlying file.
func (b *bufferedFile) Close() error {
	return b.fh.Close()
}
|
||||
|
||||
// NewFileSnapshotStoreWithLogger creates a new FileSnapshotStore based
|
||||
// on a base directory. The `retain` parameter controls how many
|
||||
// snapshots are retained. Must be at least 1.
|
||||
func NewFileSnapshotStoreWithLogger(base string, retain int, logger *log.Logger) (*FileSnapshotStore, error) {
|
||||
if retain < 1 {
|
||||
return nil, fmt.Errorf("must retain at least one snapshot")
|
||||
}
|
||||
if logger == nil {
|
||||
logger = log.New(os.Stderr, "", log.LstdFlags)
|
||||
}
|
||||
|
||||
// Ensure our path exists
|
||||
path := filepath.Join(base, snapPath)
|
||||
if err := os.MkdirAll(path, 0755); err != nil && !os.IsExist(err) {
|
||||
return nil, fmt.Errorf("snapshot path not accessible: %v", err)
|
||||
}
|
||||
|
||||
// Setup the store
|
||||
store := &FileSnapshotStore{
|
||||
path: path,
|
||||
retain: retain,
|
||||
logger: logger,
|
||||
}
|
||||
|
||||
// Do a permissions test
|
||||
if err := store.testPermissions(); err != nil {
|
||||
return nil, fmt.Errorf("permissions test failed: %v", err)
|
||||
}
|
||||
return store, nil
|
||||
}
|
||||
|
||||
// NewFileSnapshotStore creates a new FileSnapshotStore based
|
||||
// on a base directory. The `retain` parameter controls how many
|
||||
// snapshots are retained. Must be at least 1.
|
||||
func NewFileSnapshotStore(base string, retain int, logOutput io.Writer) (*FileSnapshotStore, error) {
|
||||
if logOutput == nil {
|
||||
logOutput = os.Stderr
|
||||
}
|
||||
return NewFileSnapshotStoreWithLogger(base, retain, log.New(logOutput, "", log.LstdFlags))
|
||||
}
|
||||
|
||||
// testPermissions tries to touch a file in our path to see if it works.
|
||||
func (f *FileSnapshotStore) testPermissions() error {
|
||||
path := filepath.Join(f.path, testPath)
|
||||
fh, err := os.Create(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err = fh.Close(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err = os.Remove(path); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// snapshotName builds a snapshot name of the form "<term>-<index>-<msec>",
// where msec is the current wall-clock time in milliseconds since the Unix
// epoch.
func snapshotName(term, index uint64) string {
	nowMillis := time.Now().UnixNano() / int64(time.Millisecond)
	return fmt.Sprintf("%d-%d-%d", term, index, nowMillis)
}
|
||||
|
||||
// Create is used to start a new snapshot
|
||||
func (f *FileSnapshotStore) Create(version SnapshotVersion, index, term uint64,
|
||||
configuration Configuration, configurationIndex uint64, trans Transport) (SnapshotSink, error) {
|
||||
// We only support version 1 snapshots at this time.
|
||||
if version != 1 {
|
||||
return nil, fmt.Errorf("unsupported snapshot version %d", version)
|
||||
}
|
||||
|
||||
// Create a new path
|
||||
name := snapshotName(term, index)
|
||||
path := filepath.Join(f.path, name+tmpSuffix)
|
||||
f.logger.Printf("[INFO] snapshot: Creating new snapshot at %s", path)
|
||||
|
||||
// Make the directory
|
||||
if err := os.MkdirAll(path, 0755); err != nil {
|
||||
f.logger.Printf("[ERR] snapshot: Failed to make snapshot directory: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Create the sink
|
||||
sink := &FileSnapshotSink{
|
||||
store: f,
|
||||
logger: f.logger,
|
||||
dir: path,
|
||||
parentDir: f.path,
|
||||
meta: fileSnapshotMeta{
|
||||
SnapshotMeta: SnapshotMeta{
|
||||
Version: version,
|
||||
ID: name,
|
||||
Index: index,
|
||||
Term: term,
|
||||
Peers: encodePeers(configuration, trans),
|
||||
Configuration: configuration,
|
||||
ConfigurationIndex: configurationIndex,
|
||||
},
|
||||
CRC: nil,
|
||||
},
|
||||
}
|
||||
|
||||
// Write out the meta data
|
||||
if err := sink.writeMeta(); err != nil {
|
||||
f.logger.Printf("[ERR] snapshot: Failed to write metadata: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Open the state file
|
||||
statePath := filepath.Join(path, stateFilePath)
|
||||
fh, err := os.Create(statePath)
|
||||
if err != nil {
|
||||
f.logger.Printf("[ERR] snapshot: Failed to create state file: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
sink.stateFile = fh
|
||||
|
||||
// Create a CRC64 hash
|
||||
sink.stateHash = crc64.New(crc64.MakeTable(crc64.ECMA))
|
||||
|
||||
// Wrap both the hash and file in a MultiWriter with buffering
|
||||
multi := io.MultiWriter(sink.stateFile, sink.stateHash)
|
||||
sink.buffered = bufio.NewWriter(multi)
|
||||
|
||||
// Done
|
||||
return sink, nil
|
||||
}
|
||||
|
||||
// List returns available snapshots in the store.
|
||||
func (f *FileSnapshotStore) List() ([]*SnapshotMeta, error) {
|
||||
// Get the eligible snapshots
|
||||
snapshots, err := f.getSnapshots()
|
||||
if err != nil {
|
||||
f.logger.Printf("[ERR] snapshot: Failed to get snapshots: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var snapMeta []*SnapshotMeta
|
||||
for _, meta := range snapshots {
|
||||
snapMeta = append(snapMeta, &meta.SnapshotMeta)
|
||||
if len(snapMeta) == f.retain {
|
||||
break
|
||||
}
|
||||
}
|
||||
return snapMeta, nil
|
||||
}
|
||||
|
||||
// getSnapshots returns all the known snapshots.
|
||||
func (f *FileSnapshotStore) getSnapshots() ([]*fileSnapshotMeta, error) {
|
||||
// Get the eligible snapshots
|
||||
snapshots, err := ioutil.ReadDir(f.path)
|
||||
if err != nil {
|
||||
f.logger.Printf("[ERR] snapshot: Failed to scan snapshot dir: %v", err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Populate the metadata
|
||||
var snapMeta []*fileSnapshotMeta
|
||||
for _, snap := range snapshots {
|
||||
// Ignore any files
|
||||
if !snap.IsDir() {
|
||||
continue
|
||||
}
|
||||
|
||||
// Ignore any temporary snapshots
|
||||
dirName := snap.Name()
|
||||
if strings.HasSuffix(dirName, tmpSuffix) {
|
||||
f.logger.Printf("[WARN] snapshot: Found temporary snapshot: %v", dirName)
|
||||
continue
|
||||
}
|
||||
|
||||
// Try to read the meta data
|
||||
meta, err := f.readMeta(dirName)
|
||||
if err != nil {
|
||||
f.logger.Printf("[WARN] snapshot: Failed to read metadata for %v: %v", dirName, err)
|
||||
continue
|
||||
}
|
||||
|
||||
// Make sure we can understand this version.
|
||||
if meta.Version < SnapshotVersionMin || meta.Version > SnapshotVersionMax {
|
||||
f.logger.Printf("[WARN] snapshot: Snapshot version for %v not supported: %d", dirName, meta.Version)
|
||||
continue
|
||||
}
|
||||
|
||||
// Append, but only return up to the retain count
|
||||
snapMeta = append(snapMeta, meta)
|
||||
}
|
||||
|
||||
// Sort the snapshot, reverse so we get new -> old
|
||||
sort.Sort(sort.Reverse(snapMetaSlice(snapMeta)))
|
||||
|
||||
return snapMeta, nil
|
||||
}
|
||||
|
||||
// readMeta is used to read the meta data for a given named backup
|
||||
func (f *FileSnapshotStore) readMeta(name string) (*fileSnapshotMeta, error) {
|
||||
// Open the meta file
|
||||
metaPath := filepath.Join(f.path, name, metaFilePath)
|
||||
fh, err := os.Open(metaPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer fh.Close()
|
||||
|
||||
// Buffer the file IO
|
||||
buffered := bufio.NewReader(fh)
|
||||
|
||||
// Read in the JSON
|
||||
meta := &fileSnapshotMeta{}
|
||||
dec := json.NewDecoder(buffered)
|
||||
if err := dec.Decode(meta); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return meta, nil
|
||||
}
|
||||
|
||||
// Open takes a snapshot ID and returns a ReadCloser for that snapshot.
|
||||
func (f *FileSnapshotStore) Open(id string) (*SnapshotMeta, io.ReadCloser, error) {
|
||||
// Get the metadata
|
||||
meta, err := f.readMeta(id)
|
||||
if err != nil {
|
||||
f.logger.Printf("[ERR] snapshot: Failed to get meta data to open snapshot: %v", err)
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
// Open the state file
|
||||
statePath := filepath.Join(f.path, id, stateFilePath)
|
||||
fh, err := os.Open(statePath)
|
||||
if err != nil {
|
||||
f.logger.Printf("[ERR] snapshot: Failed to open state file: %v", err)
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
// Create a CRC64 hash
|
||||
stateHash := crc64.New(crc64.MakeTable(crc64.ECMA))
|
||||
|
||||
// Compute the hash
|
||||
_, err = io.Copy(stateHash, fh)
|
||||
if err != nil {
|
||||
f.logger.Printf("[ERR] snapshot: Failed to read state file: %v", err)
|
||||
fh.Close()
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
// Verify the hash
|
||||
computed := stateHash.Sum(nil)
|
||||
if bytes.Compare(meta.CRC, computed) != 0 {
|
||||
f.logger.Printf("[ERR] snapshot: CRC checksum failed (stored: %v computed: %v)",
|
||||
meta.CRC, computed)
|
||||
fh.Close()
|
||||
return nil, nil, fmt.Errorf("CRC mismatch")
|
||||
}
|
||||
|
||||
// Seek to the start
|
||||
if _, err := fh.Seek(0, 0); err != nil {
|
||||
f.logger.Printf("[ERR] snapshot: State file seek failed: %v", err)
|
||||
fh.Close()
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
// Return a buffered file
|
||||
buffered := &bufferedFile{
|
||||
bh: bufio.NewReader(fh),
|
||||
fh: fh,
|
||||
}
|
||||
|
||||
return &meta.SnapshotMeta, buffered, nil
|
||||
}
|
||||
|
||||
// ReapSnapshots reaps any snapshots beyond the retain count.
|
||||
func (f *FileSnapshotStore) ReapSnapshots() error {
|
||||
snapshots, err := f.getSnapshots()
|
||||
if err != nil {
|
||||
f.logger.Printf("[ERR] snapshot: Failed to get snapshots: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
for i := f.retain; i < len(snapshots); i++ {
|
||||
path := filepath.Join(f.path, snapshots[i].ID)
|
||||
f.logger.Printf("[INFO] snapshot: reaping snapshot %v", path)
|
||||
if err := os.RemoveAll(path); err != nil {
|
||||
f.logger.Printf("[ERR] snapshot: Failed to reap snapshot %v: %v", path, err)
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// ID returns the ID of the snapshot, can be used with Open()
|
||||
// after the snapshot is finalized.
|
||||
func (s *FileSnapshotSink) ID() string {
|
||||
return s.meta.ID
|
||||
}
|
||||
|
||||
// Write is used to append to the state file. We write to the
|
||||
// buffered IO object to reduce the amount of context switches.
|
||||
func (s *FileSnapshotSink) Write(b []byte) (int, error) {
|
||||
return s.buffered.Write(b)
|
||||
}
|
||||
|
||||
// Close is used to indicate a successful end.
|
||||
func (s *FileSnapshotSink) Close() error {
|
||||
// Make sure close is idempotent
|
||||
if s.closed {
|
||||
return nil
|
||||
}
|
||||
s.closed = true
|
||||
|
||||
// Close the open handles
|
||||
if err := s.finalize(); err != nil {
|
||||
s.logger.Printf("[ERR] snapshot: Failed to finalize snapshot: %v", err)
|
||||
if delErr := os.RemoveAll(s.dir); delErr != nil {
|
||||
s.logger.Printf("[ERR] snapshot: Failed to delete temporary snapshot directory at path %v: %v", s.dir, delErr)
|
||||
return delErr
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// Write out the meta data
|
||||
if err := s.writeMeta(); err != nil {
|
||||
s.logger.Printf("[ERR] snapshot: Failed to write metadata: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
// Move the directory into place
|
||||
newPath := strings.TrimSuffix(s.dir, tmpSuffix)
|
||||
if err := os.Rename(s.dir, newPath); err != nil {
|
||||
s.logger.Printf("[ERR] snapshot: Failed to move snapshot into place: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
if runtime.GOOS != "windows" { //skipping fsync for directory entry edits on Windows, only needed for *nix style file systems
|
||||
parentFH, err := os.Open(s.parentDir)
|
||||
defer parentFH.Close()
|
||||
if err != nil {
|
||||
s.logger.Printf("[ERR] snapshot: Failed to open snapshot parent directory %v, error: %v", s.parentDir, err)
|
||||
return err
|
||||
}
|
||||
|
||||
if err = parentFH.Sync(); err != nil {
|
||||
s.logger.Printf("[ERR] snapshot: Failed syncing parent directory %v, error: %v", s.parentDir, err)
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Reap any old snapshots
|
||||
if err := s.store.ReapSnapshots(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Cancel is used to indicate an unsuccessful end.
|
||||
func (s *FileSnapshotSink) Cancel() error {
|
||||
// Make sure close is idempotent
|
||||
if s.closed {
|
||||
return nil
|
||||
}
|
||||
s.closed = true
|
||||
|
||||
// Close the open handles
|
||||
if err := s.finalize(); err != nil {
|
||||
s.logger.Printf("[ERR] snapshot: Failed to finalize snapshot: %v", err)
|
||||
return err
|
||||
}
|
||||
|
||||
// Attempt to remove all artifacts
|
||||
return os.RemoveAll(s.dir)
|
||||
}
|
||||
|
||||
// finalize is used to close all of our resources.
|
||||
func (s *FileSnapshotSink) finalize() error {
|
||||
// Flush any remaining data
|
||||
if err := s.buffered.Flush(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Sync to force fsync to disk
|
||||
if err := s.stateFile.Sync(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Get the file size
|
||||
stat, statErr := s.stateFile.Stat()
|
||||
|
||||
// Close the file
|
||||
if err := s.stateFile.Close(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Set the file size, check after we close
|
||||
if statErr != nil {
|
||||
return statErr
|
||||
}
|
||||
s.meta.Size = stat.Size()
|
||||
|
||||
// Set the CRC
|
||||
s.meta.CRC = s.stateHash.Sum(nil)
|
||||
return nil
|
||||
}
|
||||
|
||||
// writeMeta is used to write out the metadata we have.
|
||||
func (s *FileSnapshotSink) writeMeta() error {
|
||||
// Open the meta file
|
||||
metaPath := filepath.Join(s.dir, metaFilePath)
|
||||
fh, err := os.Create(metaPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer fh.Close()
|
||||
|
||||
// Buffer the file IO
|
||||
buffered := bufio.NewWriter(fh)
|
||||
|
||||
// Write out as JSON
|
||||
enc := json.NewEncoder(buffered)
|
||||
if err := enc.Encode(&s.meta); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err = buffered.Flush(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err = fh.Sync(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Implement the sort interface for []*fileSnapshotMeta.
|
||||
func (s snapMetaSlice) Len() int {
|
||||
return len(s)
|
||||
}
|
||||
|
||||
func (s snapMetaSlice) Less(i, j int) bool {
|
||||
if s[i].Term != s[j].Term {
|
||||
return s[i].Term < s[j].Term
|
||||
}
|
||||
if s[i].Index != s[j].Index {
|
||||
return s[i].Index < s[j].Index
|
||||
}
|
||||
return s[i].ID < s[j].ID
|
||||
}
|
||||
|
||||
func (s snapMetaSlice) Swap(i, j int) {
|
||||
s[i], s[j] = s[j], s[i]
|
||||
}
|
||||
136
vendor/github.com/hashicorp/raft/fsm.go
generated
vendored
Normal file
136
vendor/github.com/hashicorp/raft/fsm.go
generated
vendored
Normal file
@@ -0,0 +1,136 @@
|
||||
package raft
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"time"
|
||||
|
||||
"github.com/armon/go-metrics"
|
||||
)
|
||||
|
||||
// FSM provides an interface that can be implemented by
|
||||
// clients to make use of the replicated log.
|
||||
type FSM interface {
|
||||
// Apply log is invoked once a log entry is committed.
|
||||
// It returns a value which will be made available in the
|
||||
// ApplyFuture returned by Raft.Apply method if that
|
||||
// method was called on the same Raft node as the FSM.
|
||||
Apply(*Log) interface{}
|
||||
|
||||
// Snapshot is used to support log compaction. This call should
|
||||
// return an FSMSnapshot which can be used to save a point-in-time
|
||||
// snapshot of the FSM. Apply and Snapshot are not called in multiple
|
||||
// threads, but Apply will be called concurrently with Persist. This means
|
||||
// the FSM should be implemented in a fashion that allows for concurrent
|
||||
// updates while a snapshot is happening.
|
||||
Snapshot() (FSMSnapshot, error)
|
||||
|
||||
// Restore is used to restore an FSM from a snapshot. It is not called
|
||||
// concurrently with any other command. The FSM must discard all previous
|
||||
// state.
|
||||
Restore(io.ReadCloser) error
|
||||
}
|
||||
|
||||
// FSMSnapshot is returned by an FSM in response to a Snapshot call.
|
||||
// It must be safe to invoke FSMSnapshot methods with concurrent
|
||||
// calls to Apply.
|
||||
type FSMSnapshot interface {
|
||||
// Persist should dump all necessary state to the WriteCloser 'sink',
|
||||
// and call sink.Close() when finished or call sink.Cancel() on error.
|
||||
Persist(sink SnapshotSink) error
|
||||
|
||||
// Release is invoked when we are finished with the snapshot.
|
||||
Release()
|
||||
}
|
||||
|
||||
// runFSM is a long running goroutine responsible for applying logs
|
||||
// to the FSM. This is done async of other logs since we don't want
|
||||
// the FSM to block our internal operations.
|
||||
func (r *Raft) runFSM() {
|
||||
var lastIndex, lastTerm uint64
|
||||
|
||||
commit := func(req *commitTuple) {
|
||||
// Apply the log if a command
|
||||
var resp interface{}
|
||||
if req.log.Type == LogCommand {
|
||||
start := time.Now()
|
||||
resp = r.fsm.Apply(req.log)
|
||||
metrics.MeasureSince([]string{"raft", "fsm", "apply"}, start)
|
||||
}
|
||||
|
||||
// Update the indexes
|
||||
lastIndex = req.log.Index
|
||||
lastTerm = req.log.Term
|
||||
|
||||
// Invoke the future if given
|
||||
if req.future != nil {
|
||||
req.future.response = resp
|
||||
req.future.respond(nil)
|
||||
}
|
||||
}
|
||||
|
||||
restore := func(req *restoreFuture) {
|
||||
// Open the snapshot
|
||||
meta, source, err := r.snapshots.Open(req.ID)
|
||||
if err != nil {
|
||||
req.respond(fmt.Errorf("failed to open snapshot %v: %v", req.ID, err))
|
||||
return
|
||||
}
|
||||
|
||||
// Attempt to restore
|
||||
start := time.Now()
|
||||
if err := r.fsm.Restore(source); err != nil {
|
||||
req.respond(fmt.Errorf("failed to restore snapshot %v: %v", req.ID, err))
|
||||
source.Close()
|
||||
return
|
||||
}
|
||||
source.Close()
|
||||
metrics.MeasureSince([]string{"raft", "fsm", "restore"}, start)
|
||||
|
||||
// Update the last index and term
|
||||
lastIndex = meta.Index
|
||||
lastTerm = meta.Term
|
||||
req.respond(nil)
|
||||
}
|
||||
|
||||
snapshot := func(req *reqSnapshotFuture) {
|
||||
// Is there something to snapshot?
|
||||
if lastIndex == 0 {
|
||||
req.respond(ErrNothingNewToSnapshot)
|
||||
return
|
||||
}
|
||||
|
||||
// Start a snapshot
|
||||
start := time.Now()
|
||||
snap, err := r.fsm.Snapshot()
|
||||
metrics.MeasureSince([]string{"raft", "fsm", "snapshot"}, start)
|
||||
|
||||
// Respond to the request
|
||||
req.index = lastIndex
|
||||
req.term = lastTerm
|
||||
req.snapshot = snap
|
||||
req.respond(err)
|
||||
}
|
||||
|
||||
for {
|
||||
select {
|
||||
case ptr := <-r.fsmMutateCh:
|
||||
switch req := ptr.(type) {
|
||||
case *commitTuple:
|
||||
commit(req)
|
||||
|
||||
case *restoreFuture:
|
||||
restore(req)
|
||||
|
||||
default:
|
||||
panic(fmt.Errorf("bad type passed to fsmMutateCh: %#v", ptr))
|
||||
}
|
||||
|
||||
case req := <-r.fsmSnapshotCh:
|
||||
snapshot(req)
|
||||
|
||||
case <-r.shutdownCh:
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
289
vendor/github.com/hashicorp/raft/future.go
generated
vendored
Normal file
289
vendor/github.com/hashicorp/raft/future.go
generated
vendored
Normal file
@@ -0,0 +1,289 @@
|
||||
package raft
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Future is used to represent an action that may occur in the future.
|
||||
type Future interface {
|
||||
// Error blocks until the future arrives and then
|
||||
// returns the error status of the future.
|
||||
// This may be called any number of times - all
|
||||
// calls will return the same value.
|
||||
// Note that it is not OK to call this method
|
||||
// twice concurrently on the same Future instance.
|
||||
Error() error
|
||||
}
|
||||
|
||||
// IndexFuture is used for future actions that can result in a raft log entry
|
||||
// being created.
|
||||
type IndexFuture interface {
|
||||
Future
|
||||
|
||||
// Index holds the index of the newly applied log entry.
|
||||
// This must not be called until after the Error method has returned.
|
||||
Index() uint64
|
||||
}
|
||||
|
||||
// ApplyFuture is used for Apply and can return the FSM response.
|
||||
type ApplyFuture interface {
|
||||
IndexFuture
|
||||
|
||||
// Response returns the FSM response as returned
|
||||
// by the FSM.Apply method. This must not be called
|
||||
// until after the Error method has returned.
|
||||
Response() interface{}
|
||||
}
|
||||
|
||||
// ConfigurationFuture is used for GetConfiguration and can return the
|
||||
// latest configuration in use by Raft.
|
||||
type ConfigurationFuture interface {
|
||||
IndexFuture
|
||||
|
||||
// Configuration contains the latest configuration. This must
|
||||
// not be called until after the Error method has returned.
|
||||
Configuration() Configuration
|
||||
}
|
||||
|
||||
// SnapshotFuture is used for waiting on a user-triggered snapshot to complete.
|
||||
type SnapshotFuture interface {
|
||||
Future
|
||||
|
||||
// Open is a function you can call to access the underlying snapshot and
|
||||
// its metadata. This must not be called until after the Error method
|
||||
// has returned.
|
||||
Open() (*SnapshotMeta, io.ReadCloser, error)
|
||||
}
|
||||
|
||||
// errorFuture is a future that is already resolved with a fixed error.
type errorFuture struct {
	err error
}

// Error returns the stored error immediately; it never blocks.
func (e errorFuture) Error() error {
	return e.err
}

// Response always returns nil.
func (e errorFuture) Response() interface{} {
	return nil
}

// Index always returns 0.
func (e errorFuture) Index() uint64 {
	return 0
}
|
||||
|
||||
// deferError is meant to be embedded in a future whose result (an error, or
// nil on success) is supplied later through respond and retrieved through
// Error.
type deferError struct {
	err       error
	errCh     chan error
	responded bool
}

// init allocates the result channel; it must run before Error or respond
// can do anything useful.
func (d *deferError) init() {
	d.errCh = make(chan error, 1)
}

// Error blocks until respond has delivered a result and returns it. It
// panics if init was never called.
func (d *deferError) Error() error {
	switch {
	case d.err != nil:
		// A non-nil result is cached. A nil result is not distinguishable
		// from "not received yet", but respond closes the channel after
		// sending, so the receive below yields nil again without blocking.
		return d.err
	case d.errCh == nil:
		panic("waiting for response on nil channel")
	}
	d.err = <-d.errCh
	return d.err
}

// respond delivers the result. Only the first call has any effect, and it
// is a no-op when init was never called.
func (d *deferError) respond(err error) {
	if d.errCh == nil || d.responded {
		return
	}
	d.errCh <- err
	close(d.errCh)
	d.responded = true
}
|
||||
|
||||
// There are several types of requests that cause a configuration entry to
|
||||
// be appended to the log. These are encoded here for leaderLoop() to process.
|
||||
// This is internal to a single server.
|
||||
type configurationChangeFuture struct {
|
||||
logFuture
|
||||
req configurationChangeRequest
|
||||
}
|
||||
|
||||
// bootstrapFuture is used to attempt a live bootstrap of the cluster. See the
|
||||
// Raft object's BootstrapCluster member function for more details.
|
||||
type bootstrapFuture struct {
|
||||
deferError
|
||||
|
||||
// configuration is the proposed bootstrap configuration to apply.
|
||||
configuration Configuration
|
||||
}
|
||||
|
||||
// logFuture is used to apply a log entry and waits until
|
||||
// the log is considered committed.
|
||||
type logFuture struct {
|
||||
deferError
|
||||
log Log
|
||||
response interface{}
|
||||
dispatch time.Time
|
||||
}
|
||||
|
||||
func (l *logFuture) Response() interface{} {
|
||||
return l.response
|
||||
}
|
||||
|
||||
func (l *logFuture) Index() uint64 {
|
||||
return l.log.Index
|
||||
}
|
||||
|
||||
type shutdownFuture struct {
|
||||
raft *Raft
|
||||
}
|
||||
|
||||
func (s *shutdownFuture) Error() error {
|
||||
if s.raft == nil {
|
||||
return nil
|
||||
}
|
||||
s.raft.waitShutdown()
|
||||
if closeable, ok := s.raft.trans.(WithClose); ok {
|
||||
closeable.Close()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// userSnapshotFuture is used for waiting on a user-triggered snapshot to
|
||||
// complete.
|
||||
type userSnapshotFuture struct {
|
||||
deferError
|
||||
|
||||
// opener is a function used to open the snapshot. This is filled in
|
||||
// once the future returns with no error.
|
||||
opener func() (*SnapshotMeta, io.ReadCloser, error)
|
||||
}
|
||||
|
||||
// Open is a function you can call to access the underlying snapshot and its
|
||||
// metadata.
|
||||
func (u *userSnapshotFuture) Open() (*SnapshotMeta, io.ReadCloser, error) {
|
||||
if u.opener == nil {
|
||||
return nil, nil, fmt.Errorf("no snapshot available")
|
||||
} else {
|
||||
// Invalidate the opener so it can't get called multiple times,
|
||||
// which isn't generally safe.
|
||||
defer func() {
|
||||
u.opener = nil
|
||||
}()
|
||||
return u.opener()
|
||||
}
|
||||
}
|
||||
|
||||
// userRestoreFuture is used for waiting on a user-triggered restore of an
|
||||
// external snapshot to complete.
|
||||
type userRestoreFuture struct {
|
||||
deferError
|
||||
|
||||
// meta is the metadata that belongs with the snapshot.
|
||||
meta *SnapshotMeta
|
||||
|
||||
// reader is the interface to read the snapshot contents from.
|
||||
reader io.Reader
|
||||
}
|
||||
|
||||
// reqSnapshotFuture is used for requesting a snapshot start.
|
||||
// It is only used internally.
|
||||
type reqSnapshotFuture struct {
|
||||
deferError
|
||||
|
||||
// snapshot details provided by the FSM runner before responding
|
||||
index uint64
|
||||
term uint64
|
||||
snapshot FSMSnapshot
|
||||
}
|
||||
|
||||
// restoreFuture is used for requesting an FSM to perform a
|
||||
// snapshot restore. Used internally only.
|
||||
type restoreFuture struct {
|
||||
deferError
|
||||
ID string
|
||||
}
|
||||
|
||||
// verifyFuture is used to verify the current node is still
|
||||
// the leader. This is to prevent a stale read.
|
||||
type verifyFuture struct {
|
||||
deferError
|
||||
notifyCh chan *verifyFuture
|
||||
quorumSize int
|
||||
votes int
|
||||
voteLock sync.Mutex
|
||||
}
|
||||
|
||||
// configurationsFuture is used to retrieve the current configurations. This is
|
||||
// used to allow safe access to this information outside of the main thread.
|
||||
type configurationsFuture struct {
|
||||
deferError
|
||||
configurations configurations
|
||||
}
|
||||
|
||||
// Configuration returns the latest configuration in use by Raft.
|
||||
func (c *configurationsFuture) Configuration() Configuration {
|
||||
return c.configurations.latest
|
||||
}
|
||||
|
||||
// Index returns the index of the latest configuration in use by Raft.
|
||||
func (c *configurationsFuture) Index() uint64 {
|
||||
return c.configurations.latestIndex
|
||||
}
|
||||
|
||||
// vote is used to respond to a verifyFuture.
|
||||
// This may block when responding on the notifyCh.
|
||||
func (v *verifyFuture) vote(leader bool) {
|
||||
v.voteLock.Lock()
|
||||
defer v.voteLock.Unlock()
|
||||
|
||||
// Guard against having notified already
|
||||
if v.notifyCh == nil {
|
||||
return
|
||||
}
|
||||
|
||||
if leader {
|
||||
v.votes++
|
||||
if v.votes >= v.quorumSize {
|
||||
v.notifyCh <- v
|
||||
v.notifyCh = nil
|
||||
}
|
||||
} else {
|
||||
v.notifyCh <- v
|
||||
v.notifyCh = nil
|
||||
}
|
||||
}
|
||||
|
||||
// appendFuture is used for waiting on a pipelined append
|
||||
// entries RPC.
|
||||
type appendFuture struct {
|
||||
deferError
|
||||
start time.Time
|
||||
args *AppendEntriesRequest
|
||||
resp *AppendEntriesResponse
|
||||
}
|
||||
|
||||
func (a *appendFuture) Start() time.Time {
|
||||
return a.start
|
||||
}
|
||||
|
||||
func (a *appendFuture) Request() *AppendEntriesRequest {
|
||||
return a.args
|
||||
}
|
||||
|
||||
func (a *appendFuture) Response() *AppendEntriesResponse {
|
||||
return a.resp
|
||||
}
|
||||
106
vendor/github.com/hashicorp/raft/inmem_snapshot.go
generated
vendored
Normal file
106
vendor/github.com/hashicorp/raft/inmem_snapshot.go
generated
vendored
Normal file
@@ -0,0 +1,106 @@
|
||||
package raft
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// InmemSnapshotStore implements the SnapshotStore interface and
|
||||
// retains only the most recent snapshot
|
||||
type InmemSnapshotStore struct {
|
||||
latest *InmemSnapshotSink
|
||||
hasSnapshot bool
|
||||
sync.RWMutex
|
||||
}
|
||||
|
||||
// InmemSnapshotSink implements SnapshotSink in memory
|
||||
type InmemSnapshotSink struct {
|
||||
meta SnapshotMeta
|
||||
contents *bytes.Buffer
|
||||
}
|
||||
|
||||
// NewInmemSnapshotStore creates a blank new InmemSnapshotStore
|
||||
func NewInmemSnapshotStore() *InmemSnapshotStore {
|
||||
return &InmemSnapshotStore{
|
||||
latest: &InmemSnapshotSink{
|
||||
contents: &bytes.Buffer{},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// Create replaces the stored snapshot with a new one using the given args
|
||||
func (m *InmemSnapshotStore) Create(version SnapshotVersion, index, term uint64,
|
||||
configuration Configuration, configurationIndex uint64, trans Transport) (SnapshotSink, error) {
|
||||
// We only support version 1 snapshots at this time.
|
||||
if version != 1 {
|
||||
return nil, fmt.Errorf("unsupported snapshot version %d", version)
|
||||
}
|
||||
|
||||
name := snapshotName(term, index)
|
||||
|
||||
m.Lock()
|
||||
defer m.Unlock()
|
||||
|
||||
sink := &InmemSnapshotSink{
|
||||
meta: SnapshotMeta{
|
||||
Version: version,
|
||||
ID: name,
|
||||
Index: index,
|
||||
Term: term,
|
||||
Peers: encodePeers(configuration, trans),
|
||||
Configuration: configuration,
|
||||
ConfigurationIndex: configurationIndex,
|
||||
},
|
||||
contents: &bytes.Buffer{},
|
||||
}
|
||||
m.hasSnapshot = true
|
||||
m.latest = sink
|
||||
|
||||
return sink, nil
|
||||
}
|
||||
|
||||
// List returns the latest snapshot taken
|
||||
func (m *InmemSnapshotStore) List() ([]*SnapshotMeta, error) {
|
||||
m.RLock()
|
||||
defer m.RUnlock()
|
||||
|
||||
if !m.hasSnapshot {
|
||||
return []*SnapshotMeta{}, nil
|
||||
}
|
||||
return []*SnapshotMeta{&m.latest.meta}, nil
|
||||
}
|
||||
|
||||
// Open wraps an io.ReadCloser around the snapshot contents
|
||||
func (m *InmemSnapshotStore) Open(id string) (*SnapshotMeta, io.ReadCloser, error) {
|
||||
m.RLock()
|
||||
defer m.RUnlock()
|
||||
|
||||
if m.latest.meta.ID != id {
|
||||
return nil, nil, fmt.Errorf("[ERR] snapshot: failed to open snapshot id: %s", id)
|
||||
}
|
||||
|
||||
return &m.latest.meta, ioutil.NopCloser(m.latest.contents), nil
|
||||
}
|
||||
|
||||
// Write appends the given bytes to the snapshot contents
|
||||
func (s *InmemSnapshotSink) Write(p []byte) (n int, err error) {
|
||||
written, err := io.Copy(s.contents, bytes.NewReader(p))
|
||||
s.meta.Size += written
|
||||
return int(written), err
|
||||
}
|
||||
|
||||
// Close updates the Size and is otherwise a no-op
|
||||
func (s *InmemSnapshotSink) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *InmemSnapshotSink) ID() string {
|
||||
return s.meta.ID
|
||||
}
|
||||
|
||||
func (s *InmemSnapshotSink) Cancel() error {
|
||||
return nil
|
||||
}
|
||||
125
vendor/github.com/hashicorp/raft/inmem_store.go
generated
vendored
Normal file
125
vendor/github.com/hashicorp/raft/inmem_store.go
generated
vendored
Normal file
@@ -0,0 +1,125 @@
|
||||
package raft
|
||||
|
||||
import (
|
||||
"sync"
|
||||
)
|
||||
|
||||
// InmemStore implements the LogStore and StableStore interface.
|
||||
// It should NOT EVER be used for production. It is used only for
|
||||
// unit tests. Use the MDBStore implementation instead.
|
||||
type InmemStore struct {
|
||||
l sync.RWMutex
|
||||
lowIndex uint64
|
||||
highIndex uint64
|
||||
logs map[uint64]*Log
|
||||
kv map[string][]byte
|
||||
kvInt map[string]uint64
|
||||
}
|
||||
|
||||
// NewInmemStore returns a new in-memory backend. Do not ever
|
||||
// use for production. Only for testing.
|
||||
func NewInmemStore() *InmemStore {
|
||||
i := &InmemStore{
|
||||
logs: make(map[uint64]*Log),
|
||||
kv: make(map[string][]byte),
|
||||
kvInt: make(map[string]uint64),
|
||||
}
|
||||
return i
|
||||
}
|
||||
|
||||
// FirstIndex implements the LogStore interface.
|
||||
func (i *InmemStore) FirstIndex() (uint64, error) {
|
||||
i.l.RLock()
|
||||
defer i.l.RUnlock()
|
||||
return i.lowIndex, nil
|
||||
}
|
||||
|
||||
// LastIndex implements the LogStore interface.
|
||||
func (i *InmemStore) LastIndex() (uint64, error) {
|
||||
i.l.RLock()
|
||||
defer i.l.RUnlock()
|
||||
return i.highIndex, nil
|
||||
}
|
||||
|
||||
// GetLog implements the LogStore interface.
|
||||
func (i *InmemStore) GetLog(index uint64, log *Log) error {
|
||||
i.l.RLock()
|
||||
defer i.l.RUnlock()
|
||||
l, ok := i.logs[index]
|
||||
if !ok {
|
||||
return ErrLogNotFound
|
||||
}
|
||||
*log = *l
|
||||
return nil
|
||||
}
|
||||
|
||||
// StoreLog implements the LogStore interface.
|
||||
func (i *InmemStore) StoreLog(log *Log) error {
|
||||
return i.StoreLogs([]*Log{log})
|
||||
}
|
||||
|
||||
// StoreLogs implements the LogStore interface.
|
||||
func (i *InmemStore) StoreLogs(logs []*Log) error {
|
||||
i.l.Lock()
|
||||
defer i.l.Unlock()
|
||||
for _, l := range logs {
|
||||
i.logs[l.Index] = l
|
||||
if i.lowIndex == 0 {
|
||||
i.lowIndex = l.Index
|
||||
}
|
||||
if l.Index > i.highIndex {
|
||||
i.highIndex = l.Index
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// DeleteRange implements the LogStore interface.
|
||||
func (i *InmemStore) DeleteRange(min, max uint64) error {
|
||||
i.l.Lock()
|
||||
defer i.l.Unlock()
|
||||
for j := min; j <= max; j++ {
|
||||
delete(i.logs, j)
|
||||
}
|
||||
if min <= i.lowIndex {
|
||||
i.lowIndex = max + 1
|
||||
}
|
||||
if max >= i.highIndex {
|
||||
i.highIndex = min - 1
|
||||
}
|
||||
if i.lowIndex > i.highIndex {
|
||||
i.lowIndex = 0
|
||||
i.highIndex = 0
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Set implements the StableStore interface.
|
||||
func (i *InmemStore) Set(key []byte, val []byte) error {
|
||||
i.l.Lock()
|
||||
defer i.l.Unlock()
|
||||
i.kv[string(key)] = val
|
||||
return nil
|
||||
}
|
||||
|
||||
// Get implements the StableStore interface.
|
||||
func (i *InmemStore) Get(key []byte) ([]byte, error) {
|
||||
i.l.RLock()
|
||||
defer i.l.RUnlock()
|
||||
return i.kv[string(key)], nil
|
||||
}
|
||||
|
||||
// SetUint64 implements the StableStore interface.
|
||||
func (i *InmemStore) SetUint64(key []byte, val uint64) error {
|
||||
i.l.Lock()
|
||||
defer i.l.Unlock()
|
||||
i.kvInt[string(key)] = val
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetUint64 implements the StableStore interface.
|
||||
func (i *InmemStore) GetUint64(key []byte) (uint64, error) {
|
||||
i.l.RLock()
|
||||
defer i.l.RUnlock()
|
||||
return i.kvInt[string(key)], nil
|
||||
}
|
||||
322
vendor/github.com/hashicorp/raft/inmem_transport.go
generated
vendored
Normal file
322
vendor/github.com/hashicorp/raft/inmem_transport.go
generated
vendored
Normal file
@@ -0,0 +1,322 @@
|
||||
package raft
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// NewInmemAddr returns a new in-memory addr with
|
||||
// a randomly generate UUID as the ID.
|
||||
func NewInmemAddr() ServerAddress {
|
||||
return ServerAddress(generateUUID())
|
||||
}
|
||||
|
||||
// inmemPipeline is used to pipeline requests for the in-mem transport.
|
||||
type inmemPipeline struct {
|
||||
trans *InmemTransport
|
||||
peer *InmemTransport
|
||||
peerAddr ServerAddress
|
||||
|
||||
doneCh chan AppendFuture
|
||||
inprogressCh chan *inmemPipelineInflight
|
||||
|
||||
shutdown bool
|
||||
shutdownCh chan struct{}
|
||||
shutdownLock sync.Mutex
|
||||
}
|
||||
|
||||
type inmemPipelineInflight struct {
|
||||
future *appendFuture
|
||||
respCh <-chan RPCResponse
|
||||
}
|
||||
|
||||
// InmemTransport Implements the Transport interface, to allow Raft to be
|
||||
// tested in-memory without going over a network.
|
||||
type InmemTransport struct {
|
||||
sync.RWMutex
|
||||
consumerCh chan RPC
|
||||
localAddr ServerAddress
|
||||
peers map[ServerAddress]*InmemTransport
|
||||
pipelines []*inmemPipeline
|
||||
timeout time.Duration
|
||||
}
|
||||
|
||||
// NewInmemTransport is used to initialize a new transport
|
||||
// and generates a random local address if none is specified
|
||||
func NewInmemTransport(addr ServerAddress) (ServerAddress, *InmemTransport) {
|
||||
if string(addr) == "" {
|
||||
addr = NewInmemAddr()
|
||||
}
|
||||
trans := &InmemTransport{
|
||||
consumerCh: make(chan RPC, 16),
|
||||
localAddr: addr,
|
||||
peers: make(map[ServerAddress]*InmemTransport),
|
||||
timeout: 50 * time.Millisecond,
|
||||
}
|
||||
return addr, trans
|
||||
}
|
||||
|
||||
// SetHeartbeatHandler is used to set optional fast-path for
|
||||
// heartbeats, not supported for this transport.
|
||||
func (i *InmemTransport) SetHeartbeatHandler(cb func(RPC)) {
|
||||
}
|
||||
|
||||
// Consumer implements the Transport interface.
|
||||
func (i *InmemTransport) Consumer() <-chan RPC {
|
||||
return i.consumerCh
|
||||
}
|
||||
|
||||
// LocalAddr implements the Transport interface.
|
||||
func (i *InmemTransport) LocalAddr() ServerAddress {
|
||||
return i.localAddr
|
||||
}
|
||||
|
||||
// AppendEntriesPipeline returns an interface that can be used to pipeline
|
||||
// AppendEntries requests.
|
||||
func (i *InmemTransport) AppendEntriesPipeline(id ServerID, target ServerAddress) (AppendPipeline, error) {
|
||||
i.RLock()
|
||||
peer, ok := i.peers[target]
|
||||
i.RUnlock()
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("failed to connect to peer: %v", target)
|
||||
}
|
||||
pipeline := newInmemPipeline(i, peer, target)
|
||||
i.Lock()
|
||||
i.pipelines = append(i.pipelines, pipeline)
|
||||
i.Unlock()
|
||||
return pipeline, nil
|
||||
}
|
||||
|
||||
// AppendEntries implements the Transport interface.
|
||||
func (i *InmemTransport) AppendEntries(id ServerID, target ServerAddress, args *AppendEntriesRequest, resp *AppendEntriesResponse) error {
|
||||
rpcResp, err := i.makeRPC(target, args, nil, i.timeout)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Copy the result back
|
||||
out := rpcResp.Response.(*AppendEntriesResponse)
|
||||
*resp = *out
|
||||
return nil
|
||||
}
|
||||
|
||||
// RequestVote implements the Transport interface.
|
||||
func (i *InmemTransport) RequestVote(id ServerID, target ServerAddress, args *RequestVoteRequest, resp *RequestVoteResponse) error {
|
||||
rpcResp, err := i.makeRPC(target, args, nil, i.timeout)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Copy the result back
|
||||
out := rpcResp.Response.(*RequestVoteResponse)
|
||||
*resp = *out
|
||||
return nil
|
||||
}
|
||||
|
||||
// InstallSnapshot implements the Transport interface.
|
||||
func (i *InmemTransport) InstallSnapshot(id ServerID, target ServerAddress, args *InstallSnapshotRequest, resp *InstallSnapshotResponse, data io.Reader) error {
|
||||
rpcResp, err := i.makeRPC(target, args, data, 10*i.timeout)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Copy the result back
|
||||
out := rpcResp.Response.(*InstallSnapshotResponse)
|
||||
*resp = *out
|
||||
return nil
|
||||
}
|
||||
|
||||
func (i *InmemTransport) makeRPC(target ServerAddress, args interface{}, r io.Reader, timeout time.Duration) (rpcResp RPCResponse, err error) {
|
||||
i.RLock()
|
||||
peer, ok := i.peers[target]
|
||||
i.RUnlock()
|
||||
|
||||
if !ok {
|
||||
err = fmt.Errorf("failed to connect to peer: %v", target)
|
||||
return
|
||||
}
|
||||
|
||||
// Send the RPC over
|
||||
respCh := make(chan RPCResponse)
|
||||
peer.consumerCh <- RPC{
|
||||
Command: args,
|
||||
Reader: r,
|
||||
RespChan: respCh,
|
||||
}
|
||||
|
||||
// Wait for a response
|
||||
select {
|
||||
case rpcResp = <-respCh:
|
||||
if rpcResp.Error != nil {
|
||||
err = rpcResp.Error
|
||||
}
|
||||
case <-time.After(timeout):
|
||||
err = fmt.Errorf("command timed out")
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// EncodePeer implements the Transport interface.
|
||||
func (i *InmemTransport) EncodePeer(id ServerID, p ServerAddress) []byte {
|
||||
return []byte(p)
|
||||
}
|
||||
|
||||
// DecodePeer implements the Transport interface.
|
||||
func (i *InmemTransport) DecodePeer(buf []byte) ServerAddress {
|
||||
return ServerAddress(buf)
|
||||
}
|
||||
|
||||
// Connect is used to connect this transport to another transport for
|
||||
// a given peer name. This allows for local routing.
|
||||
func (i *InmemTransport) Connect(peer ServerAddress, t Transport) {
|
||||
trans := t.(*InmemTransport)
|
||||
i.Lock()
|
||||
defer i.Unlock()
|
||||
i.peers[peer] = trans
|
||||
}
|
||||
|
||||
// Disconnect is used to remove the ability to route to a given peer.
|
||||
func (i *InmemTransport) Disconnect(peer ServerAddress) {
|
||||
i.Lock()
|
||||
defer i.Unlock()
|
||||
delete(i.peers, peer)
|
||||
|
||||
// Disconnect any pipelines
|
||||
n := len(i.pipelines)
|
||||
for idx := 0; idx < n; idx++ {
|
||||
if i.pipelines[idx].peerAddr == peer {
|
||||
i.pipelines[idx].Close()
|
||||
i.pipelines[idx], i.pipelines[n-1] = i.pipelines[n-1], nil
|
||||
idx--
|
||||
n--
|
||||
}
|
||||
}
|
||||
i.pipelines = i.pipelines[:n]
|
||||
}
|
||||
|
||||
// DisconnectAll is used to remove all routes to peers.
|
||||
func (i *InmemTransport) DisconnectAll() {
|
||||
i.Lock()
|
||||
defer i.Unlock()
|
||||
i.peers = make(map[ServerAddress]*InmemTransport)
|
||||
|
||||
// Handle pipelines
|
||||
for _, pipeline := range i.pipelines {
|
||||
pipeline.Close()
|
||||
}
|
||||
i.pipelines = nil
|
||||
}
|
||||
|
||||
// Close is used to permanently disable the transport
|
||||
func (i *InmemTransport) Close() error {
|
||||
i.DisconnectAll()
|
||||
return nil
|
||||
}
|
||||
|
||||
func newInmemPipeline(trans *InmemTransport, peer *InmemTransport, addr ServerAddress) *inmemPipeline {
|
||||
i := &inmemPipeline{
|
||||
trans: trans,
|
||||
peer: peer,
|
||||
peerAddr: addr,
|
||||
doneCh: make(chan AppendFuture, 16),
|
||||
inprogressCh: make(chan *inmemPipelineInflight, 16),
|
||||
shutdownCh: make(chan struct{}),
|
||||
}
|
||||
go i.decodeResponses()
|
||||
return i
|
||||
}
|
||||
|
||||
func (i *inmemPipeline) decodeResponses() {
|
||||
timeout := i.trans.timeout
|
||||
for {
|
||||
select {
|
||||
case inp := <-i.inprogressCh:
|
||||
var timeoutCh <-chan time.Time
|
||||
if timeout > 0 {
|
||||
timeoutCh = time.After(timeout)
|
||||
}
|
||||
|
||||
select {
|
||||
case rpcResp := <-inp.respCh:
|
||||
// Copy the result back
|
||||
*inp.future.resp = *rpcResp.Response.(*AppendEntriesResponse)
|
||||
inp.future.respond(rpcResp.Error)
|
||||
|
||||
select {
|
||||
case i.doneCh <- inp.future:
|
||||
case <-i.shutdownCh:
|
||||
return
|
||||
}
|
||||
|
||||
case <-timeoutCh:
|
||||
inp.future.respond(fmt.Errorf("command timed out"))
|
||||
select {
|
||||
case i.doneCh <- inp.future:
|
||||
case <-i.shutdownCh:
|
||||
return
|
||||
}
|
||||
|
||||
case <-i.shutdownCh:
|
||||
return
|
||||
}
|
||||
case <-i.shutdownCh:
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (i *inmemPipeline) AppendEntries(args *AppendEntriesRequest, resp *AppendEntriesResponse) (AppendFuture, error) {
|
||||
// Create a new future
|
||||
future := &appendFuture{
|
||||
start: time.Now(),
|
||||
args: args,
|
||||
resp: resp,
|
||||
}
|
||||
future.init()
|
||||
|
||||
// Handle a timeout
|
||||
var timeout <-chan time.Time
|
||||
if i.trans.timeout > 0 {
|
||||
timeout = time.After(i.trans.timeout)
|
||||
}
|
||||
|
||||
// Send the RPC over
|
||||
respCh := make(chan RPCResponse, 1)
|
||||
rpc := RPC{
|
||||
Command: args,
|
||||
RespChan: respCh,
|
||||
}
|
||||
select {
|
||||
case i.peer.consumerCh <- rpc:
|
||||
case <-timeout:
|
||||
return nil, fmt.Errorf("command enqueue timeout")
|
||||
case <-i.shutdownCh:
|
||||
return nil, ErrPipelineShutdown
|
||||
}
|
||||
|
||||
// Send to be decoded
|
||||
select {
|
||||
case i.inprogressCh <- &inmemPipelineInflight{future, respCh}:
|
||||
return future, nil
|
||||
case <-i.shutdownCh:
|
||||
return nil, ErrPipelineShutdown
|
||||
}
|
||||
}
|
||||
|
||||
func (i *inmemPipeline) Consumer() <-chan AppendFuture {
|
||||
return i.doneCh
|
||||
}
|
||||
|
||||
func (i *inmemPipeline) Close() error {
|
||||
i.shutdownLock.Lock()
|
||||
defer i.shutdownLock.Unlock()
|
||||
if i.shutdown {
|
||||
return nil
|
||||
}
|
||||
|
||||
i.shutdown = true
|
||||
close(i.shutdownCh)
|
||||
return nil
|
||||
}
|
||||
72
vendor/github.com/hashicorp/raft/log.go
generated
vendored
Normal file
72
vendor/github.com/hashicorp/raft/log.go
generated
vendored
Normal file
@@ -0,0 +1,72 @@
|
||||
package raft
|
||||
|
||||
// LogType describes various types of log entries.
type LogType uint8

// The log entry types, numbered consecutively from zero in declaration order.
const (
	// LogCommand is applied to a user FSM.
	LogCommand LogType = iota

	// LogNoop is used to assert leadership.
	LogNoop

	// LogAddPeerDeprecated is used to add a new peer. This should only be
	// used with older protocol versions designed to be compatible with
	// unversioned Raft servers. See comments in config.go for details.
	LogAddPeerDeprecated

	// LogRemovePeerDeprecated is used to remove an existing peer. This
	// should only be used with older protocol versions designed to be
	// compatible with unversioned Raft servers. See comments in config.go
	// for details.
	LogRemovePeerDeprecated

	// LogBarrier is used to ensure all preceding operations have been
	// applied to the FSM. It is similar to LogNoop, but instead of returning
	// once committed, it only returns once the FSM manager acks it.
	// Otherwise it is possible there are operations committed but not yet
	// applied to the FSM.
	LogBarrier

	// LogConfiguration establishes a membership change configuration. It is
	// created when a server is added, removed, promoted, etc. Only used when
	// protocol version 1 or greater is in use.
	LogConfiguration
)
|
||||
|
||||
// Log entries are replicated to all members of the Raft cluster
|
||||
// and form the heart of the replicated state machine.
|
||||
type Log struct {
|
||||
// Index holds the index of the log entry.
|
||||
Index uint64
|
||||
|
||||
// Term holds the election term of the log entry.
|
||||
Term uint64
|
||||
|
||||
// Type holds the type of the log entry.
|
||||
Type LogType
|
||||
|
||||
// Data holds the log entry's type-specific data.
|
||||
Data []byte
|
||||
}
|
||||
|
||||
// LogStore is used to provide an interface for storing
|
||||
// and retrieving logs in a durable fashion.
|
||||
type LogStore interface {
|
||||
// FirstIndex returns the first index written. 0 for no entries.
|
||||
FirstIndex() (uint64, error)
|
||||
|
||||
// LastIndex returns the last index written. 0 for no entries.
|
||||
LastIndex() (uint64, error)
|
||||
|
||||
// GetLog gets a log entry at a given index.
|
||||
GetLog(index uint64, log *Log) error
|
||||
|
||||
// StoreLog stores a log entry.
|
||||
StoreLog(log *Log) error
|
||||
|
||||
// StoreLogs stores multiple log entries.
|
||||
StoreLogs(logs []*Log) error
|
||||
|
||||
// DeleteRange deletes a range of log entries. The range is inclusive.
|
||||
DeleteRange(min, max uint64) error
|
||||
}
|
||||
79
vendor/github.com/hashicorp/raft/log_cache.go
generated
vendored
Normal file
79
vendor/github.com/hashicorp/raft/log_cache.go
generated
vendored
Normal file
@@ -0,0 +1,79 @@
|
||||
package raft
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// LogCache wraps any LogStore implementation to provide an
|
||||
// in-memory ring buffer. This is used to cache access to
|
||||
// the recently written entries. For implementations that do not
|
||||
// cache themselves, this can provide a substantial boost by
|
||||
// avoiding disk I/O on recent entries.
|
||||
type LogCache struct {
|
||||
store LogStore
|
||||
|
||||
cache []*Log
|
||||
l sync.RWMutex
|
||||
}
|
||||
|
||||
// NewLogCache is used to create a new LogCache with the
|
||||
// given capacity and backend store.
|
||||
func NewLogCache(capacity int, store LogStore) (*LogCache, error) {
|
||||
if capacity <= 0 {
|
||||
return nil, fmt.Errorf("capacity must be positive")
|
||||
}
|
||||
c := &LogCache{
|
||||
store: store,
|
||||
cache: make([]*Log, capacity),
|
||||
}
|
||||
return c, nil
|
||||
}
|
||||
|
||||
func (c *LogCache) GetLog(idx uint64, log *Log) error {
|
||||
// Check the buffer for an entry
|
||||
c.l.RLock()
|
||||
cached := c.cache[idx%uint64(len(c.cache))]
|
||||
c.l.RUnlock()
|
||||
|
||||
// Check if entry is valid
|
||||
if cached != nil && cached.Index == idx {
|
||||
*log = *cached
|
||||
return nil
|
||||
}
|
||||
|
||||
// Forward request on cache miss
|
||||
return c.store.GetLog(idx, log)
|
||||
}
|
||||
|
||||
func (c *LogCache) StoreLog(log *Log) error {
|
||||
return c.StoreLogs([]*Log{log})
|
||||
}
|
||||
|
||||
func (c *LogCache) StoreLogs(logs []*Log) error {
|
||||
// Insert the logs into the ring buffer
|
||||
c.l.Lock()
|
||||
for _, l := range logs {
|
||||
c.cache[l.Index%uint64(len(c.cache))] = l
|
||||
}
|
||||
c.l.Unlock()
|
||||
|
||||
return c.store.StoreLogs(logs)
|
||||
}
|
||||
|
||||
func (c *LogCache) FirstIndex() (uint64, error) {
|
||||
return c.store.FirstIndex()
|
||||
}
|
||||
|
||||
func (c *LogCache) LastIndex() (uint64, error) {
|
||||
return c.store.LastIndex()
|
||||
}
|
||||
|
||||
func (c *LogCache) DeleteRange(min, max uint64) error {
|
||||
// Invalidate the cache on deletes
|
||||
c.l.Lock()
|
||||
c.cache = make([]*Log, len(c.cache))
|
||||
c.l.Unlock()
|
||||
|
||||
return c.store.DeleteRange(min, max)
|
||||
}
|
||||
676
vendor/github.com/hashicorp/raft/net_transport.go
generated
vendored
Normal file
676
vendor/github.com/hashicorp/raft/net_transport.go
generated
vendored
Normal file
@@ -0,0 +1,676 @@
|
||||
package raft
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net"
|
||||
"os"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/go-msgpack/codec"
|
||||
)
|
||||
|
||||
// RPC type tags. Each request on the wire is prefixed with one of these
// bytes.
const (
	rpcAppendEntries uint8 = iota
	rpcRequestVote
	rpcInstallSnapshot
)

const (
	// DefaultTimeoutScale is the default TimeoutScale in a NetworkTransport.
	DefaultTimeoutScale = 256 * 1024 // 256KB

	// rpcMaxPipeline controls the maximum number of outstanding
	// AppendEntries RPC calls.
	rpcMaxPipeline = 128
)
|
||||
|
||||
// ErrTransportShutdown is returned when operations on a transport are
// invoked after it's been terminated.
var ErrTransportShutdown = errors.New("transport shutdown")

// ErrPipelineShutdown is returned when the pipeline is closed.
var ErrPipelineShutdown = errors.New("append pipeline closed")
|
||||
|
||||
/*
|
||||
|
||||
NetworkTransport provides a network based transport that can be
|
||||
used to communicate with Raft on remote machines. It requires
|
||||
an underlying stream layer to provide a stream abstraction, which can
|
||||
be simple TCP, TLS, etc.
|
||||
|
||||
This transport is very simple and lightweight. Each RPC request is
|
||||
framed by sending a byte that indicates the message type, followed
|
||||
by the MsgPack encoded request.
|
||||
|
||||
The response is an error string followed by the response object,
|
||||
both are encoded using MsgPack.
|
||||
|
||||
InstallSnapshot is special, in that after the RPC request we stream
|
||||
the entire state. That socket is not re-used as the connection state
|
||||
is not known if there is an error.
|
||||
|
||||
*/
|
||||
type NetworkTransport struct {
|
||||
connPool map[ServerAddress][]*netConn
|
||||
connPoolLock sync.Mutex
|
||||
|
||||
consumeCh chan RPC
|
||||
|
||||
heartbeatFn func(RPC)
|
||||
heartbeatFnLock sync.Mutex
|
||||
|
||||
logger *log.Logger
|
||||
|
||||
maxPool int
|
||||
|
||||
serverAddressProvider ServerAddressProvider
|
||||
|
||||
shutdown bool
|
||||
shutdownCh chan struct{}
|
||||
shutdownLock sync.Mutex
|
||||
|
||||
stream StreamLayer
|
||||
|
||||
timeout time.Duration
|
||||
TimeoutScale int
|
||||
}
|
||||
|
||||
// NetworkTransportConfig encapsulates configuration for the network transport layer.
|
||||
type NetworkTransportConfig struct {
|
||||
// ServerAddressProvider is used to override the target address when establishing a connection to invoke an RPC
|
||||
ServerAddressProvider ServerAddressProvider
|
||||
|
||||
Logger *log.Logger
|
||||
|
||||
// Dialer
|
||||
Stream StreamLayer
|
||||
|
||||
// MaxPool controls how many connections we will pool
|
||||
MaxPool int
|
||||
|
||||
// Timeout is used to apply I/O deadlines. For InstallSnapshot, we multiply
|
||||
// the timeout by (SnapshotSize / TimeoutScale).
|
||||
Timeout time.Duration
|
||||
}
|
||||
|
||||
type ServerAddressProvider interface {
|
||||
ServerAddr(id ServerID) (ServerAddress, error)
|
||||
}
|
||||
|
||||
// StreamLayer is used with the NetworkTransport to provide
|
||||
// the low level stream abstraction.
|
||||
type StreamLayer interface {
|
||||
net.Listener
|
||||
|
||||
// Dial is used to create a new outgoing connection
|
||||
Dial(address ServerAddress, timeout time.Duration) (net.Conn, error)
|
||||
}
|
||||
|
||||
type netConn struct {
|
||||
target ServerAddress
|
||||
conn net.Conn
|
||||
r *bufio.Reader
|
||||
w *bufio.Writer
|
||||
dec *codec.Decoder
|
||||
enc *codec.Encoder
|
||||
}
|
||||
|
||||
func (n *netConn) Release() error {
|
||||
return n.conn.Close()
|
||||
}
|
||||
|
||||
type netPipeline struct {
|
||||
conn *netConn
|
||||
trans *NetworkTransport
|
||||
|
||||
doneCh chan AppendFuture
|
||||
inprogressCh chan *appendFuture
|
||||
|
||||
shutdown bool
|
||||
shutdownCh chan struct{}
|
||||
shutdownLock sync.Mutex
|
||||
}
|
||||
|
||||
// NewNetworkTransportWithConfig creates a new network transport with the given config struct
|
||||
func NewNetworkTransportWithConfig(
|
||||
config *NetworkTransportConfig,
|
||||
) *NetworkTransport {
|
||||
if config.Logger == nil {
|
||||
config.Logger = log.New(os.Stderr, "", log.LstdFlags)
|
||||
}
|
||||
trans := &NetworkTransport{
|
||||
connPool: make(map[ServerAddress][]*netConn),
|
||||
consumeCh: make(chan RPC),
|
||||
logger: config.Logger,
|
||||
maxPool: config.MaxPool,
|
||||
shutdownCh: make(chan struct{}),
|
||||
stream: config.Stream,
|
||||
timeout: config.Timeout,
|
||||
TimeoutScale: DefaultTimeoutScale,
|
||||
serverAddressProvider: config.ServerAddressProvider,
|
||||
}
|
||||
go trans.listen()
|
||||
return trans
|
||||
}
|
||||
|
||||
// NewNetworkTransport creates a new network transport with the given dialer
|
||||
// and listener. The maxPool controls how many connections we will pool. The
|
||||
// timeout is used to apply I/O deadlines. For InstallSnapshot, we multiply
|
||||
// the timeout by (SnapshotSize / TimeoutScale).
|
||||
func NewNetworkTransport(
|
||||
stream StreamLayer,
|
||||
maxPool int,
|
||||
timeout time.Duration,
|
||||
logOutput io.Writer,
|
||||
) *NetworkTransport {
|
||||
if logOutput == nil {
|
||||
logOutput = os.Stderr
|
||||
}
|
||||
logger := log.New(logOutput, "", log.LstdFlags)
|
||||
config := &NetworkTransportConfig{Stream: stream, MaxPool: maxPool, Timeout: timeout, Logger: logger}
|
||||
return NewNetworkTransportWithConfig(config)
|
||||
}
|
||||
|
||||
// NewNetworkTransportWithLogger creates a new network transport with the given logger, dialer
|
||||
// and listener. The maxPool controls how many connections we will pool. The
|
||||
// timeout is used to apply I/O deadlines. For InstallSnapshot, we multiply
|
||||
// the timeout by (SnapshotSize / TimeoutScale).
|
||||
func NewNetworkTransportWithLogger(
|
||||
stream StreamLayer,
|
||||
maxPool int,
|
||||
timeout time.Duration,
|
||||
logger *log.Logger,
|
||||
) *NetworkTransport {
|
||||
config := &NetworkTransportConfig{Stream: stream, MaxPool: maxPool, Timeout: timeout, Logger: logger}
|
||||
return NewNetworkTransportWithConfig(config)
|
||||
}
|
||||
|
||||
// SetHeartbeatHandler is used to setup a heartbeat handler
|
||||
// as a fast-pass. This is to avoid head-of-line blocking from
|
||||
// disk IO.
|
||||
func (n *NetworkTransport) SetHeartbeatHandler(cb func(rpc RPC)) {
|
||||
n.heartbeatFnLock.Lock()
|
||||
defer n.heartbeatFnLock.Unlock()
|
||||
n.heartbeatFn = cb
|
||||
}
|
||||
|
||||
// Close is used to stop the network transport.
|
||||
func (n *NetworkTransport) Close() error {
|
||||
n.shutdownLock.Lock()
|
||||
defer n.shutdownLock.Unlock()
|
||||
|
||||
if !n.shutdown {
|
||||
close(n.shutdownCh)
|
||||
n.stream.Close()
|
||||
n.shutdown = true
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Consumer implements the Transport interface.
|
||||
func (n *NetworkTransport) Consumer() <-chan RPC {
|
||||
return n.consumeCh
|
||||
}
|
||||
|
||||
// LocalAddr implements the Transport interface.
|
||||
func (n *NetworkTransport) LocalAddr() ServerAddress {
|
||||
return ServerAddress(n.stream.Addr().String())
|
||||
}
|
||||
|
||||
// IsShutdown is used to check if the transport is shutdown.
|
||||
func (n *NetworkTransport) IsShutdown() bool {
|
||||
select {
|
||||
case <-n.shutdownCh:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// getExistingConn is used to grab a pooled connection.
|
||||
func (n *NetworkTransport) getPooledConn(target ServerAddress) *netConn {
|
||||
n.connPoolLock.Lock()
|
||||
defer n.connPoolLock.Unlock()
|
||||
|
||||
conns, ok := n.connPool[target]
|
||||
if !ok || len(conns) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
var conn *netConn
|
||||
num := len(conns)
|
||||
conn, conns[num-1] = conns[num-1], nil
|
||||
n.connPool[target] = conns[:num-1]
|
||||
return conn
|
||||
}
|
||||
|
||||
// getConnFromAddressProvider returns a connection from the server address provider if available, or defaults to a connection using the target server address
|
||||
func (n *NetworkTransport) getConnFromAddressProvider(id ServerID, target ServerAddress) (*netConn, error) {
|
||||
address := n.getProviderAddressOrFallback(id, target)
|
||||
return n.getConn(address)
|
||||
}
|
||||
|
||||
func (n *NetworkTransport) getProviderAddressOrFallback(id ServerID, target ServerAddress) ServerAddress {
|
||||
if n.serverAddressProvider != nil {
|
||||
serverAddressOverride, err := n.serverAddressProvider.ServerAddr(id)
|
||||
if err != nil {
|
||||
n.logger.Printf("[WARN] Unable to get address for server id %v, using fallback address %v: %v", id, target, err)
|
||||
} else {
|
||||
return serverAddressOverride
|
||||
}
|
||||
}
|
||||
return target
|
||||
}
|
||||
|
||||
// getConn is used to get a connection from the pool.
|
||||
func (n *NetworkTransport) getConn(target ServerAddress) (*netConn, error) {
|
||||
// Check for a pooled conn
|
||||
if conn := n.getPooledConn(target); conn != nil {
|
||||
return conn, nil
|
||||
}
|
||||
|
||||
// Dial a new connection
|
||||
conn, err := n.stream.Dial(target, n.timeout)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Wrap the conn
|
||||
netConn := &netConn{
|
||||
target: target,
|
||||
conn: conn,
|
||||
r: bufio.NewReader(conn),
|
||||
w: bufio.NewWriter(conn),
|
||||
}
|
||||
|
||||
// Setup encoder/decoders
|
||||
netConn.dec = codec.NewDecoder(netConn.r, &codec.MsgpackHandle{})
|
||||
netConn.enc = codec.NewEncoder(netConn.w, &codec.MsgpackHandle{})
|
||||
|
||||
// Done
|
||||
return netConn, nil
|
||||
}
|
||||
|
||||
// returnConn returns a connection back to the pool.
|
||||
func (n *NetworkTransport) returnConn(conn *netConn) {
|
||||
n.connPoolLock.Lock()
|
||||
defer n.connPoolLock.Unlock()
|
||||
|
||||
key := conn.target
|
||||
conns, _ := n.connPool[key]
|
||||
|
||||
if !n.IsShutdown() && len(conns) < n.maxPool {
|
||||
n.connPool[key] = append(conns, conn)
|
||||
} else {
|
||||
conn.Release()
|
||||
}
|
||||
}
|
||||
|
||||
// AppendEntriesPipeline returns an interface that can be used to pipeline
|
||||
// AppendEntries requests.
|
||||
func (n *NetworkTransport) AppendEntriesPipeline(id ServerID, target ServerAddress) (AppendPipeline, error) {
|
||||
// Get a connection
|
||||
conn, err := n.getConnFromAddressProvider(id, target)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Create the pipeline
|
||||
return newNetPipeline(n, conn), nil
|
||||
}
|
||||
|
||||
// AppendEntries implements the Transport interface.
|
||||
func (n *NetworkTransport) AppendEntries(id ServerID, target ServerAddress, args *AppendEntriesRequest, resp *AppendEntriesResponse) error {
|
||||
return n.genericRPC(id, target, rpcAppendEntries, args, resp)
|
||||
}
|
||||
|
||||
// RequestVote implements the Transport interface.
|
||||
func (n *NetworkTransport) RequestVote(id ServerID, target ServerAddress, args *RequestVoteRequest, resp *RequestVoteResponse) error {
|
||||
return n.genericRPC(id, target, rpcRequestVote, args, resp)
|
||||
}
|
||||
|
||||
// genericRPC handles a simple request/response RPC.
|
||||
func (n *NetworkTransport) genericRPC(id ServerID, target ServerAddress, rpcType uint8, args interface{}, resp interface{}) error {
|
||||
// Get a conn
|
||||
conn, err := n.getConnFromAddressProvider(id, target)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Set a deadline
|
||||
if n.timeout > 0 {
|
||||
conn.conn.SetDeadline(time.Now().Add(n.timeout))
|
||||
}
|
||||
|
||||
// Send the RPC
|
||||
if err = sendRPC(conn, rpcType, args); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Decode the response
|
||||
canReturn, err := decodeResponse(conn, resp)
|
||||
if canReturn {
|
||||
n.returnConn(conn)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// InstallSnapshot implements the Transport interface.
|
||||
func (n *NetworkTransport) InstallSnapshot(id ServerID, target ServerAddress, args *InstallSnapshotRequest, resp *InstallSnapshotResponse, data io.Reader) error {
|
||||
// Get a conn, always close for InstallSnapshot
|
||||
conn, err := n.getConnFromAddressProvider(id, target)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer conn.Release()
|
||||
|
||||
// Set a deadline, scaled by request size
|
||||
if n.timeout > 0 {
|
||||
timeout := n.timeout * time.Duration(args.Size/int64(n.TimeoutScale))
|
||||
if timeout < n.timeout {
|
||||
timeout = n.timeout
|
||||
}
|
||||
conn.conn.SetDeadline(time.Now().Add(timeout))
|
||||
}
|
||||
|
||||
// Send the RPC
|
||||
if err = sendRPC(conn, rpcInstallSnapshot, args); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Stream the state
|
||||
if _, err = io.Copy(conn.w, data); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Flush
|
||||
if err = conn.w.Flush(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Decode the response, do not return conn
|
||||
_, err = decodeResponse(conn, resp)
|
||||
return err
|
||||
}
|
||||
|
||||
// EncodePeer implements the Transport interface.
|
||||
func (n *NetworkTransport) EncodePeer(id ServerID, p ServerAddress) []byte {
|
||||
address := n.getProviderAddressOrFallback(id, p)
|
||||
return []byte(address)
|
||||
}
|
||||
|
||||
// DecodePeer implements the Transport interface.
|
||||
func (n *NetworkTransport) DecodePeer(buf []byte) ServerAddress {
|
||||
return ServerAddress(buf)
|
||||
}
|
||||
|
||||
// listen is used to handling incoming connections.
|
||||
func (n *NetworkTransport) listen() {
|
||||
for {
|
||||
// Accept incoming connections
|
||||
conn, err := n.stream.Accept()
|
||||
if err != nil {
|
||||
if n.IsShutdown() {
|
||||
return
|
||||
}
|
||||
n.logger.Printf("[ERR] raft-net: Failed to accept connection: %v", err)
|
||||
continue
|
||||
}
|
||||
n.logger.Printf("[DEBUG] raft-net: %v accepted connection from: %v", n.LocalAddr(), conn.RemoteAddr())
|
||||
|
||||
// Handle the connection in dedicated routine
|
||||
go n.handleConn(conn)
|
||||
}
|
||||
}
|
||||
|
||||
// handleConn is used to handle an inbound connection for its lifespan.
|
||||
func (n *NetworkTransport) handleConn(conn net.Conn) {
|
||||
defer conn.Close()
|
||||
r := bufio.NewReader(conn)
|
||||
w := bufio.NewWriter(conn)
|
||||
dec := codec.NewDecoder(r, &codec.MsgpackHandle{})
|
||||
enc := codec.NewEncoder(w, &codec.MsgpackHandle{})
|
||||
|
||||
for {
|
||||
if err := n.handleCommand(r, dec, enc); err != nil {
|
||||
if err != io.EOF {
|
||||
n.logger.Printf("[ERR] raft-net: Failed to decode incoming command: %v", err)
|
||||
}
|
||||
return
|
||||
}
|
||||
if err := w.Flush(); err != nil {
|
||||
n.logger.Printf("[ERR] raft-net: Failed to flush response: %v", err)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// handleCommand is used to decode and dispatch a single command.
|
||||
func (n *NetworkTransport) handleCommand(r *bufio.Reader, dec *codec.Decoder, enc *codec.Encoder) error {
|
||||
// Get the rpc type
|
||||
rpcType, err := r.ReadByte()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Create the RPC object
|
||||
respCh := make(chan RPCResponse, 1)
|
||||
rpc := RPC{
|
||||
RespChan: respCh,
|
||||
}
|
||||
|
||||
// Decode the command
|
||||
isHeartbeat := false
|
||||
switch rpcType {
|
||||
case rpcAppendEntries:
|
||||
var req AppendEntriesRequest
|
||||
if err := dec.Decode(&req); err != nil {
|
||||
return err
|
||||
}
|
||||
rpc.Command = &req
|
||||
|
||||
// Check if this is a heartbeat
|
||||
if req.Term != 0 && req.Leader != nil &&
|
||||
req.PrevLogEntry == 0 && req.PrevLogTerm == 0 &&
|
||||
len(req.Entries) == 0 && req.LeaderCommitIndex == 0 {
|
||||
isHeartbeat = true
|
||||
}
|
||||
|
||||
case rpcRequestVote:
|
||||
var req RequestVoteRequest
|
||||
if err := dec.Decode(&req); err != nil {
|
||||
return err
|
||||
}
|
||||
rpc.Command = &req
|
||||
|
||||
case rpcInstallSnapshot:
|
||||
var req InstallSnapshotRequest
|
||||
if err := dec.Decode(&req); err != nil {
|
||||
return err
|
||||
}
|
||||
rpc.Command = &req
|
||||
rpc.Reader = io.LimitReader(r, req.Size)
|
||||
|
||||
default:
|
||||
return fmt.Errorf("unknown rpc type %d", rpcType)
|
||||
}
|
||||
|
||||
// Check for heartbeat fast-path
|
||||
if isHeartbeat {
|
||||
n.heartbeatFnLock.Lock()
|
||||
fn := n.heartbeatFn
|
||||
n.heartbeatFnLock.Unlock()
|
||||
if fn != nil {
|
||||
fn(rpc)
|
||||
goto RESP
|
||||
}
|
||||
}
|
||||
|
||||
// Dispatch the RPC
|
||||
select {
|
||||
case n.consumeCh <- rpc:
|
||||
case <-n.shutdownCh:
|
||||
return ErrTransportShutdown
|
||||
}
|
||||
|
||||
// Wait for response
|
||||
RESP:
|
||||
select {
|
||||
case resp := <-respCh:
|
||||
// Send the error first
|
||||
respErr := ""
|
||||
if resp.Error != nil {
|
||||
respErr = resp.Error.Error()
|
||||
}
|
||||
if err := enc.Encode(respErr); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Send the response
|
||||
if err := enc.Encode(resp.Response); err != nil {
|
||||
return err
|
||||
}
|
||||
case <-n.shutdownCh:
|
||||
return ErrTransportShutdown
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// decodeResponse is used to decode an RPC response and reports whether
|
||||
// the connection can be reused.
|
||||
func decodeResponse(conn *netConn, resp interface{}) (bool, error) {
|
||||
// Decode the error if any
|
||||
var rpcError string
|
||||
if err := conn.dec.Decode(&rpcError); err != nil {
|
||||
conn.Release()
|
||||
return false, err
|
||||
}
|
||||
|
||||
// Decode the response
|
||||
if err := conn.dec.Decode(resp); err != nil {
|
||||
conn.Release()
|
||||
return false, err
|
||||
}
|
||||
|
||||
// Format an error if any
|
||||
if rpcError != "" {
|
||||
return true, fmt.Errorf(rpcError)
|
||||
}
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// sendRPC is used to encode and send the RPC.
|
||||
func sendRPC(conn *netConn, rpcType uint8, args interface{}) error {
|
||||
// Write the request type
|
||||
if err := conn.w.WriteByte(rpcType); err != nil {
|
||||
conn.Release()
|
||||
return err
|
||||
}
|
||||
|
||||
// Send the request
|
||||
if err := conn.enc.Encode(args); err != nil {
|
||||
conn.Release()
|
||||
return err
|
||||
}
|
||||
|
||||
// Flush
|
||||
if err := conn.w.Flush(); err != nil {
|
||||
conn.Release()
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// newNetPipeline is used to construct a netPipeline from a given
|
||||
// transport and connection.
|
||||
func newNetPipeline(trans *NetworkTransport, conn *netConn) *netPipeline {
|
||||
n := &netPipeline{
|
||||
conn: conn,
|
||||
trans: trans,
|
||||
doneCh: make(chan AppendFuture, rpcMaxPipeline),
|
||||
inprogressCh: make(chan *appendFuture, rpcMaxPipeline),
|
||||
shutdownCh: make(chan struct{}),
|
||||
}
|
||||
go n.decodeResponses()
|
||||
return n
|
||||
}
|
||||
|
||||
// decodeResponses is a long running routine that decodes the responses
|
||||
// sent on the connection.
|
||||
func (n *netPipeline) decodeResponses() {
|
||||
timeout := n.trans.timeout
|
||||
for {
|
||||
select {
|
||||
case future := <-n.inprogressCh:
|
||||
if timeout > 0 {
|
||||
n.conn.conn.SetReadDeadline(time.Now().Add(timeout))
|
||||
}
|
||||
|
||||
_, err := decodeResponse(n.conn, future.resp)
|
||||
future.respond(err)
|
||||
select {
|
||||
case n.doneCh <- future:
|
||||
case <-n.shutdownCh:
|
||||
return
|
||||
}
|
||||
case <-n.shutdownCh:
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// AppendEntries is used to pipeline a new append entries request.
|
||||
func (n *netPipeline) AppendEntries(args *AppendEntriesRequest, resp *AppendEntriesResponse) (AppendFuture, error) {
|
||||
// Create a new future
|
||||
future := &appendFuture{
|
||||
start: time.Now(),
|
||||
args: args,
|
||||
resp: resp,
|
||||
}
|
||||
future.init()
|
||||
|
||||
// Add a send timeout
|
||||
if timeout := n.trans.timeout; timeout > 0 {
|
||||
n.conn.conn.SetWriteDeadline(time.Now().Add(timeout))
|
||||
}
|
||||
|
||||
// Send the RPC
|
||||
if err := sendRPC(n.conn, rpcAppendEntries, future.args); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Hand-off for decoding, this can also cause back-pressure
|
||||
// to prevent too many inflight requests
|
||||
select {
|
||||
case n.inprogressCh <- future:
|
||||
return future, nil
|
||||
case <-n.shutdownCh:
|
||||
return nil, ErrPipelineShutdown
|
||||
}
|
||||
}
|
||||
|
||||
// Consumer returns a channel that can be used to consume complete futures.
|
||||
func (n *netPipeline) Consumer() <-chan AppendFuture {
|
||||
return n.doneCh
|
||||
}
|
||||
|
||||
// Closed is used to shutdown the pipeline connection.
|
||||
func (n *netPipeline) Close() error {
|
||||
n.shutdownLock.Lock()
|
||||
defer n.shutdownLock.Unlock()
|
||||
if n.shutdown {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Release the connection
|
||||
n.conn.Release()
|
||||
|
||||
n.shutdown = true
|
||||
close(n.shutdownCh)
|
||||
return nil
|
||||
}
|
||||
117
vendor/github.com/hashicorp/raft/observer.go
generated
vendored
Normal file
117
vendor/github.com/hashicorp/raft/observer.go
generated
vendored
Normal file
@@ -0,0 +1,117 @@
|
||||
package raft
|
||||
|
||||
import (
|
||||
"sync/atomic"
|
||||
)
|
||||
|
||||
// Observation is sent along the given channel to observers when an event
// occurs. Raft.observe builds a fresh Observation for each registered
// observer and passes a pointer to it to that observer's filter.
type Observation struct {
	// Raft holds the Raft instance generating the observation.
	Raft *Raft
	// Data holds observation-specific data. Possible types are
	// *RequestVoteRequest and RaftState.
	Data interface{}
}
|
||||
|
||||
// nextObserverID is used to provide a unique ID for each observer to aid in
|
||||
// deregistration.
|
||||
var nextObserverID uint64
|
||||
|
||||
// FilterFn is a function that can be registered in order to filter observations.
// The function reports whether the observation should be included - if
// it returns false, the observation will be filtered out. A nil FilterFn
// on an Observer means no filtering: every observation is a candidate for
// delivery.
type FilterFn func(o *Observation) bool
|
||||
|
||||
// Observer describes what to do with a given observation. Create one with
// NewObserver, which also assigns the unique id used for registration.
type Observer struct {
	// numObserved and numDropped are performance counters for this observer.
	// 64 bit types must be 64 bit aligned to use with atomic operations on
	// 32 bit platforms, so keep them at the top of the struct.
	numObserved uint64
	numDropped  uint64

	// channel receives observations. Observers with a nil channel are
	// skipped when observations are dispatched.
	channel chan Observation

	// blocking, if true, will cause Raft to block when sending an observation
	// to this observer. This should generally be set to false. When false,
	// an observation that cannot be sent immediately is discarded and
	// counted in numDropped.
	blocking bool

	// filter will be called to determine if an observation should be sent to
	// the channel. A nil filter accepts everything.
	filter FilterFn

	// id is the ID of this observer in the Raft map.
	id uint64
}
|
||||
|
||||
// NewObserver creates a new observer that can be registered
|
||||
// to make observations on a Raft instance. Observations
|
||||
// will be sent on the given channel if they satisfy the
|
||||
// given filter.
|
||||
//
|
||||
// If blocking is true, the observer will block when it can't
|
||||
// send on the channel, otherwise it may discard events.
|
||||
func NewObserver(channel chan Observation, blocking bool, filter FilterFn) *Observer {
|
||||
return &Observer{
|
||||
channel: channel,
|
||||
blocking: blocking,
|
||||
filter: filter,
|
||||
id: atomic.AddUint64(&nextObserverID, 1),
|
||||
}
|
||||
}
|
||||
|
||||
// GetNumObserved returns the number of observations.
|
||||
func (or *Observer) GetNumObserved() uint64 {
|
||||
return atomic.LoadUint64(&or.numObserved)
|
||||
}
|
||||
|
||||
// GetNumDropped returns the number of dropped observations due to blocking.
|
||||
func (or *Observer) GetNumDropped() uint64 {
|
||||
return atomic.LoadUint64(&or.numDropped)
|
||||
}
|
||||
|
||||
// RegisterObserver registers a new observer.
|
||||
func (r *Raft) RegisterObserver(or *Observer) {
|
||||
r.observersLock.Lock()
|
||||
defer r.observersLock.Unlock()
|
||||
r.observers[or.id] = or
|
||||
}
|
||||
|
||||
// DeregisterObserver deregisters an observer.
|
||||
func (r *Raft) DeregisterObserver(or *Observer) {
|
||||
r.observersLock.Lock()
|
||||
defer r.observersLock.Unlock()
|
||||
delete(r.observers, or.id)
|
||||
}
|
||||
|
||||
// observe sends an observation to every observer.
|
||||
func (r *Raft) observe(o interface{}) {
|
||||
// In general observers should not block. But in any case this isn't
|
||||
// disastrous as we only hold a read lock, which merely prevents
|
||||
// registration / deregistration of observers.
|
||||
r.observersLock.RLock()
|
||||
defer r.observersLock.RUnlock()
|
||||
for _, or := range r.observers {
|
||||
// It's wasteful to do this in the loop, but for the common case
|
||||
// where there are no observers we won't create any objects.
|
||||
ob := Observation{Raft: r, Data: o}
|
||||
if or.filter != nil && !or.filter(&ob) {
|
||||
continue
|
||||
}
|
||||
if or.channel == nil {
|
||||
continue
|
||||
}
|
||||
if or.blocking {
|
||||
or.channel <- ob
|
||||
atomic.AddUint64(&or.numObserved, 1)
|
||||
} else {
|
||||
select {
|
||||
case or.channel <- ob:
|
||||
atomic.AddUint64(&or.numObserved, 1)
|
||||
default:
|
||||
atomic.AddUint64(&or.numDropped, 1)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
98
vendor/github.com/hashicorp/raft/peersjson.go
generated
vendored
Normal file
98
vendor/github.com/hashicorp/raft/peersjson.go
generated
vendored
Normal file
@@ -0,0 +1,98 @@
|
||||
package raft
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"io/ioutil"
|
||||
)
|
||||
|
||||
// ReadPeersJSON consumes a legacy peers.json file in the format of the old JSON
|
||||
// peer store and creates a new-style configuration structure. This can be used
|
||||
// to migrate this data or perform manual recovery when running protocol versions
|
||||
// that can interoperate with older, unversioned Raft servers. This should not be
|
||||
// used once server IDs are in use, because the old peers.json file didn't have
|
||||
// support for these, nor non-voter suffrage types.
|
||||
func ReadPeersJSON(path string) (Configuration, error) {
|
||||
// Read in the file.
|
||||
buf, err := ioutil.ReadFile(path)
|
||||
if err != nil {
|
||||
return Configuration{}, err
|
||||
}
|
||||
|
||||
// Parse it as JSON.
|
||||
var peers []string
|
||||
dec := json.NewDecoder(bytes.NewReader(buf))
|
||||
if err := dec.Decode(&peers); err != nil {
|
||||
return Configuration{}, err
|
||||
}
|
||||
|
||||
// Map it into the new-style configuration structure. We can only specify
|
||||
// voter roles here, and the ID has to be the same as the address.
|
||||
var configuration Configuration
|
||||
for _, peer := range peers {
|
||||
server := Server{
|
||||
Suffrage: Voter,
|
||||
ID: ServerID(peer),
|
||||
Address: ServerAddress(peer),
|
||||
}
|
||||
configuration.Servers = append(configuration.Servers, server)
|
||||
}
|
||||
|
||||
// We should only ingest valid configurations.
|
||||
if err := checkConfiguration(configuration); err != nil {
|
||||
return Configuration{}, err
|
||||
}
|
||||
return configuration, nil
|
||||
}
|
||||
|
||||
// configEntry is used when decoding a new-style peers.json. ReadConfigJSON
// decodes the file as a JSON array of these entries.
type configEntry struct {
	// ID is the ID of the server (a UUID, usually).
	ID ServerID `json:"id"`

	// Address is the host:port of the server.
	Address ServerAddress `json:"address"`

	// NonVoter controls the suffrage. We choose this sense so people
	// can leave this out and get a Voter by default.
	NonVoter bool `json:"non_voter"`
}
|
||||
|
||||
// ReadConfigJSON reads a new-style peers.json and returns a configuration
|
||||
// structure. This can be used to perform manual recovery when running protocol
|
||||
// versions that use server IDs.
|
||||
func ReadConfigJSON(path string) (Configuration, error) {
|
||||
// Read in the file.
|
||||
buf, err := ioutil.ReadFile(path)
|
||||
if err != nil {
|
||||
return Configuration{}, err
|
||||
}
|
||||
|
||||
// Parse it as JSON.
|
||||
var peers []configEntry
|
||||
dec := json.NewDecoder(bytes.NewReader(buf))
|
||||
if err := dec.Decode(&peers); err != nil {
|
||||
return Configuration{}, err
|
||||
}
|
||||
|
||||
// Map it into the new-style configuration structure.
|
||||
var configuration Configuration
|
||||
for _, peer := range peers {
|
||||
suffrage := Voter
|
||||
if peer.NonVoter {
|
||||
suffrage = Nonvoter
|
||||
}
|
||||
server := Server{
|
||||
Suffrage: suffrage,
|
||||
ID: peer.ID,
|
||||
Address: peer.Address,
|
||||
}
|
||||
configuration.Servers = append(configuration.Servers, server)
|
||||
}
|
||||
|
||||
// We should only ingest valid configurations.
|
||||
if err := checkConfiguration(configuration); err != nil {
|
||||
return Configuration{}, err
|
||||
}
|
||||
return configuration, nil
|
||||
}
|
||||
1459
vendor/github.com/hashicorp/raft/raft.go
generated
vendored
Normal file
1459
vendor/github.com/hashicorp/raft/raft.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load Diff
561
vendor/github.com/hashicorp/raft/replication.go
generated
vendored
Normal file
561
vendor/github.com/hashicorp/raft/replication.go
generated
vendored
Normal file
@@ -0,0 +1,561 @@
|
||||
package raft
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/armon/go-metrics"
|
||||
)
|
||||
|
||||
const (
	// maxFailureScale is passed as the limit argument to backoff() when
	// delaying retries after failed RPCs.
	// NOTE(review): presumably it caps how far the delay grows with the
	// failure count - confirm against backoff().
	maxFailureScale = 12
	// failureWait is the base delay passed to backoff() after a failed RPC.
	failureWait = 10 * time.Millisecond
)
|
||||
|
||||
var (
	// ErrLogNotFound indicates a given log entry is not available. When
	// replicateTo gets this error while building an AppendEntries request,
	// it falls back to sending the latest snapshot instead.
	ErrLogNotFound = errors.New("log not found")

	// ErrPipelineReplicationNotSupported can be returned by the transport to
	// signal that pipeline replication is not supported in general, and that
	// no error message should be produced.
	ErrPipelineReplicationNotSupported = errors.New("pipeline replication not supported")
)
|
||||
|
||||
// followerReplication is in charge of sending snapshots and log entries from
// this leader during this particular term to a remote follower.
type followerReplication struct {
	// peer contains the network address and ID of the remote follower.
	peer Server

	// commitment tracks the entries acknowledged by followers so that the
	// leader's commit index can advance. It is updated on successful
	// AppendEntries responses.
	commitment *commitment

	// stopCh is notified/closed when this leader steps down or the follower is
	// removed from the cluster. In the follower removed case, it carries a log
	// index; replication should be attempted with a best effort up through that
	// index, before exiting.
	stopCh chan uint64
	// triggerCh is notified every time new entries are appended to the log.
	triggerCh chan struct{}

	// currentTerm is the term of this leader, to be included in AppendEntries
	// requests.
	currentTerm uint64
	// nextIndex is the index of the next log entry to send to the follower,
	// which may fall past the end of the log.
	nextIndex uint64

	// lastContact is updated to the current time whenever any response is
	// received from the follower (successful or not). This is used to check
	// whether the leader should step down (Raft.checkLeaderLease()).
	lastContact time.Time
	// lastContactLock protects 'lastContact'.
	lastContactLock sync.RWMutex

	// failures counts the number of failed RPCs since the last success, which is
	// used to apply backoff.
	failures uint64

	// notifyCh is notified to send out a heartbeat, which is used to check that
	// this server is still leader.
	notifyCh chan struct{}
	// notify is a list of futures to be resolved upon receipt of an
	// acknowledgement, then cleared from this list.
	notify []*verifyFuture
	// notifyLock protects 'notify'.
	notifyLock sync.Mutex

	// stepDown is used to indicate to the leader that we
	// should step down based on information from a follower.
	stepDown chan struct{}

	// allowPipeline is used to determine when to pipeline the AppendEntries RPCs.
	// It is private to this replication goroutine.
	allowPipeline bool
}
|
||||
|
||||
// notifyAll is used to notify all the waiting verify futures
|
||||
// if the follower believes we are still the leader.
|
||||
func (s *followerReplication) notifyAll(leader bool) {
|
||||
// Clear the waiting notifies minimizing lock time
|
||||
s.notifyLock.Lock()
|
||||
n := s.notify
|
||||
s.notify = nil
|
||||
s.notifyLock.Unlock()
|
||||
|
||||
// Submit our votes
|
||||
for _, v := range n {
|
||||
v.vote(leader)
|
||||
}
|
||||
}
|
||||
|
||||
// LastContact returns the time of last contact.
|
||||
func (s *followerReplication) LastContact() time.Time {
|
||||
s.lastContactLock.RLock()
|
||||
last := s.lastContact
|
||||
s.lastContactLock.RUnlock()
|
||||
return last
|
||||
}
|
||||
|
||||
// setLastContact sets the last contact to the current time.
|
||||
func (s *followerReplication) setLastContact() {
|
||||
s.lastContactLock.Lock()
|
||||
s.lastContact = time.Now()
|
||||
s.lastContactLock.Unlock()
|
||||
}
|
||||
|
||||
// replicate is a long running routine that replicates log entries to a single
// follower. It alternates between two modes: a standard mode (label RPC) that
// sends one AppendEntries at a time via replicateTo, and a pipelined mode
// (label PIPELINE) entered once replicateTo has flagged s.allowPipeline. It
// returns when a stop is signalled on s.stopCh or replicateTo reports that
// replication should stop. A heartbeat goroutine is started alongside and is
// stopped when this routine returns.
func (r *Raft) replicate(s *followerReplication) {
	// Start an async heartbeating routine
	stopHeartbeat := make(chan struct{})
	defer close(stopHeartbeat)
	r.goFunc(func() { r.heartbeat(s, stopHeartbeat) })

RPC:
	shouldStop := false
	for !shouldStop {
		select {
		case maxIndex := <-s.stopCh:
			// Make a best effort to replicate up to this index
			if maxIndex > 0 {
				r.replicateTo(s, maxIndex)
			}
			return
		case <-s.triggerCh:
			lastLogIdx, _ := r.getLastLog()
			shouldStop = r.replicateTo(s, lastLogIdx)
		// Periodic wake-up: even without a trigger, attempt replication up
		// to the current last log index.
		case <-randomTimeout(r.conf.CommitTimeout): // TODO: what is this?
			lastLogIdx, _ := r.getLastLog()
			shouldStop = r.replicateTo(s, lastLogIdx)
		}

		// If things looks healthy, switch to pipeline mode
		if !shouldStop && s.allowPipeline {
			goto PIPELINE
		}
	}
	return

PIPELINE:
	// Disable until re-enabled
	s.allowPipeline = false

	// Replicates using a pipeline for high performance. This method
	// is not able to gracefully recover from errors, and so we fall back
	// to standard mode on failure.
	if err := r.pipelineReplicate(s); err != nil {
		// "Not supported" is an expected answer from some transports, so it
		// is deliberately not logged.
		if err != ErrPipelineReplicationNotSupported {
			r.logger.Printf("[ERR] raft: Failed to start pipeline replication to %s: %s", s.peer, err)
		}
	}
	// Whatever the outcome, resume standard replication.
	goto RPC
}
|
||||
|
||||
// replicateTo is a helper to replicate(), used to replicate the logs up to a
// given last index.
// If the follower log is behind, we take care to bring them up to date.
//
// The result shouldStop is true when the follower reported a newer term, when
// a stop was signalled on s.stopCh, or when sendLatestSnapshot asked to stop.
// Every bare return yields false, because the named result is never assigned.
func (r *Raft) replicateTo(s *followerReplication, lastIndex uint64) (shouldStop bool) {
	// Create the base request
	var req AppendEntriesRequest
	var resp AppendEntriesResponse
	var start time.Time
START:
	// Prevent an excessive retry rate on errors
	if s.failures > 0 {
		select {
		case <-time.After(backoff(failureWait, s.failures, maxFailureScale)):
		case <-r.shutdownCh:
		}
	}

	// Setup the request
	if err := r.setupAppendEntries(s, &req, s.nextIndex, lastIndex); err == ErrLogNotFound {
		// The needed log entry is gone; ship a snapshot instead.
		goto SEND_SNAP
	} else if err != nil {
		return
	}

	// Make the RPC call
	start = time.Now()
	if err := r.trans.AppendEntries(s.peer.ID, s.peer.Address, &req, &resp); err != nil {
		r.logger.Printf("[ERR] raft: Failed to AppendEntries to %v: %v", s.peer, err)
		s.failures++
		return
	}
	appendStats(string(s.peer.ID), start, float32(len(req.Entries)))

	// Check for a newer term, stop running
	if resp.Term > req.Term {
		r.handleStaleTerm(s)
		return true
	}

	// Update the last contact
	s.setLastContact()

	// Update s based on success
	if resp.Success {
		// Update our replication state
		updateLastAppended(s, &req)

		// Clear any failures, allow pipelining
		s.failures = 0
		s.allowPipeline = true
	} else {
		// Rejected: step nextIndex back by one, or jump straight to just
		// past the follower's reported last log if that is lower, but never
		// below index 1.
		s.nextIndex = max(min(s.nextIndex-1, resp.LastLog+1), 1)
		if resp.NoRetryBackoff {
			s.failures = 0
		} else {
			s.failures++
		}
		r.logger.Printf("[WARN] raft: AppendEntries to %v rejected, sending older logs (next: %d)", s.peer, s.nextIndex)
	}

CHECK_MORE:
	// Poll the stop channel here in case we are looping and have been asked
	// to stop, or have stepped down as leader. Even for the best effort case
	// where we are asked to replicate to a given index and then shutdown,
	// it's better to not loop in here to send lots of entries to a straggler
	// that's leaving the cluster anyways.
	select {
	case <-s.stopCh:
		return true
	default:
	}

	// Check if there are more logs to replicate
	if s.nextIndex <= lastIndex {
		goto START
	}
	return

	// SEND_SNAP is used when we fail to get a log, usually because the follower
	// is too far behind, and we must ship a snapshot down instead
SEND_SNAP:
	if stop, err := r.sendLatestSnapshot(s); stop {
		return true
	} else if err != nil {
		r.logger.Printf("[ERR] raft: Failed to send snapshot to %v: %v", s.peer, err)
		return
	}

	// Check if there is more to replicate
	goto CHECK_MORE
}
|
||||
|
||||
// sendLatestSnapshot is used to send the latest snapshot we have
|
||||
// down to our follower.
|
||||
func (r *Raft) sendLatestSnapshot(s *followerReplication) (bool, error) {
|
||||
// Get the snapshots
|
||||
snapshots, err := r.snapshots.List()
|
||||
if err != nil {
|
||||
r.logger.Printf("[ERR] raft: Failed to list snapshots: %v", err)
|
||||
return false, err
|
||||
}
|
||||
|
||||
// Check we have at least a single snapshot
|
||||
if len(snapshots) == 0 {
|
||||
return false, fmt.Errorf("no snapshots found")
|
||||
}
|
||||
|
||||
// Open the most recent snapshot
|
||||
snapID := snapshots[0].ID
|
||||
meta, snapshot, err := r.snapshots.Open(snapID)
|
||||
if err != nil {
|
||||
r.logger.Printf("[ERR] raft: Failed to open snapshot %v: %v", snapID, err)
|
||||
return false, err
|
||||
}
|
||||
defer snapshot.Close()
|
||||
|
||||
// Setup the request
|
||||
req := InstallSnapshotRequest{
|
||||
RPCHeader: r.getRPCHeader(),
|
||||
SnapshotVersion: meta.Version,
|
||||
Term: s.currentTerm,
|
||||
Leader: r.trans.EncodePeer(r.localID, r.localAddr),
|
||||
LastLogIndex: meta.Index,
|
||||
LastLogTerm: meta.Term,
|
||||
Peers: meta.Peers,
|
||||
Size: meta.Size,
|
||||
Configuration: encodeConfiguration(meta.Configuration),
|
||||
ConfigurationIndex: meta.ConfigurationIndex,
|
||||
}
|
||||
|
||||
// Make the call
|
||||
start := time.Now()
|
||||
var resp InstallSnapshotResponse
|
||||
if err := r.trans.InstallSnapshot(s.peer.ID, s.peer.Address, &req, &resp, snapshot); err != nil {
|
||||
r.logger.Printf("[ERR] raft: Failed to install snapshot %v: %v", snapID, err)
|
||||
s.failures++
|
||||
return false, err
|
||||
}
|
||||
metrics.MeasureSince([]string{"raft", "replication", "installSnapshot", string(s.peer.ID)}, start)
|
||||
|
||||
// Check for a newer term, stop running
|
||||
if resp.Term > req.Term {
|
||||
r.handleStaleTerm(s)
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// Update the last contact
|
||||
s.setLastContact()
|
||||
|
||||
// Check for success
|
||||
if resp.Success {
|
||||
// Update the indexes
|
||||
s.nextIndex = meta.Index + 1
|
||||
s.commitment.match(s.peer.ID, meta.Index)
|
||||
|
||||
// Clear any failures
|
||||
s.failures = 0
|
||||
|
||||
// Notify we are still leader
|
||||
s.notifyAll(true)
|
||||
} else {
|
||||
s.failures++
|
||||
r.logger.Printf("[WARN] raft: InstallSnapshot to %v rejected", s.peer)
|
||||
}
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// heartbeat is used to periodically invoke AppendEntries on a peer
|
||||
// to ensure they don't time out. This is done async of replicate(),
|
||||
// since that routine could potentially be blocked on disk IO.
|
||||
func (r *Raft) heartbeat(s *followerReplication, stopCh chan struct{}) {
|
||||
var failures uint64
|
||||
req := AppendEntriesRequest{
|
||||
RPCHeader: r.getRPCHeader(),
|
||||
Term: s.currentTerm,
|
||||
Leader: r.trans.EncodePeer(r.localID, r.localAddr),
|
||||
}
|
||||
var resp AppendEntriesResponse
|
||||
for {
|
||||
// Wait for the next heartbeat interval or forced notify
|
||||
select {
|
||||
case <-s.notifyCh:
|
||||
case <-randomTimeout(r.conf.HeartbeatTimeout / 10):
|
||||
case <-stopCh:
|
||||
return
|
||||
}
|
||||
|
||||
start := time.Now()
|
||||
if err := r.trans.AppendEntries(s.peer.ID, s.peer.Address, &req, &resp); err != nil {
|
||||
r.logger.Printf("[ERR] raft: Failed to heartbeat to %v: %v", s.peer.Address, err)
|
||||
failures++
|
||||
select {
|
||||
case <-time.After(backoff(failureWait, failures, maxFailureScale)):
|
||||
case <-stopCh:
|
||||
}
|
||||
} else {
|
||||
s.setLastContact()
|
||||
failures = 0
|
||||
metrics.MeasureSince([]string{"raft", "replication", "heartbeat", string(s.peer.ID)}, start)
|
||||
s.notifyAll(resp.Success)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// pipelineReplicate is used when we have synchronized our state with the follower,
|
||||
// and want to switch to a higher performance pipeline mode of replication.
|
||||
// We only pipeline AppendEntries commands, and if we ever hit an error, we fall
|
||||
// back to the standard replication which can handle more complex situations.
|
||||
func (r *Raft) pipelineReplicate(s *followerReplication) error {
|
||||
// Create a new pipeline
|
||||
pipeline, err := r.trans.AppendEntriesPipeline(s.peer.ID, s.peer.Address)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer pipeline.Close()
|
||||
|
||||
// Log start and stop of pipeline
|
||||
r.logger.Printf("[INFO] raft: pipelining replication to peer %v", s.peer)
|
||||
defer r.logger.Printf("[INFO] raft: aborting pipeline replication to peer %v", s.peer)
|
||||
|
||||
// Create a shutdown and finish channel
|
||||
stopCh := make(chan struct{})
|
||||
finishCh := make(chan struct{})
|
||||
|
||||
// Start a dedicated decoder
|
||||
r.goFunc(func() { r.pipelineDecode(s, pipeline, stopCh, finishCh) })
|
||||
|
||||
// Start pipeline sends at the last good nextIndex
|
||||
nextIndex := s.nextIndex
|
||||
|
||||
shouldStop := false
|
||||
SEND:
|
||||
for !shouldStop {
|
||||
select {
|
||||
case <-finishCh:
|
||||
break SEND
|
||||
case maxIndex := <-s.stopCh:
|
||||
// Make a best effort to replicate up to this index
|
||||
if maxIndex > 0 {
|
||||
r.pipelineSend(s, pipeline, &nextIndex, maxIndex)
|
||||
}
|
||||
break SEND
|
||||
case <-s.triggerCh:
|
||||
lastLogIdx, _ := r.getLastLog()
|
||||
shouldStop = r.pipelineSend(s, pipeline, &nextIndex, lastLogIdx)
|
||||
case <-randomTimeout(r.conf.CommitTimeout):
|
||||
lastLogIdx, _ := r.getLastLog()
|
||||
shouldStop = r.pipelineSend(s, pipeline, &nextIndex, lastLogIdx)
|
||||
}
|
||||
}
|
||||
|
||||
// Stop our decoder, and wait for it to finish
|
||||
close(stopCh)
|
||||
select {
|
||||
case <-finishCh:
|
||||
case <-r.shutdownCh:
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// pipelineSend is used to send data over a pipeline. It is a helper to
|
||||
// pipelineReplicate.
|
||||
func (r *Raft) pipelineSend(s *followerReplication, p AppendPipeline, nextIdx *uint64, lastIndex uint64) (shouldStop bool) {
|
||||
// Create a new append request
|
||||
req := new(AppendEntriesRequest)
|
||||
if err := r.setupAppendEntries(s, req, *nextIdx, lastIndex); err != nil {
|
||||
return true
|
||||
}
|
||||
|
||||
// Pipeline the append entries
|
||||
if _, err := p.AppendEntries(req, new(AppendEntriesResponse)); err != nil {
|
||||
r.logger.Printf("[ERR] raft: Failed to pipeline AppendEntries to %v: %v", s.peer, err)
|
||||
return true
|
||||
}
|
||||
|
||||
// Increase the next send log to avoid re-sending old logs
|
||||
if n := len(req.Entries); n > 0 {
|
||||
last := req.Entries[n-1]
|
||||
*nextIdx = last.Index + 1
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// pipelineDecode is used to decode the responses of pipelined requests.
|
||||
func (r *Raft) pipelineDecode(s *followerReplication, p AppendPipeline, stopCh, finishCh chan struct{}) {
|
||||
defer close(finishCh)
|
||||
respCh := p.Consumer()
|
||||
for {
|
||||
select {
|
||||
case ready := <-respCh:
|
||||
req, resp := ready.Request(), ready.Response()
|
||||
appendStats(string(s.peer.ID), ready.Start(), float32(len(req.Entries)))
|
||||
|
||||
// Check for a newer term, stop running
|
||||
if resp.Term > req.Term {
|
||||
r.handleStaleTerm(s)
|
||||
return
|
||||
}
|
||||
|
||||
// Update the last contact
|
||||
s.setLastContact()
|
||||
|
||||
// Abort pipeline if not successful
|
||||
if !resp.Success {
|
||||
return
|
||||
}
|
||||
|
||||
// Update our replication state
|
||||
updateLastAppended(s, req)
|
||||
case <-stopCh:
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// setupAppendEntries is used to setup an append entries request.
|
||||
func (r *Raft) setupAppendEntries(s *followerReplication, req *AppendEntriesRequest, nextIndex, lastIndex uint64) error {
|
||||
req.RPCHeader = r.getRPCHeader()
|
||||
req.Term = s.currentTerm
|
||||
req.Leader = r.trans.EncodePeer(r.localID, r.localAddr)
|
||||
req.LeaderCommitIndex = r.getCommitIndex()
|
||||
if err := r.setPreviousLog(req, nextIndex); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := r.setNewLogs(req, nextIndex, lastIndex); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// setPreviousLog is used to setup the PrevLogEntry and PrevLogTerm for an
|
||||
// AppendEntriesRequest given the next index to replicate.
|
||||
func (r *Raft) setPreviousLog(req *AppendEntriesRequest, nextIndex uint64) error {
|
||||
// Guard for the first index, since there is no 0 log entry
|
||||
// Guard against the previous index being a snapshot as well
|
||||
lastSnapIdx, lastSnapTerm := r.getLastSnapshot()
|
||||
if nextIndex == 1 {
|
||||
req.PrevLogEntry = 0
|
||||
req.PrevLogTerm = 0
|
||||
|
||||
} else if (nextIndex - 1) == lastSnapIdx {
|
||||
req.PrevLogEntry = lastSnapIdx
|
||||
req.PrevLogTerm = lastSnapTerm
|
||||
|
||||
} else {
|
||||
var l Log
|
||||
if err := r.logs.GetLog(nextIndex-1, &l); err != nil {
|
||||
r.logger.Printf("[ERR] raft: Failed to get log at index %d: %v",
|
||||
nextIndex-1, err)
|
||||
return err
|
||||
}
|
||||
|
||||
// Set the previous index and term (0 if nextIndex is 1)
|
||||
req.PrevLogEntry = l.Index
|
||||
req.PrevLogTerm = l.Term
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// setNewLogs is used to setup the logs which should be appended for a request.
|
||||
func (r *Raft) setNewLogs(req *AppendEntriesRequest, nextIndex, lastIndex uint64) error {
|
||||
// Append up to MaxAppendEntries or up to the lastIndex
|
||||
req.Entries = make([]*Log, 0, r.conf.MaxAppendEntries)
|
||||
maxIndex := min(nextIndex+uint64(r.conf.MaxAppendEntries)-1, lastIndex)
|
||||
for i := nextIndex; i <= maxIndex; i++ {
|
||||
oldLog := new(Log)
|
||||
if err := r.logs.GetLog(i, oldLog); err != nil {
|
||||
r.logger.Printf("[ERR] raft: Failed to get log at index %d: %v", i, err)
|
||||
return err
|
||||
}
|
||||
req.Entries = append(req.Entries, oldLog)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// appendStats is used to emit stats about an AppendEntries invocation.
|
||||
func appendStats(peer string, start time.Time, logs float32) {
|
||||
metrics.MeasureSince([]string{"raft", "replication", "appendEntries", "rpc", peer}, start)
|
||||
metrics.IncrCounter([]string{"raft", "replication", "appendEntries", "logs", peer}, logs)
|
||||
}
|
||||
|
||||
// handleStaleTerm is used when a follower indicates that we have a stale term.
// It logs the event, votes "not leader" on every waiting verify future, and
// then signals s.stepDown.
func (r *Raft) handleStaleTerm(s *followerReplication) {
	r.logger.Printf("[ERR] raft: peer %v has newer term, stopping replication", s.peer)
	s.notifyAll(false) // No longer leader
	// NOTE(review): asyncNotifyCh presumably performs a non-blocking send - confirm against its definition.
	asyncNotifyCh(s.stepDown)
}
|
||||
|
||||
// updateLastAppended is used to update follower replication state after a
|
||||
// successful AppendEntries RPC.
|
||||
// TODO: This isn't used during InstallSnapshot, but the code there is similar.
|
||||
func updateLastAppended(s *followerReplication, req *AppendEntriesRequest) {
|
||||
// Mark any inflight logs as committed
|
||||
if logs := req.Entries; len(logs) > 0 {
|
||||
last := logs[len(logs)-1]
|
||||
s.nextIndex = last.Index + 1
|
||||
s.commitment.match(s.peer.ID, last.Index)
|
||||
}
|
||||
|
||||
// Notify still leader
|
||||
s.notifyAll(true)
|
||||
}
|
||||
239
vendor/github.com/hashicorp/raft/snapshot.go
generated
vendored
Normal file
239
vendor/github.com/hashicorp/raft/snapshot.go
generated
vendored
Normal file
@@ -0,0 +1,239 @@
|
||||
package raft
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"time"
|
||||
|
||||
"github.com/armon/go-metrics"
|
||||
)
|
||||
|
||||
// SnapshotMeta is for metadata of a snapshot. It is what SnapshotStore.List
// and SnapshotStore.Open hand back to describe a stored snapshot.
type SnapshotMeta struct {
	// Version is the version number of the snapshot metadata. This does not cover
	// the application's data in the snapshot, that should be versioned
	// separately.
	Version SnapshotVersion

	// ID is opaque to the store, and is used for opening.
	ID string

	// Index and Term store when the snapshot was taken.
	Index uint64
	Term  uint64

	// Peers is deprecated and used to support version 0 snapshots, but will
	// be populated in version 1 snapshots as well to help with upgrades.
	Peers []byte

	// Configuration and ConfigurationIndex are present in version 1
	// snapshots and later.
	Configuration      Configuration
	ConfigurationIndex uint64

	// Size is the size of the snapshot in bytes.
	Size int64
}
|
||||
|
||||
// SnapshotStore interface is used to allow for flexible implementations
// of snapshot storage and retrieval. For example, a client could implement
// a shared state store such as S3, allowing new nodes to restore snapshots
// without streaming from the leader.
type SnapshotStore interface {
	// Create is used to begin a snapshot at a given index and term, and with
	// the given committed configuration. The version parameter controls
	// which snapshot version to create.
	Create(version SnapshotVersion, index, term uint64, configuration Configuration,
		configurationIndex uint64, trans Transport) (SnapshotSink, error)

	// List is used to list the available snapshots in the store.
	// It should return them in descending order, with the highest index first.
	List() ([]*SnapshotMeta, error)

	// Open takes a snapshot ID and provides a ReadCloser. Once close is
	// called it is assumed the snapshot is no longer needed.
	Open(id string) (*SnapshotMeta, io.ReadCloser, error)
}
|
||||
|
||||
// SnapshotSink is what SnapshotStore.Create returns. The FSM will Write state
// to the sink and call Close on completion. On error, Cancel will be invoked.
type SnapshotSink interface {
	io.WriteCloser
	// ID returns the identifier of the snapshot being written.
	// NOTE(review): presumably the same ID that SnapshotStore.Open accepts - confirm with implementations.
	ID() string
	// Cancel is invoked instead of Close when writing the snapshot failed.
	Cancel() error
}
|
||||
|
||||
// runSnapshots is a long running goroutine used to manage taking
|
||||
// new snapshots of the FSM. It runs in parallel to the FSM and
|
||||
// main goroutines, so that snapshots do not block normal operation.
|
||||
func (r *Raft) runSnapshots() {
|
||||
for {
|
||||
select {
|
||||
case <-randomTimeout(r.conf.SnapshotInterval):
|
||||
// Check if we should snapshot
|
||||
if !r.shouldSnapshot() {
|
||||
continue
|
||||
}
|
||||
|
||||
// Trigger a snapshot
|
||||
if _, err := r.takeSnapshot(); err != nil {
|
||||
r.logger.Printf("[ERR] raft: Failed to take snapshot: %v", err)
|
||||
}
|
||||
|
||||
case future := <-r.userSnapshotCh:
|
||||
// User-triggered, run immediately
|
||||
id, err := r.takeSnapshot()
|
||||
if err != nil {
|
||||
r.logger.Printf("[ERR] raft: Failed to take snapshot: %v", err)
|
||||
} else {
|
||||
future.opener = func() (*SnapshotMeta, io.ReadCloser, error) {
|
||||
return r.snapshots.Open(id)
|
||||
}
|
||||
}
|
||||
future.respond(err)
|
||||
|
||||
case <-r.shutdownCh:
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// shouldSnapshot checks if we meet the conditions to take
|
||||
// a new snapshot.
|
||||
func (r *Raft) shouldSnapshot() bool {
|
||||
// Check the last snapshot index
|
||||
lastSnap, _ := r.getLastSnapshot()
|
||||
|
||||
// Check the last log index
|
||||
lastIdx, err := r.logs.LastIndex()
|
||||
if err != nil {
|
||||
r.logger.Printf("[ERR] raft: Failed to get last log index: %v", err)
|
||||
return false
|
||||
}
|
||||
|
||||
// Compare the delta to the threshold
|
||||
delta := lastIdx - lastSnap
|
||||
return delta >= r.conf.SnapshotThreshold
|
||||
}
|
||||
|
||||
// takeSnapshot is used to take a new snapshot. This must only be called from
|
||||
// the snapshot thread, never the main thread. This returns the ID of the new
|
||||
// snapshot, along with an error.
|
||||
func (r *Raft) takeSnapshot() (string, error) {
|
||||
defer metrics.MeasureSince([]string{"raft", "snapshot", "takeSnapshot"}, time.Now())
|
||||
|
||||
// Create a request for the FSM to perform a snapshot.
|
||||
snapReq := &reqSnapshotFuture{}
|
||||
snapReq.init()
|
||||
|
||||
// Wait for dispatch or shutdown.
|
||||
select {
|
||||
case r.fsmSnapshotCh <- snapReq:
|
||||
case <-r.shutdownCh:
|
||||
return "", ErrRaftShutdown
|
||||
}
|
||||
|
||||
// Wait until we get a response
|
||||
if err := snapReq.Error(); err != nil {
|
||||
if err != ErrNothingNewToSnapshot {
|
||||
err = fmt.Errorf("failed to start snapshot: %v", err)
|
||||
}
|
||||
return "", err
|
||||
}
|
||||
defer snapReq.snapshot.Release()
|
||||
|
||||
// Make a request for the configurations and extract the committed info.
|
||||
// We have to use the future here to safely get this information since
|
||||
// it is owned by the main thread.
|
||||
configReq := &configurationsFuture{}
|
||||
configReq.init()
|
||||
select {
|
||||
case r.configurationsCh <- configReq:
|
||||
case <-r.shutdownCh:
|
||||
return "", ErrRaftShutdown
|
||||
}
|
||||
if err := configReq.Error(); err != nil {
|
||||
return "", err
|
||||
}
|
||||
committed := configReq.configurations.committed
|
||||
committedIndex := configReq.configurations.committedIndex
|
||||
|
||||
// We don't support snapshots while there's a config change outstanding
|
||||
// since the snapshot doesn't have a means to represent this state. This
|
||||
// is a little weird because we need the FSM to apply an index that's
|
||||
// past the configuration change, even though the FSM itself doesn't see
|
||||
// the configuration changes. It should be ok in practice with normal
|
||||
// application traffic flowing through the FSM. If there's none of that
|
||||
// then it's not crucial that we snapshot, since there's not much going
|
||||
// on Raft-wise.
|
||||
if snapReq.index < committedIndex {
|
||||
return "", fmt.Errorf("cannot take snapshot now, wait until the configuration entry at %v has been applied (have applied %v)",
|
||||
committedIndex, snapReq.index)
|
||||
}
|
||||
|
||||
// Create a new snapshot.
|
||||
r.logger.Printf("[INFO] raft: Starting snapshot up to %d", snapReq.index)
|
||||
start := time.Now()
|
||||
version := getSnapshotVersion(r.protocolVersion)
|
||||
sink, err := r.snapshots.Create(version, snapReq.index, snapReq.term, committed, committedIndex, r.trans)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to create snapshot: %v", err)
|
||||
}
|
||||
metrics.MeasureSince([]string{"raft", "snapshot", "create"}, start)
|
||||
|
||||
// Try to persist the snapshot.
|
||||
start = time.Now()
|
||||
if err := snapReq.snapshot.Persist(sink); err != nil {
|
||||
sink.Cancel()
|
||||
return "", fmt.Errorf("failed to persist snapshot: %v", err)
|
||||
}
|
||||
metrics.MeasureSince([]string{"raft", "snapshot", "persist"}, start)
|
||||
|
||||
// Close and check for error.
|
||||
if err := sink.Close(); err != nil {
|
||||
return "", fmt.Errorf("failed to close snapshot: %v", err)
|
||||
}
|
||||
|
||||
// Update the last stable snapshot info.
|
||||
r.setLastSnapshot(snapReq.index, snapReq.term)
|
||||
|
||||
// Compact the logs.
|
||||
if err := r.compactLogs(snapReq.index); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
r.logger.Printf("[INFO] raft: Snapshot to %d complete", snapReq.index)
|
||||
return sink.ID(), nil
|
||||
}
|
||||
|
||||
// compactLogs takes the last inclusive index of a snapshot
|
||||
// and trims the logs that are no longer needed.
|
||||
func (r *Raft) compactLogs(snapIdx uint64) error {
|
||||
defer metrics.MeasureSince([]string{"raft", "compactLogs"}, time.Now())
|
||||
// Determine log ranges to compact
|
||||
minLog, err := r.logs.FirstIndex()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get first log index: %v", err)
|
||||
}
|
||||
|
||||
// Check if we have enough logs to truncate
|
||||
lastLogIdx, _ := r.getLastLog()
|
||||
if lastLogIdx <= r.conf.TrailingLogs {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Truncate up to the end of the snapshot, or `TrailingLogs`
|
||||
// back from the head, which ever is further back. This ensures
|
||||
// at least `TrailingLogs` entries, but does not allow logs
|
||||
// after the snapshot to be removed.
|
||||
maxLog := min(snapIdx, lastLogIdx-r.conf.TrailingLogs)
|
||||
|
||||
// Log this
|
||||
r.logger.Printf("[INFO] raft: Compacting logs from %d to %d", minLog, maxLog)
|
||||
|
||||
// Compact the logs
|
||||
if err := r.logs.DeleteRange(minLog, maxLog); err != nil {
|
||||
return fmt.Errorf("log compaction failed: %v", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
15
vendor/github.com/hashicorp/raft/stable.go
generated
vendored
Normal file
15
vendor/github.com/hashicorp/raft/stable.go
generated
vendored
Normal file
@@ -0,0 +1,15 @@
|
||||
package raft
|
||||
|
||||
// StableStore provides stable storage for key configuration values, which
// is needed to ensure safety.
type StableStore interface {
	// Set stores val under key.
	Set(key, val []byte) error

	// Get returns the value for key, or an empty byte slice if key was not found.
	Get(key []byte) ([]byte, error)

	// SetUint64 stores the uint64 val under key.
	SetUint64(key []byte, val uint64) error

	// GetUint64 returns the uint64 value for key, or 0 if key was not found.
	GetUint64(key []byte) (uint64, error)
}
|
||||
171
vendor/github.com/hashicorp/raft/state.go
generated
vendored
Normal file
171
vendor/github.com/hashicorp/raft/state.go
generated
vendored
Normal file
@@ -0,0 +1,171 @@
|
||||
package raft
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
)
|
||||
|
||||
// RaftState is the role a Raft node is currently in: Follower, Candidate,
// Leader, or Shutdown.
type RaftState uint32

const (
	// Follower is the initial state of a Raft node.
	Follower RaftState = iota

	// Candidate is one of the valid states of a Raft node.
	Candidate

	// Leader is one of the valid states of a Raft node.
	Leader

	// Shutdown is the terminal state of a Raft node.
	Shutdown
)

// String returns the name of the state, or "Unknown" for any value that is
// not one of the declared constants.
func (s RaftState) String() string {
	// Lookup table indexed by the constant values above.
	names := [...]string{
		Follower:  "Follower",
		Candidate: "Candidate",
		Leader:    "Leader",
		Shutdown:  "Shutdown",
	}
	if s < RaftState(len(names)) {
		return names[s]
	}
	return "Unknown"
}
|
||||
|
||||
// raftState is used to maintain various state variables
|
||||
// and provides an interface to set/get the variables in a
|
||||
// thread safe manner.
|
||||
type raftState struct {
|
||||
// currentTerm commitIndex, lastApplied, must be kept at the top of
|
||||
// the struct so they're 64 bit aligned which is a requirement for
|
||||
// atomic ops on 32 bit platforms.
|
||||
|
||||
// The current term, cache of StableStore
|
||||
currentTerm uint64
|
||||
|
||||
// Highest committed log entry
|
||||
commitIndex uint64
|
||||
|
||||
// Last applied log to the FSM
|
||||
lastApplied uint64
|
||||
|
||||
// protects 4 next fields
|
||||
lastLock sync.Mutex
|
||||
|
||||
// Cache the latest snapshot index/term
|
||||
lastSnapshotIndex uint64
|
||||
lastSnapshotTerm uint64
|
||||
|
||||
// Cache the latest log from LogStore
|
||||
lastLogIndex uint64
|
||||
lastLogTerm uint64
|
||||
|
||||
// Tracks running goroutines
|
||||
routinesGroup sync.WaitGroup
|
||||
|
||||
// The current state
|
||||
state RaftState
|
||||
}
|
||||
|
||||
func (r *raftState) getState() RaftState {
|
||||
stateAddr := (*uint32)(&r.state)
|
||||
return RaftState(atomic.LoadUint32(stateAddr))
|
||||
}
|
||||
|
||||
func (r *raftState) setState(s RaftState) {
|
||||
stateAddr := (*uint32)(&r.state)
|
||||
atomic.StoreUint32(stateAddr, uint32(s))
|
||||
}
|
||||
|
||||
func (r *raftState) getCurrentTerm() uint64 {
|
||||
return atomic.LoadUint64(&r.currentTerm)
|
||||
}
|
||||
|
||||
func (r *raftState) setCurrentTerm(term uint64) {
|
||||
atomic.StoreUint64(&r.currentTerm, term)
|
||||
}
|
||||
|
||||
func (r *raftState) getLastLog() (index, term uint64) {
|
||||
r.lastLock.Lock()
|
||||
index = r.lastLogIndex
|
||||
term = r.lastLogTerm
|
||||
r.lastLock.Unlock()
|
||||
return
|
||||
}
|
||||
|
||||
func (r *raftState) setLastLog(index, term uint64) {
|
||||
r.lastLock.Lock()
|
||||
r.lastLogIndex = index
|
||||
r.lastLogTerm = term
|
||||
r.lastLock.Unlock()
|
||||
}
|
||||
|
||||
func (r *raftState) getLastSnapshot() (index, term uint64) {
|
||||
r.lastLock.Lock()
|
||||
index = r.lastSnapshotIndex
|
||||
term = r.lastSnapshotTerm
|
||||
r.lastLock.Unlock()
|
||||
return
|
||||
}
|
||||
|
||||
func (r *raftState) setLastSnapshot(index, term uint64) {
|
||||
r.lastLock.Lock()
|
||||
r.lastSnapshotIndex = index
|
||||
r.lastSnapshotTerm = term
|
||||
r.lastLock.Unlock()
|
||||
}
|
||||
|
||||
func (r *raftState) getCommitIndex() uint64 {
|
||||
return atomic.LoadUint64(&r.commitIndex)
|
||||
}
|
||||
|
||||
func (r *raftState) setCommitIndex(index uint64) {
|
||||
atomic.StoreUint64(&r.commitIndex, index)
|
||||
}
|
||||
|
||||
func (r *raftState) getLastApplied() uint64 {
|
||||
return atomic.LoadUint64(&r.lastApplied)
|
||||
}
|
||||
|
||||
func (r *raftState) setLastApplied(index uint64) {
|
||||
atomic.StoreUint64(&r.lastApplied, index)
|
||||
}
|
||||
|
||||
// Start a goroutine and properly handle the race between a routine
|
||||
// starting and incrementing, and exiting and decrementing.
|
||||
func (r *raftState) goFunc(f func()) {
|
||||
r.routinesGroup.Add(1)
|
||||
go func() {
|
||||
defer r.routinesGroup.Done()
|
||||
f()
|
||||
}()
|
||||
}
|
||||
|
||||
func (r *raftState) waitShutdown() {
|
||||
r.routinesGroup.Wait()
|
||||
}
|
||||
|
||||
// getLastIndex returns the last index in stable storage.
|
||||
// Either from the last log or from the last snapshot.
|
||||
func (r *raftState) getLastIndex() uint64 {
|
||||
r.lastLock.Lock()
|
||||
defer r.lastLock.Unlock()
|
||||
return max(r.lastLogIndex, r.lastSnapshotIndex)
|
||||
}
|
||||
|
||||
// getLastEntry returns the last index and term in stable storage.
|
||||
// Either from the last log or from the last snapshot.
|
||||
func (r *raftState) getLastEntry() (uint64, uint64) {
|
||||
r.lastLock.Lock()
|
||||
defer r.lastLock.Unlock()
|
||||
if r.lastLogIndex >= r.lastSnapshotIndex {
|
||||
return r.lastLogIndex, r.lastLogTerm
|
||||
}
|
||||
return r.lastSnapshotIndex, r.lastSnapshotTerm
|
||||
}
|
||||
116
vendor/github.com/hashicorp/raft/tcp_transport.go
generated
vendored
Normal file
116
vendor/github.com/hashicorp/raft/tcp_transport.go
generated
vendored
Normal file
@@ -0,0 +1,116 @@
|
||||
package raft
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"io"
|
||||
"log"
|
||||
"net"
|
||||
"time"
|
||||
)
|
||||
|
||||
// errNotAdvertisable is returned when the address a TCP transport would
// advertise is an unspecified IP.
var errNotAdvertisable = errors.New("local bind address is not advertisable")

// errNotTCP is returned when the address a TCP transport would advertise is
// not a *net.TCPAddr.
var errNotTCP = errors.New("local address is not a TCP address")
|
||||
|
||||
// TCPStreamLayer is the StreamLayer implementation for plain TCP.
type TCPStreamLayer struct {
	// advertise, when non-nil, is the address reported by Addr in place of
	// the listener's own address.
	advertise net.Addr

	// listener is the bound TCP listener that Accept and Close operate on.
	listener *net.TCPListener
}
|
||||
|
||||
// NewTCPTransport returns a NetworkTransport that is built on top of
|
||||
// a TCP streaming transport layer.
|
||||
func NewTCPTransport(
|
||||
bindAddr string,
|
||||
advertise net.Addr,
|
||||
maxPool int,
|
||||
timeout time.Duration,
|
||||
logOutput io.Writer,
|
||||
) (*NetworkTransport, error) {
|
||||
return newTCPTransport(bindAddr, advertise, func(stream StreamLayer) *NetworkTransport {
|
||||
return NewNetworkTransport(stream, maxPool, timeout, logOutput)
|
||||
})
|
||||
}
|
||||
|
||||
// NewTCPTransportWithLogger returns a NetworkTransport that is built on top of
|
||||
// a TCP streaming transport layer, with log output going to the supplied Logger
|
||||
func NewTCPTransportWithLogger(
|
||||
bindAddr string,
|
||||
advertise net.Addr,
|
||||
maxPool int,
|
||||
timeout time.Duration,
|
||||
logger *log.Logger,
|
||||
) (*NetworkTransport, error) {
|
||||
return newTCPTransport(bindAddr, advertise, func(stream StreamLayer) *NetworkTransport {
|
||||
return NewNetworkTransportWithLogger(stream, maxPool, timeout, logger)
|
||||
})
|
||||
}
|
||||
|
||||
// NewTCPTransportWithLogger returns a NetworkTransport that is built on top of
|
||||
// a TCP streaming transport layer, using a default logger and the address provider
|
||||
func NewTCPTransportWithConfig(
|
||||
bindAddr string,
|
||||
advertise net.Addr,
|
||||
config *NetworkTransportConfig,
|
||||
) (*NetworkTransport, error) {
|
||||
return newTCPTransport(bindAddr, advertise, func(stream StreamLayer) *NetworkTransport {
|
||||
config.Stream = stream
|
||||
return NewNetworkTransportWithConfig(config)
|
||||
})
|
||||
}
|
||||
|
||||
func newTCPTransport(bindAddr string,
|
||||
advertise net.Addr,
|
||||
transportCreator func(stream StreamLayer) *NetworkTransport) (*NetworkTransport, error) {
|
||||
// Try to bind
|
||||
list, err := net.Listen("tcp", bindAddr)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Create stream
|
||||
stream := &TCPStreamLayer{
|
||||
advertise: advertise,
|
||||
listener: list.(*net.TCPListener),
|
||||
}
|
||||
|
||||
// Verify that we have a usable advertise address
|
||||
addr, ok := stream.Addr().(*net.TCPAddr)
|
||||
if !ok {
|
||||
list.Close()
|
||||
return nil, errNotTCP
|
||||
}
|
||||
if addr.IP.IsUnspecified() {
|
||||
list.Close()
|
||||
return nil, errNotAdvertisable
|
||||
}
|
||||
|
||||
// Create the network transport
|
||||
trans := transportCreator(stream)
|
||||
return trans, nil
|
||||
}
|
||||
|
||||
// Dial implements the StreamLayer interface.
|
||||
func (t *TCPStreamLayer) Dial(address ServerAddress, timeout time.Duration) (net.Conn, error) {
|
||||
return net.DialTimeout("tcp", string(address), timeout)
|
||||
}
|
||||
|
||||
// Accept implements the net.Listener interface.
|
||||
func (t *TCPStreamLayer) Accept() (c net.Conn, err error) {
|
||||
return t.listener.Accept()
|
||||
}
|
||||
|
||||
// Close implements the net.Listener interface.
|
||||
func (t *TCPStreamLayer) Close() (err error) {
|
||||
return t.listener.Close()
|
||||
}
|
||||
|
||||
// Addr implements the net.Listener interface.
|
||||
func (t *TCPStreamLayer) Addr() net.Addr {
|
||||
// Use an advertise addr if provided
|
||||
if t.advertise != nil {
|
||||
return t.advertise
|
||||
}
|
||||
return t.listener.Addr()
|
||||
}
|
||||
124
vendor/github.com/hashicorp/raft/transport.go
generated
vendored
Normal file
124
vendor/github.com/hashicorp/raft/transport.go
generated
vendored
Normal file
@@ -0,0 +1,124 @@
|
||||
package raft
|
||||
|
||||
import (
|
||||
"io"
|
||||
"time"
|
||||
)
|
||||
|
||||
// RPCResponse captures both a response and a potential error.
|
||||
type RPCResponse struct {
|
||||
Response interface{}
|
||||
Error error
|
||||
}
|
||||
|
||||
// RPC has a command, and provides a response mechanism.
|
||||
type RPC struct {
|
||||
Command interface{}
|
||||
Reader io.Reader // Set only for InstallSnapshot
|
||||
RespChan chan<- RPCResponse
|
||||
}
|
||||
|
||||
// Respond is used to respond with a response, error or both
|
||||
func (r *RPC) Respond(resp interface{}, err error) {
|
||||
r.RespChan <- RPCResponse{resp, err}
|
||||
}
|
||||
|
||||
// Transport provides an interface for network transports
|
||||
// to allow Raft to communicate with other nodes.
|
||||
type Transport interface {
|
||||
// Consumer returns a channel that can be used to
|
||||
// consume and respond to RPC requests.
|
||||
Consumer() <-chan RPC
|
||||
|
||||
// LocalAddr is used to return our local address to distinguish from our peers.
|
||||
LocalAddr() ServerAddress
|
||||
|
||||
// AppendEntriesPipeline returns an interface that can be used to pipeline
|
||||
// AppendEntries requests.
|
||||
AppendEntriesPipeline(id ServerID, target ServerAddress) (AppendPipeline, error)
|
||||
|
||||
// AppendEntries sends the appropriate RPC to the target node.
|
||||
AppendEntries(id ServerID, target ServerAddress, args *AppendEntriesRequest, resp *AppendEntriesResponse) error
|
||||
|
||||
// RequestVote sends the appropriate RPC to the target node.
|
||||
RequestVote(id ServerID, target ServerAddress, args *RequestVoteRequest, resp *RequestVoteResponse) error
|
||||
|
||||
// InstallSnapshot is used to push a snapshot down to a follower. The data is read from
|
||||
// the ReadCloser and streamed to the client.
|
||||
InstallSnapshot(id ServerID, target ServerAddress, args *InstallSnapshotRequest, resp *InstallSnapshotResponse, data io.Reader) error
|
||||
|
||||
// EncodePeer is used to serialize a peer's address.
|
||||
EncodePeer(id ServerID, addr ServerAddress) []byte
|
||||
|
||||
// DecodePeer is used to deserialize a peer's address.
|
||||
DecodePeer([]byte) ServerAddress
|
||||
|
||||
// SetHeartbeatHandler is used to setup a heartbeat handler
|
||||
// as a fast-pass. This is to avoid head-of-line blocking from
|
||||
// disk IO. If a Transport does not support this, it can simply
|
||||
// ignore the call, and push the heartbeat onto the Consumer channel.
|
||||
SetHeartbeatHandler(cb func(rpc RPC))
|
||||
}
|
||||
|
||||
// WithClose is an optional interface a transport may implement so that it
// can be shut down cleanly when a Raft instance shuts down.
//
// It is separate from Transport because, unfortunately, it was not part of
// the original interface specification.
type WithClose interface {
	// Close permanently closes a transport, stopping any associated
	// goroutines and freeing other resources.
	Close() error
}
|
||||
|
||||
// LoopbackTransport is an interface that provides a loopback transport suitable for testing
|
||||
// e.g. InmemTransport. It's there so we don't have to rewrite tests.
|
||||
type LoopbackTransport interface {
|
||||
Transport // Embedded transport reference
|
||||
WithPeers // Embedded peer management
|
||||
WithClose // with a close routine
|
||||
}
|
||||
|
||||
// WithPeers is an interface that a transport may provide which allows for connection and
|
||||
// disconnection. Unless the transport is a loopback transport, the transport specified to
|
||||
// "Connect" is likely to be nil.
|
||||
type WithPeers interface {
|
||||
Connect(peer ServerAddress, t Transport) // Connect a peer
|
||||
Disconnect(peer ServerAddress) // Disconnect a given peer
|
||||
DisconnectAll() // Disconnect all peers, possibly to reconnect them later
|
||||
}
|
||||
|
||||
// AppendPipeline is used for pipelining AppendEntries requests. It is used
|
||||
// to increase the replication throughput by masking latency and better
|
||||
// utilizing bandwidth.
|
||||
type AppendPipeline interface {
|
||||
// AppendEntries is used to add another request to the pipeline.
|
||||
// The send may block which is an effective form of back-pressure.
|
||||
AppendEntries(args *AppendEntriesRequest, resp *AppendEntriesResponse) (AppendFuture, error)
|
||||
|
||||
// Consumer returns a channel that can be used to consume
|
||||
// response futures when they are ready.
|
||||
Consumer() <-chan AppendFuture
|
||||
|
||||
// Close closes the pipeline and cancels all inflight RPCs
|
||||
Close() error
|
||||
}
|
||||
|
||||
// AppendFuture is used to return information about a pipelined AppendEntries request.
|
||||
type AppendFuture interface {
|
||||
Future
|
||||
|
||||
// Start returns the time that the append request was started.
|
||||
// It is always OK to call this method.
|
||||
Start() time.Time
|
||||
|
||||
// Request holds the parameters of the AppendEntries call.
|
||||
// It is always OK to call this method.
|
||||
Request() *AppendEntriesRequest
|
||||
|
||||
// Response holds the results of the AppendEntries call.
|
||||
// This method must only be called after the Error
|
||||
// method returns, and will only be valid on success.
|
||||
Response() *AppendEntriesResponse
|
||||
}
|
||||
133
vendor/github.com/hashicorp/raft/util.go
generated
vendored
Normal file
133
vendor/github.com/hashicorp/raft/util.go
generated
vendored
Normal file
@@ -0,0 +1,133 @@
|
||||
package raft
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
crand "crypto/rand"
|
||||
"fmt"
|
||||
"math"
|
||||
"math/big"
|
||||
"math/rand"
|
||||
"time"
|
||||
|
||||
"github.com/hashicorp/go-msgpack/codec"
|
||||
)
|
||||
|
||||
func init() {
|
||||
// Ensure we use a high-entropy seed for the psuedo-random generator
|
||||
rand.Seed(newSeed())
|
||||
}
|
||||
|
||||
// newSeed returns an int64 drawn from the crypto random source, in the range
// [0, math.MaxInt64). The value is suitable for seeding a math/rand source.
// It panics if the crypto random source cannot be read.
func newSeed() int64 {
	upper := big.NewInt(math.MaxInt64)
	n, err := crand.Int(crand.Reader, upper)
	if err != nil {
		panic(fmt.Errorf("failed to read random bytes: %v", err))
	}
	return n.Int64()
}
|
||||
|
||||
// randomTimeout returns a channel that fires after a random duration between
// minVal and 2x minVal. A zero minVal yields a nil channel, which never
// fires.
func randomTimeout(minVal time.Duration) <-chan time.Time {
	if minVal == 0 {
		return nil
	}
	// Jitter lies in [0, minVal) for positive minVal.
	jitter := time.Duration(rand.Int63()) % minVal
	return time.After(minVal + jitter)
}
|
||||
|
||||
// min returns the smaller of a and b.
func min(a, b uint64) uint64 {
	if b < a {
		return b
	}
	return a
}
|
||||
|
||||
// max returns the larger of a and b.
func max(a, b uint64) uint64 {
	if b > a {
		return b
	}
	return a
}
|
||||
|
||||
// generateUUID returns a random identifier formatted as five dash-separated
// hex groups (8-4-4-4-12 characters), built from 16 bytes read from the
// crypto random source. It panics if that source cannot be read.
func generateUUID() string {
	var raw [16]byte
	if _, err := crand.Read(raw[:]); err != nil {
		panic(fmt.Errorf("failed to read random bytes: %v", err))
	}

	return fmt.Sprintf("%08x-%04x-%04x-%04x-%12x",
		raw[0:4], raw[4:6], raw[6:8], raw[8:10], raw[10:16])
}
|
||||
|
||||
// asyncNotifyCh attempts a send on ch without blocking. If the channel
// cannot accept the value right away, the notification is dropped.
func asyncNotifyCh(ch chan struct{}) {
	var token struct{}
	select {
	case ch <- token:
	default:
		// No room or no receiver ready: skip the notification.
	}
}
|
||||
|
||||
// drainNotifyCh performs one non-blocking receive on a single-item
// notification channel and reports whether anything was received.
func drainNotifyCh(ch chan struct{}) bool {
	received := false
	select {
	case <-ch:
		received = true
	default:
	}
	return received
}
|
||||
|
||||
// asyncNotifyBool attempts to send v on a bool channel without blocking. If
// the channel cannot accept the value right away, it is dropped.
func asyncNotifyBool(ch chan bool, v bool) {
	select {
	case ch <- v:
		return
	default:
		// No room or no receiver ready: skip the notification.
		return
	}
}
|
||||
|
||||
// Decode reverses the encode operation on a byte slice input.
|
||||
func decodeMsgPack(buf []byte, out interface{}) error {
|
||||
r := bytes.NewBuffer(buf)
|
||||
hd := codec.MsgpackHandle{}
|
||||
dec := codec.NewDecoder(r, &hd)
|
||||
return dec.Decode(out)
|
||||
}
|
||||
|
||||
// Encode writes an encoded object to a new bytes buffer.
|
||||
func encodeMsgPack(in interface{}) (*bytes.Buffer, error) {
|
||||
buf := bytes.NewBuffer(nil)
|
||||
hd := codec.MsgpackHandle{}
|
||||
enc := codec.NewEncoder(buf, &hd)
|
||||
err := enc.Encode(in)
|
||||
return buf, err
|
||||
}
|
||||
|
||||
// backoff computes an exponential backoff duration. The round is first
// capped at limit; base is then doubled once for every step of that capped
// round above 2, so rounds 0 through 2 all return base unchanged.
func backoff(base time.Duration, round, limit uint64) time.Duration {
	// Cap the round at the limit.
	steps := round
	if limit < steps {
		steps = limit
	}
	for ; steps > 2; steps-- {
		base *= 2
	}
	return base
}
|
||||
|
||||
// uint64Slice provides the Len/Less/Swap methods needed for sorting a
// []uint64 in ascending order; it is used to determine commitment.
type uint64Slice []uint64

// Len returns the number of elements.
func (p uint64Slice) Len() int {
	return len(p)
}

// Less reports whether the element at index i is smaller than the one at j.
func (p uint64Slice) Less(i, j int) bool {
	return p[i] < p[j]
}

// Swap exchanges the elements at indexes i and j.
func (p uint64Slice) Swap(i, j int) {
	tmp := p[i]
	p[i] = p[j]
	p[j] = tmp
}
|
||||
Reference in New Issue
Block a user