dispatch/vendor/github.com/caddyserver/certmagic/solvers.go
2020-05-24 07:21:47 +02:00

435 lines
14 KiB
Go

// Copyright 2015 Matthew Holt
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package certmagic
import (
"crypto/tls"
"encoding/json"
"fmt"
"log"
"net"
"net/http"
"path"
"runtime"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/go-acme/lego/v3/challenge"
"github.com/go-acme/lego/v3/challenge/tlsalpn01"
)
// httpSolver solves the HTTP challenge. It must be
// associated with a config and an address to use
// for solving the challenge. If multiple httpSolvers
// are initialized concurrently, the first one to
// begin will start the server, and the last one to
// finish will stop the server. This solver must be
// wrapped by a distributedSolver to work properly,
// because the only way the HTTP challenge handler
// can access the keyAuth material is by loading it
// from storage, which is done by distributedSolver.
type httpSolver struct {
closed int32 // accessed atomically
acmeManager *ACMEManager
address string
}
// Present starts an HTTP server if none is already listening on s.address.
func (s *httpSolver) Present(domain, token, keyAuth string) error {
solversMu.Lock()
defer solversMu.Unlock()
si := getSolverInfo(s.address)
si.count++
if si.listener != nil {
return nil // already be served by us
}
// notice the unusual error handling here; we
// only continue to start a challenge server if
// we got a listener; in all other cases return
ln, err := robustTryListen(s.address)
if ln == nil {
return err
}
// successfully bound socket, so save listener and start key auth HTTP server
si.listener = ln
go s.serve(si)
return nil
}
// serve is an HTTP server that serves only HTTP challenge responses.
func (s *httpSolver) serve(si *solverInfo) {
defer func() {
if err := recover(); err != nil {
buf := make([]byte, stackTraceBufferSize)
buf = buf[:runtime.Stack(buf, false)]
log.Printf("panic: http solver server: %v\n%s", err, buf)
}
}()
defer close(si.done)
httpServer := &http.Server{Handler: s.acmeManager.HTTPChallengeHandler(http.NewServeMux())}
httpServer.SetKeepAlivesEnabled(false)
err := httpServer.Serve(si.listener)
if err != nil && atomic.LoadInt32(&s.closed) != 1 {
log.Printf("[ERROR] key auth HTTP server: %v", err)
}
}
// CleanUp cleans up the HTTP server if it is the last one to finish.
func (s *httpSolver) CleanUp(domain, token, keyAuth string) error {
solversMu.Lock()
defer solversMu.Unlock()
si := getSolverInfo(s.address)
si.count--
if si.count == 0 {
// last one out turns off the lights
atomic.StoreInt32(&s.closed, 1)
if si.listener != nil {
si.listener.Close()
<-si.done
}
delete(solvers, s.address)
}
return nil
}
// tlsALPNSolver is a type that can solve TLS-ALPN challenges.
// It must have an associated config and address on which to
// serve the challenge.
type tlsALPNSolver struct {
config *Config
address string
}
// Present adds the certificate to the certificate cache and, if
// needed, starts a TLS server for answering TLS-ALPN challenges.
func (s *tlsALPNSolver) Present(domain, token, keyAuth string) error {
// load the certificate into the cache; this isn't strictly necessary
// if we're using the distributed solver since our GetCertificate
// function will check storage for the keyAuth anyway, but it seems
// like loading it into the cache is the right thing to do
cert, err := tlsalpn01.ChallengeCert(domain, keyAuth)
if err != nil {
return err
}
certHash := hashCertificateChain(cert.Certificate)
s.config.certCache.mu.Lock()
s.config.certCache.cache[tlsALPNCertKeyName(domain)] = Certificate{
Certificate: *cert,
Names: []string{domain},
hash: certHash, // perhaps not necesssary
}
s.config.certCache.mu.Unlock()
// the rest of this function increments the
// challenge count for the solver at this
// listener address, and if necessary, starts
// a simple TLS server
solversMu.Lock()
defer solversMu.Unlock()
si := getSolverInfo(s.address)
si.count++
if si.listener != nil {
return nil // already be served by us
}
// notice the unusual error handling here; we
// only continue to start a challenge server if
// we got a listener; in all other cases return
ln, err := robustTryListen(s.address)
if ln == nil {
return err
}
// we were able to bind the socket, so make it into a TLS
// listener, store it with the solverInfo, and start the
// challenge server
si.listener = tls.NewListener(ln, s.config.TLSConfig())
go func() {
defer func() {
if err := recover(); err != nil {
buf := make([]byte, stackTraceBufferSize)
buf = buf[:runtime.Stack(buf, false)]
log.Printf("panic: tls-alpn solver server: %v\n%s", err, buf)
}
}()
defer close(si.done)
for {
conn, err := si.listener.Accept()
if err != nil {
if atomic.LoadInt32(&si.closed) == 1 {
return
}
log.Printf("[ERROR] TLS-ALPN challenge server: accept: %v", err)
continue
}
go s.handleConn(conn)
}
}()
return nil
}
// handleConn completes the TLS handshake and then closes conn.
func (*tlsALPNSolver) handleConn(conn net.Conn) {
defer func() {
if err := recover(); err != nil {
buf := make([]byte, stackTraceBufferSize)
buf = buf[:runtime.Stack(buf, false)]
log.Printf("panic: tls-alpn solver handler: %v\n%s", err, buf)
}
}()
defer conn.Close()
tlsConn, ok := conn.(*tls.Conn)
if !ok {
log.Printf("[ERROR] TLS-ALPN challenge server: expected tls.Conn but got %T: %#v", conn, conn)
return
}
err := tlsConn.Handshake()
if err != nil {
log.Printf("[ERROR] TLS-ALPN challenge server: handshake: %v", err)
return
}
}
// CleanUp removes the challenge certificate from the cache, and if
// it is the last one to finish, stops the TLS server.
func (s *tlsALPNSolver) CleanUp(domain, token, keyAuth string) error {
s.config.certCache.mu.Lock()
delete(s.config.certCache.cache, tlsALPNCertKeyName(domain))
s.config.certCache.mu.Unlock()
solversMu.Lock()
defer solversMu.Unlock()
si := getSolverInfo(s.address)
si.count--
if si.count == 0 {
// last one out turns off the lights
atomic.StoreInt32(&si.closed, 1)
if si.listener != nil {
si.listener.Close()
<-si.done
}
delete(solvers, s.address)
}
return nil
}
// tlsALPNCertKeyName returns the key to use when caching a cert
// for use with the TLS-ALPN ACME challenge. It is simply to help
// avoid conflicts (although at time of writing, there shouldn't
// be, since the cert cache is keyed by hash of certificate chain).
func tlsALPNCertKeyName(sniName string) string {
return sniName + ":acme-tls-alpn"
}
// distributedSolver allows the ACME HTTP-01 and TLS-ALPN challenges
// to be solved by an instance other than the one which initiated it.
// This is useful behind load balancers or in other cluster/fleet
// configurations. The only requirement is that the instance which
// initiates the challenge shares the same storage and locker with
// the others in the cluster. The storage backing the certificate
// cache in distributedSolver.config is crucial.
//
// Obviously, the instance which completes the challenge must be
// serving on the HTTPChallengePort for the HTTP-01 challenge or the
// TLSALPNChallengePort for the TLS-ALPN-01 challenge (or have all
// the packets port-forwarded) to receive and handle the request. The
// server which receives the challenge must handle it by checking to
// see if the challenge token exists in storage, and if so, decode it
// and use it to serve up the correct response. HTTPChallengeHandler
// in this package as well as the GetCertificate method implemented
// by a Config support and even require this behavior.
//
// In short: the only two requirements for cluster operation are
// sharing sync and storage, and using the facilities provided by
// this package for solving the challenges.
type distributedSolver struct {
// The config with a certificate cache
// with a reference to the storage to
// use which is shared among all the
// instances in the cluster - REQUIRED.
acmeManager *ACMEManager
// Since the distributedSolver is only a
// wrapper over an actual solver, place
// the actual solver here.
providerServer challenge.Provider
// The CA endpoint URL associated with
// this solver.
caURL string
}
// Present invokes the underlying solver's Present method
// and also stores domain, token, and keyAuth to the storage
// backing the certificate cache of dhs.acmeManager.
func (dhs distributedSolver) Present(domain, token, keyAuth string) error {
infoBytes, err := json.Marshal(challengeInfo{
Domain: domain,
Token: token,
KeyAuth: keyAuth,
})
if err != nil {
return err
}
err = dhs.acmeManager.config.Storage.Store(dhs.challengeTokensKey(domain), infoBytes)
if err != nil {
return err
}
err = dhs.providerServer.Present(domain, token, keyAuth)
if err != nil {
return fmt.Errorf("presenting with embedded provider: %v", err)
}
return nil
}
// CleanUp invokes the underlying solver's CleanUp method
// and also cleans up any assets saved to storage.
func (dhs distributedSolver) CleanUp(domain, token, keyAuth string) error {
err := dhs.acmeManager.config.Storage.Delete(dhs.challengeTokensKey(domain))
if err != nil {
return err
}
err = dhs.providerServer.CleanUp(domain, token, keyAuth)
if err != nil {
return fmt.Errorf("cleaning up embedded provider: %v", err)
}
return nil
}
// challengeTokensPrefix returns the key prefix for challenge info.
func (dhs distributedSolver) challengeTokensPrefix() string {
return path.Join(dhs.acmeManager.storageKeyCAPrefix(dhs.caURL), "challenge_tokens")
}
// challengeTokensKey returns the key to use to store and access
// challenge info for domain.
func (dhs distributedSolver) challengeTokensKey(domain string) string {
return path.Join(dhs.challengeTokensPrefix(), StorageKeys.Safe(domain)+".json")
}
type challengeInfo struct {
Domain, Token, KeyAuth string
}
// solverInfo associates a listener with the
// number of challenges currently using it.
type solverInfo struct {
closed int32 // accessed atomically
count int
listener net.Listener
done chan struct{} // used to signal when our own solver server is done
}
// getSolverInfo gets a valid solverInfo struct for address.
func getSolverInfo(address string) *solverInfo {
si, ok := solvers[address]
if !ok {
si = &solverInfo{done: make(chan struct{})}
solvers[address] = si
}
return si
}
// robustTryListen calls net.Listen for a TCP socket at addr.
// This function may return both a nil listener and a nil error!
// If it was able to bind the socket, it returns the listener
// and no error. If it wasn't able to bind the socket because
// the socket is already in use, then it returns a nil listener
// and nil error. If it had any other error, it returns the
// error. The intended error handling logic for this function
// is to proceed if the returned listener is not nil; otherwise
// return err (which may also be nil). In other words, this
// function ignores errors if the socket is already in use,
// which is useful for our challenge servers, where we assume
// that whatever is already listening can solve the challenges.
func robustTryListen(addr string) (net.Listener, error) {
var listenErr error
for i := 0; i < 2; i++ {
// doesn't hurt to sleep briefly before the second
// attempt in case the OS has timing issues
if i > 0 {
time.Sleep(100 * time.Millisecond)
}
// if we can bind the socket right away, great!
var ln net.Listener
ln, listenErr = net.Listen("tcp", addr)
if listenErr == nil {
return ln, nil
}
// if it failed just because the socket is already in use, we
// have no choice but to assume that whatever is using the socket
// can answer the challenge already, so we ignore the error
connectErr := dialTCPSocket(addr)
if connectErr == nil {
return nil, nil
}
// hmm, we couldn't connect to the socket, so something else must
// be wrong, right? wrong!! we've had reports across multiple OSes
// now that sometimes connections fail even though the OS told us
// that the address was already in use; either the listener is
// fluctuating between open and closed very, very quickly, or the
// OS is inconsistent and contradicting itself; I have been unable
// to reproduce this, so I'm now resorting to hard-coding substring
// matching in error messages as a really hacky and unreliable
// safeguard against this, until we can idenify exactly what was
// happening; see the following threads for more info:
// https://caddy.community/t/caddy-retry-error/7317
// https://caddy.community/t/v2-upgrade-to-caddy2-failing-with-errors/7423
if strings.Contains(listenErr.Error(), "address already in use") ||
strings.Contains(listenErr.Error(), "one usage of each socket address") {
log.Printf("[WARNING] OS reports a contradiction: %v - but we cannot connect to it, with this error: %v; continuing anyway 🤞 (I don't know what causes this... if you do, please help?)", listenErr, connectErr)
return nil, nil
}
}
return nil, fmt.Errorf("could not start listener for challenge server at %s: %v", addr, listenErr)
}
// dialTCPSocket connects to a TCP address just for the sake of
// seeing if it is open. It returns a nil error if a TCP connection
// can successfully be made to addr within a short timeout.
func dialTCPSocket(addr string) error {
conn, err := net.DialTimeout("tcp", addr, 250*time.Millisecond)
if err == nil {
conn.Close()
}
return err
}
// The active challenge solvers, keyed by listener address,
// and protected by a mutex. Note that the creation of
// solver listeners and the incrementing of their counts
// are atomic operations guarded by this mutex.
var (
solvers = make(map[string]*solverInfo)
solversMu sync.Mutex
)