kernel_optimize_test/net/dns_resolver/dns_key.c
Waiman Long d3ec10aa95 KEYS: Don't write out to userspace while holding key semaphore
A lockdep circular locking dependency report was seen when running a
keyutils test:

[12537.027242] ======================================================
[12537.059309] WARNING: possible circular locking dependency detected
[12537.088148] 4.18.0-147.7.1.el8_1.x86_64+debug #1 Tainted: G OE    --------- -  -
[12537.125253] ------------------------------------------------------
[12537.153189] keyctl/25598 is trying to acquire lock:
[12537.175087] 000000007c39f96c (&mm->mmap_sem){++++}, at: __might_fault+0xc4/0x1b0
[12537.208365]
[12537.208365] but task is already holding lock:
[12537.234507] 000000003de5b58d (&type->lock_class){++++}, at: keyctl_read_key+0x15a/0x220
[12537.270476]
[12537.270476] which lock already depends on the new lock.
[12537.270476]
[12537.307209]
[12537.307209] the existing dependency chain (in reverse order) is:
[12537.340754]
[12537.340754] -> #3 (&type->lock_class){++++}:
[12537.367434]        down_write+0x4d/0x110
[12537.385202]        __key_link_begin+0x87/0x280
[12537.405232]        request_key_and_link+0x483/0xf70
[12537.427221]        request_key+0x3c/0x80
[12537.444839]        dns_query+0x1db/0x5a5 [dns_resolver]
[12537.468445]        dns_resolve_server_name_to_ip+0x1e1/0x4d0 [cifs]
[12537.496731]        cifs_reconnect+0xe04/0x2500 [cifs]
[12537.519418]        cifs_readv_from_socket+0x461/0x690 [cifs]
[12537.546263]        cifs_read_from_socket+0xa0/0xe0 [cifs]
[12537.573551]        cifs_demultiplex_thread+0x311/0x2db0 [cifs]
[12537.601045]        kthread+0x30c/0x3d0
[12537.617906]        ret_from_fork+0x3a/0x50
[12537.636225]
[12537.636225] -> #2 (root_key_user.cons_lock){+.+.}:
[12537.664525]        __mutex_lock+0x105/0x11f0
[12537.683734]        request_key_and_link+0x35a/0xf70
[12537.705640]        request_key+0x3c/0x80
[12537.723304]        dns_query+0x1db/0x5a5 [dns_resolver]
[12537.746773]        dns_resolve_server_name_to_ip+0x1e1/0x4d0 [cifs]
[12537.775607]        cifs_reconnect+0xe04/0x2500 [cifs]
[12537.798322]        cifs_readv_from_socket+0x461/0x690 [cifs]
[12537.823369]        cifs_read_from_socket+0xa0/0xe0 [cifs]
[12537.847262]        cifs_demultiplex_thread+0x311/0x2db0 [cifs]
[12537.873477]        kthread+0x30c/0x3d0
[12537.890281]        ret_from_fork+0x3a/0x50
[12537.908649]
[12537.908649] -> #1 (&tcp_ses->srv_mutex){+.+.}:
[12537.935225]        __mutex_lock+0x105/0x11f0
[12537.954450]        cifs_call_async+0x102/0x7f0 [cifs]
[12537.977250]        smb2_async_readv+0x6c3/0xc90 [cifs]
[12538.000659]        cifs_readpages+0x120a/0x1e50 [cifs]
[12538.023920]        read_pages+0xf5/0x560
[12538.041583]        __do_page_cache_readahead+0x41d/0x4b0
[12538.067047]        ondemand_readahead+0x44c/0xc10
[12538.092069]        filemap_fault+0xec1/0x1830
[12538.111637]        __do_fault+0x82/0x260
[12538.129216]        do_fault+0x419/0xfb0
[12538.146390]        __handle_mm_fault+0x862/0xdf0
[12538.167408]        handle_mm_fault+0x154/0x550
[12538.187401]        __do_page_fault+0x42f/0xa60
[12538.207395]        do_page_fault+0x38/0x5e0
[12538.225777]        page_fault+0x1e/0x30
[12538.243010]
[12538.243010] -> #0 (&mm->mmap_sem){++++}:
[12538.267875]        lock_acquire+0x14c/0x420
[12538.286848]        __might_fault+0x119/0x1b0
[12538.306006]        keyring_read_iterator+0x7e/0x170
[12538.327936]        assoc_array_subtree_iterate+0x97/0x280
[12538.352154]        keyring_read+0xe9/0x110
[12538.370558]        keyctl_read_key+0x1b9/0x220
[12538.391470]        do_syscall_64+0xa5/0x4b0
[12538.410511]        entry_SYSCALL_64_after_hwframe+0x6a/0xdf
[12538.435535]
[12538.435535] other info that might help us debug this:
[12538.435535]
[12538.472829] Chain exists of:
[12538.472829]   &mm->mmap_sem --> root_key_user.cons_lock --> &type->lock_class
[12538.472829]
[12538.524820]  Possible unsafe locking scenario:
[12538.524820]
[12538.551431]        CPU0                    CPU1
[12538.572654]        ----                    ----
[12538.595865]   lock(&type->lock_class);
[12538.613737]                                lock(root_key_user.cons_lock);
[12538.644234]                                lock(&type->lock_class);
[12538.672410]   lock(&mm->mmap_sem);
[12538.687758]
[12538.687758]  *** DEADLOCK ***
[12538.687758]
[12538.714455] 1 lock held by keyctl/25598:
[12538.732097]  #0: 000000003de5b58d (&type->lock_class){++++}, at: keyctl_read_key+0x15a/0x220
[12538.770573]
[12538.770573] stack backtrace:
[12538.790136] CPU: 2 PID: 25598 Comm: keyctl Kdump: loaded Tainted: G
[12538.844855] Hardware name: HP ProLiant DL360 Gen9/ProLiant DL360 Gen9, BIOS P89 12/27/2015
[12538.881963] Call Trace:
[12538.892897]  dump_stack+0x9a/0xf0
[12538.907908]  print_circular_bug.isra.25.cold.50+0x1bc/0x279
[12538.932891]  ? save_trace+0xd6/0x250
[12538.948979]  check_prev_add.constprop.32+0xc36/0x14f0
[12538.971643]  ? keyring_compare_object+0x104/0x190
[12538.992738]  ? check_usage+0x550/0x550
[12539.009845]  ? sched_clock+0x5/0x10
[12539.025484]  ? sched_clock_cpu+0x18/0x1e0
[12539.043555]  __lock_acquire+0x1f12/0x38d0
[12539.061551]  ? trace_hardirqs_on+0x10/0x10
[12539.080554]  lock_acquire+0x14c/0x420
[12539.100330]  ? __might_fault+0xc4/0x1b0
[12539.119079]  __might_fault+0x119/0x1b0
[12539.135869]  ? __might_fault+0xc4/0x1b0
[12539.153234]  keyring_read_iterator+0x7e/0x170
[12539.172787]  ? keyring_read+0x110/0x110
[12539.190059]  assoc_array_subtree_iterate+0x97/0x280
[12539.211526]  keyring_read+0xe9/0x110
[12539.227561]  ? keyring_gc_check_iterator+0xc0/0xc0
[12539.249076]  keyctl_read_key+0x1b9/0x220
[12539.266660]  do_syscall_64+0xa5/0x4b0
[12539.283091]  entry_SYSCALL_64_after_hwframe+0x6a/0xdf

One way to prevent this deadlock scenario from happening is to not
allow writing to userspace while holding the key semaphore. Instead,
an internal buffer is allocated for getting the keys out from the
read method first before copying them out to userspace without holding
the lock.

That requires taking out the __user modifier from all the relevant
read methods as well as additional changes to not use any userspace
write helpers. That is,

  1) The put_user() call is replaced by a direct copy.
  2) The copy_to_user() call is replaced by memcpy().
  3) All the fault handling code is removed.

Compiling on a x86-64 system, the size of the rxrpc_read() function is
reduced from 3795 bytes to 2384 bytes with this patch.

Fixes: ^1da177e4c3f4 ("Linux-2.6.12-rc2")
Reviewed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: David Howells <dhowells@redhat.com>
2020-03-29 12:40:41 +01:00

385 lines
9.7 KiB
C

/* Key type used to cache DNS lookups made by the kernel
*
* See Documentation/networking/dns_resolver.txt
*
* Copyright (c) 2007 Igor Mammedov
* Author(s): Igor Mammedov (niallain@gmail.com)
* Steve French (sfrench@us.ibm.com)
* Wang Lei (wang840925@gmail.com)
* David Howells (dhowells@redhat.com)
*
* This library is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation; either version 2.1 of the License, or
* (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/keyctl.h>
#include <linux/err.h>
#include <linux/seq_file.h>
#include <linux/dns_resolver.h>
#include <keys/dns_resolver-type.h>
#include <keys/user-type.h>
#include "internal.h"
MODULE_DESCRIPTION("DNS Resolver");
MODULE_AUTHOR("Wang Lei");
MODULE_LICENSE("GPL");
unsigned int dns_resolver_debug;
module_param_named(debug, dns_resolver_debug, uint, 0644);
MODULE_PARM_DESC(debug, "DNS Resolver debugging mask");
const struct cred *dns_resolver_cache;
#define DNS_ERRORNO_OPTION "dnserror"
/*
* Preparse instantiation data for a dns_resolver key.
*
* For normal hostname lookups, the data must be a NUL-terminated string, with
* the NUL char accounted in datalen.
*
* If the data contains a '#' characters, then we take the clause after each
* one to be an option of the form 'key=value'. The actual data of interest is
* the string leading up to the first '#'. For instance:
*
* "ip1,ip2,...#foo=bar"
*
* For server list requests, the data must begin with a NUL char and be
* followed by a byte indicating the version of the data format. Version 1
* looks something like (note this is packed):
*
* u8 Non-string marker (ie. 0)
* u8 Content (DNS_PAYLOAD_IS_*)
* u8 Version (e.g. 1)
* u8 Source of server list
* u8 Lookup status of server list
* u8 Number of servers
* foreach-server {
* __le16 Name length
* __le16 Priority (as per SRV record, low first)
* __le16 Weight (as per SRV record, higher first)
* __le16 Port
* u8 Source of address list
* u8 Lookup status of address list
* u8 Protocol (DNS_SERVER_PROTOCOL_*)
* u8 Number of addresses
* char[] Name (not NUL-terminated)
* foreach-address {
* u8 Family (DNS_ADDRESS_IS_*)
* union {
* u8[4] ipv4_addr
* u8[16] ipv6_addr
* }
* }
* }
*
*/
static int
dns_resolver_preparse(struct key_preparsed_payload *prep)
{
const struct dns_payload_header *bin;
struct user_key_payload *upayload;
unsigned long derrno;
int ret;
int datalen = prep->datalen, result_len = 0;
const char *data = prep->data, *end, *opt;
if (datalen <= 1 || !data)
return -EINVAL;
if (data[0] == 0) {
/* It may be a server list. */
if (datalen <= sizeof(*bin))
return -EINVAL;
bin = (const struct dns_payload_header *)data;
kenter("[%u,%u],%u", bin->content, bin->version, datalen);
if (bin->content != DNS_PAYLOAD_IS_SERVER_LIST) {
pr_warn_ratelimited(
"dns_resolver: Unsupported content type (%u)\n",
bin->content);
return -EINVAL;
}
if (bin->version != 1) {
pr_warn_ratelimited(
"dns_resolver: Unsupported server list version (%u)\n",
bin->version);
return -EINVAL;
}
result_len = datalen;
goto store_result;
}
kenter("'%*.*s',%u", datalen, datalen, data, datalen);
if (!data || data[datalen - 1] != '\0')
return -EINVAL;
datalen--;
/* deal with any options embedded in the data */
end = data + datalen;
opt = memchr(data, '#', datalen);
if (!opt) {
/* no options: the entire data is the result */
kdebug("no options");
result_len = datalen;
} else {
const char *next_opt;
result_len = opt - data;
opt++;
kdebug("options: '%s'", opt);
do {
int opt_len, opt_nlen;
const char *eq;
char optval[128];
next_opt = memchr(opt, '#', end - opt) ?: end;
opt_len = next_opt - opt;
if (opt_len <= 0 || opt_len > sizeof(optval)) {
pr_warn_ratelimited("Invalid option length (%d) for dns_resolver key\n",
opt_len);
return -EINVAL;
}
eq = memchr(opt, '=', opt_len);
if (eq) {
opt_nlen = eq - opt;
eq++;
memcpy(optval, eq, next_opt - eq);
optval[next_opt - eq] = '\0';
} else {
opt_nlen = opt_len;
optval[0] = '\0';
}
kdebug("option '%*.*s' val '%s'",
opt_nlen, opt_nlen, opt, optval);
/* see if it's an error number representing a DNS error
* that's to be recorded as the result in this key */
if (opt_nlen == sizeof(DNS_ERRORNO_OPTION) - 1 &&
memcmp(opt, DNS_ERRORNO_OPTION, opt_nlen) == 0) {
kdebug("dns error number option");
ret = kstrtoul(optval, 10, &derrno);
if (ret < 0)
goto bad_option_value;
if (derrno < 1 || derrno > 511)
goto bad_option_value;
kdebug("dns error no. = %lu", derrno);
prep->payload.data[dns_key_error] = ERR_PTR(-derrno);
continue;
}
bad_option_value:
pr_warn_ratelimited("Option '%*.*s' to dns_resolver key: bad/missing value\n",
opt_nlen, opt_nlen, opt);
return -EINVAL;
} while (opt = next_opt + 1, opt < end);
}
/* don't cache the result if we're caching an error saying there's no
* result */
if (prep->payload.data[dns_key_error]) {
kleave(" = 0 [h_error %ld]", PTR_ERR(prep->payload.data[dns_key_error]));
return 0;
}
store_result:
kdebug("store result");
prep->quotalen = result_len;
upayload = kmalloc(sizeof(*upayload) + result_len + 1, GFP_KERNEL);
if (!upayload) {
kleave(" = -ENOMEM");
return -ENOMEM;
}
upayload->datalen = result_len;
memcpy(upayload->data, data, result_len);
upayload->data[result_len] = '\0';
prep->payload.data[dns_key_data] = upayload;
kleave(" = 0");
return 0;
}
/*
* Clean up the preparse data
*/
static void dns_resolver_free_preparse(struct key_preparsed_payload *prep)
{
pr_devel("==>%s()\n", __func__);
kfree(prep->payload.data[dns_key_data]);
}
/*
* The description is of the form "[<type>:]<domain_name>"
*
* The domain name may be a simple name or an absolute domain name (which
* should end with a period). The domain name is case-independent.
*/
static bool dns_resolver_cmp(const struct key *key,
const struct key_match_data *match_data)
{
int slen, dlen, ret = 0;
const char *src = key->description, *dsp = match_data->raw_data;
kenter("%s,%s", src, dsp);
if (!src || !dsp)
goto no_match;
if (strcasecmp(src, dsp) == 0)
goto matched;
slen = strlen(src);
dlen = strlen(dsp);
if (slen <= 0 || dlen <= 0)
goto no_match;
if (src[slen - 1] == '.')
slen--;
if (dsp[dlen - 1] == '.')
dlen--;
if (slen != dlen || strncasecmp(src, dsp, slen) != 0)
goto no_match;
matched:
ret = 1;
no_match:
kleave(" = %d", ret);
return ret;
}
/*
* Preparse the match criterion.
*/
static int dns_resolver_match_preparse(struct key_match_data *match_data)
{
match_data->lookup_type = KEYRING_SEARCH_LOOKUP_ITERATE;
match_data->cmp = dns_resolver_cmp;
return 0;
}
/*
* Describe a DNS key
*/
static void dns_resolver_describe(const struct key *key, struct seq_file *m)
{
seq_puts(m, key->description);
if (key_is_positive(key)) {
int err = PTR_ERR(key->payload.data[dns_key_error]);
if (err)
seq_printf(m, ": %d", err);
else
seq_printf(m, ": %u", key->datalen);
}
}
/*
* read the DNS data
* - the key's semaphore is read-locked
*/
static long dns_resolver_read(const struct key *key,
char *buffer, size_t buflen)
{
int err = PTR_ERR(key->payload.data[dns_key_error]);
if (err)
return err;
return user_read(key, buffer, buflen);
}
struct key_type key_type_dns_resolver = {
.name = "dns_resolver",
.flags = KEY_TYPE_NET_DOMAIN,
.preparse = dns_resolver_preparse,
.free_preparse = dns_resolver_free_preparse,
.instantiate = generic_key_instantiate,
.match_preparse = dns_resolver_match_preparse,
.revoke = user_revoke,
.destroy = user_destroy,
.describe = dns_resolver_describe,
.read = dns_resolver_read,
};
static int __init init_dns_resolver(void)
{
struct cred *cred;
struct key *keyring;
int ret;
/* create an override credential set with a special thread keyring in
* which DNS requests are cached
*
* this is used to prevent malicious redirections from being installed
* with add_key().
*/
cred = prepare_kernel_cred(NULL);
if (!cred)
return -ENOMEM;
keyring = keyring_alloc(".dns_resolver",
GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, cred,
(KEY_POS_ALL & ~KEY_POS_SETATTR) |
KEY_USR_VIEW | KEY_USR_READ,
KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
if (IS_ERR(keyring)) {
ret = PTR_ERR(keyring);
goto failed_put_cred;
}
ret = register_key_type(&key_type_dns_resolver);
if (ret < 0)
goto failed_put_key;
/* instruct request_key() to use this special keyring as a cache for
* the results it looks up */
set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags);
cred->thread_keyring = keyring;
cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
dns_resolver_cache = cred;
kdebug("DNS resolver keyring: %d\n", key_serial(keyring));
return 0;
failed_put_key:
key_put(keyring);
failed_put_cred:
put_cred(cred);
return ret;
}
static void __exit exit_dns_resolver(void)
{
key_revoke(dns_resolver_cache->thread_keyring);
unregister_key_type(&key_type_dns_resolver);
put_cred(dns_resolver_cache);
}
module_init(init_dns_resolver)
module_exit(exit_dns_resolver)
MODULE_LICENSE("GPL");