ANDROID: netfilter: xt_qtaguid: start tracking iface rx/tx at low level

qtaguid tracks the device stats by monitoring when it goes up and down,
then it gets the dev_stats().
But devs don't correctly report stats (either they don't count headers
symmetrically between rx/tx, or they count internal control messages).

Now qtaguid counts the rx/tx bytes/packets during raw:prerouting and
mangle:postrouting (nat is not available in ipv6).

The results are in
  /proc/net/xt_qtaguid/iface_stat_fmt
which outputs a format line (bash expansion):
  ifname  total_skb_{rx,tx}_{bytes,packets}

Added event counters for pre/post handling.
Added extra ctrl_*() pid/uid debugging.

Change-Id: Id84345d544ad1dd5f63e3842cab229e71d339297
Signed-off-by: JP Abgrall <jpa@google.com>
This commit is contained in:
JP Abgrall 2012-04-27 12:57:39 -07:00 committed by Amit Pundir
parent 2890137ec4
commit 8ec2151f38
3 changed files with 233 additions and 67 deletions

View File

@ -114,8 +114,15 @@ module_param_named(debug_mask, qtaguid_debug_mask, uint, S_IRUGO | S_IWUSR);
/*---------------------------------------------------------------------------*/
static const char *iface_stat_procdirname = "iface_stat";
static struct proc_dir_entry *iface_stat_procdir;
/*
* The iface_stat_all* will go away once userspace gets used to the new fields
* that have a format line.
*/
static const char *iface_stat_all_procfilename = "iface_stat_all";
static struct proc_dir_entry *iface_stat_all_procfile;
static const char *iface_stat_fmt_procfilename = "iface_stat_fmt";
static struct proc_dir_entry *iface_stat_fmt_procfile;
/*
* Ordering of locks:
@ -128,9 +135,9 @@ static struct proc_dir_entry *iface_stat_all_procfile;
* Notice how sock_tag_list_lock is held sometimes when uid_tag_data_tree_lock
* is acquired.
*
* Call tree with all lock holders as of 2011-09-25:
* Call tree with all lock holders as of 2012-04-27:
*
* iface_stat_all_proc_read()
* iface_stat_fmt_proc_read()
* iface_stat_list_lock
* (struct iface_stat)
*
@ -781,13 +788,14 @@ done:
return iface_entry;
}
static int iface_stat_all_proc_read(char *page, char **num_items_returned,
static int iface_stat_fmt_proc_read(char *page, char **num_items_returned,
off_t items_to_skip, int char_count,
int *eof, void *data)
{
char *outp = page;
int item_index = 0;
int len;
int fmt = (int)data; /* The data is just 1 (old) or 2 (uses fmt) */
struct iface_stat *iface_entry;
struct rtnl_link_stats64 dev_stats, *stats;
struct rtnl_link_stats64 no_dev_stats = {0};
@ -797,14 +805,32 @@ static int iface_stat_all_proc_read(char *page, char **num_items_returned,
return 0;
}
CT_DEBUG("qtaguid:proc iface_stat_all "
CT_DEBUG("qtaguid:proc iface_stat_fmt "
"pid=%u tgid=%u uid=%u "
"page=%p *num_items_returned=%p off=%ld "
"char_count=%d *eof=%d\n", page, *num_items_returned,
"char_count=%d *eof=%d\n",
current->pid, current->tgid, current_fsuid(),
page, *num_items_returned,
items_to_skip, char_count, *eof);
if (*eof)
return 0;
if (fmt == 2 && item_index++ >= items_to_skip) {
len = snprintf(outp, char_count,
"ifname "
"total_skb_rx_bytes total_skb_rx_packets "
"total_skb_tx_bytes total_skb_tx_packets\n"
);
if (len >= char_count) {
*outp = '\0';
return outp - page;
}
outp += len;
char_count -= len;
(*num_items_returned)++;
}
/*
* This lock will prevent iface_stat_update() from changing active,
* and in turn prevent an interface from unregistering itself.
@ -820,18 +846,37 @@ static int iface_stat_all_proc_read(char *page, char **num_items_returned,
} else {
stats = &no_dev_stats;
}
len = snprintf(outp, char_count,
"%s %d "
"%llu %llu %llu %llu "
"%llu %llu %llu %llu\n",
iface_entry->ifname,
iface_entry->active,
iface_entry->totals[IFS_RX].bytes,
iface_entry->totals[IFS_RX].packets,
iface_entry->totals[IFS_TX].bytes,
iface_entry->totals[IFS_TX].packets,
stats->rx_bytes, stats->rx_packets,
stats->tx_bytes, stats->tx_packets);
/*
* If the meaning of the data changes, then update the fmtX
* string.
*/
if (fmt == 1) {
len = snprintf(
outp, char_count,
"%s %d "
"%llu %llu %llu %llu "
"%llu %llu %llu %llu\n",
iface_entry->ifname,
iface_entry->active,
iface_entry->totals_via_dev[IFS_RX].bytes,
iface_entry->totals_via_dev[IFS_RX].packets,
iface_entry->totals_via_dev[IFS_TX].bytes,
iface_entry->totals_via_dev[IFS_TX].packets,
stats->rx_bytes, stats->rx_packets,
stats->tx_bytes, stats->tx_packets
);
} else {
len = snprintf(
outp, char_count,
"%s "
"%llu %llu %llu %llu\n",
iface_entry->ifname,
iface_entry->totals_via_skb[IFS_RX].bytes,
iface_entry->totals_via_skb[IFS_RX].packets,
iface_entry->totals_via_skb[IFS_TX].bytes,
iface_entry->totals_via_skb[IFS_TX].packets
);
}
if (len >= char_count) {
spin_unlock_bh(&iface_stat_list_lock);
*outp = '\0';
@ -865,13 +910,17 @@ static void iface_create_proc_worker(struct work_struct *work)
new_iface->proc_ptr = proc_entry;
create_proc_read_entry("tx_bytes", proc_iface_perms, proc_entry,
read_proc_u64, &new_iface->totals[IFS_TX].bytes);
read_proc_u64,
&new_iface->totals_via_dev[IFS_TX].bytes);
create_proc_read_entry("rx_bytes", proc_iface_perms, proc_entry,
read_proc_u64, &new_iface->totals[IFS_RX].bytes);
read_proc_u64,
&new_iface->totals_via_dev[IFS_RX].bytes);
create_proc_read_entry("tx_packets", proc_iface_perms, proc_entry,
read_proc_u64, &new_iface->totals[IFS_TX].packets);
read_proc_u64,
&new_iface->totals_via_dev[IFS_TX].packets);
create_proc_read_entry("rx_packets", proc_iface_perms, proc_entry,
read_proc_u64, &new_iface->totals[IFS_RX].packets);
read_proc_u64,
&new_iface->totals_via_dev[IFS_RX].packets);
create_proc_read_entry("active", proc_iface_perms, proc_entry,
read_proc_bool, &new_iface->active);
@ -975,11 +1024,13 @@ static void iface_check_stats_reset_and_adjust(struct net_device *net_dev,
"iface reset its stats unexpectedly\n", __func__,
net_dev->name);
iface->totals[IFS_TX].bytes += iface->last_known[IFS_TX].bytes;
iface->totals[IFS_TX].packets +=
iface->totals_via_dev[IFS_TX].bytes +=
iface->last_known[IFS_TX].bytes;
iface->totals_via_dev[IFS_TX].packets +=
iface->last_known[IFS_TX].packets;
iface->totals[IFS_RX].bytes += iface->last_known[IFS_RX].bytes;
iface->totals[IFS_RX].packets +=
iface->totals_via_dev[IFS_RX].bytes +=
iface->last_known[IFS_RX].bytes;
iface->totals_via_dev[IFS_RX].packets +=
iface->last_known[IFS_RX].packets;
iface->last_known_valid = false;
IF_DEBUG("qtaguid: %s(%s): iface=%p "
@ -1147,6 +1198,27 @@ static struct sock_tag *get_sock_stat(const struct sock *sk)
return sock_tag_entry;
}
/*
 * Return the transport-layer protocol number carried by the skb for the
 * address family recorded in the match params.
 * For IPv6 this walks the extension-header chain via ipv6_find_hdr() and
 * may return a negative error when no transport header is found (the
 * failure is logged at MT_DEBUG level).  Unknown families map to
 * IPPROTO_RAW.
 */
static int ipx_proto(const struct sk_buff *skb,
		     struct xt_action_param *par)
{
	int thoff = 0;
	int proto;

	if (par->family == NFPROTO_IPV4) {
		proto = ip_hdr(skb)->protocol;
	} else if (par->family == NFPROTO_IPV6) {
		proto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
		if (proto < 0)
			MT_DEBUG("%s(): transport header not found in ipv6"
				 " skb=%p\n", __func__, skb);
	} else {
		proto = IPPROTO_RAW;
	}
	return proto;
}
static void
data_counters_update(struct data_counters *dc, int set,
enum ifs_tx_rx direction, int proto, int bytes)
@ -1207,10 +1279,10 @@ static void iface_stat_update(struct net_device *net_dev, bool stash_only)
spin_unlock_bh(&iface_stat_list_lock);
return;
}
entry->totals[IFS_TX].bytes += stats->tx_bytes;
entry->totals[IFS_TX].packets += stats->tx_packets;
entry->totals[IFS_RX].bytes += stats->rx_bytes;
entry->totals[IFS_RX].packets += stats->rx_packets;
entry->totals_via_dev[IFS_TX].bytes += stats->tx_bytes;
entry->totals_via_dev[IFS_TX].packets += stats->tx_packets;
entry->totals_via_dev[IFS_RX].bytes += stats->rx_bytes;
entry->totals_via_dev[IFS_RX].packets += stats->rx_packets;
/* We don't need the last_known[] anymore */
entry->last_known_valid = false;
_iface_stat_set_active(entry, net_dev, false);
@ -1220,6 +1292,67 @@ static void iface_stat_update(struct net_device *net_dev, bool stash_only)
spin_unlock_bh(&iface_stat_list_lock);
}
/*
 * Update the per-interface skb-based counters (totals_via_skb) from a
 * single sk_buff.  Invoked from the raw:prerouting and mangle:postrouting
 * hooks, once per skb seen there.
 * Does nothing if the interface has no iface_stat entry (i.e. the device
 * was never configured with an IP address).
 */
static void iface_stat_update_from_skb(const struct sk_buff *skb,
struct xt_action_param *par)
{
struct iface_stat *entry;
const struct net_device *el_dev;
/* par->in is non-NULL on the receive path, so count that as RX. */
enum ifs_tx_rx direction = par->in ? IFS_RX : IFS_TX;
int bytes = skb->len;
if (!skb->dev) {
MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
/* GNU "?:" extension: whichever of par->in/par->out is non-NULL. */
el_dev = par->in ? : par->out;
} else {
const struct net_device *other_dev;
el_dev = skb->dev;
other_dev = par->in ? : par->out;
if (el_dev != other_dev) {
/* Mismatch is only logged; skb->dev takes precedence. */
MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
"par->(in/out)=%p %s\n",
par->hooknum, el_dev, el_dev->name, other_dev,
other_dev->name);
}
}
if (unlikely(!el_dev)) {
/* No device at all: cannot attribute the traffic anywhere. */
pr_err("qtaguid[%d]: %s(): no par->in/out?!!\n",
par->hooknum, __func__);
BUG();
} else if (unlikely(!el_dev->name)) {
pr_err("qtaguid[%d]: %s(): no dev->name?!!\n",
par->hooknum, __func__);
BUG();
} else {
int proto = ipx_proto(skb, par);
MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n",
par->hooknum, el_dev->name, el_dev->type,
par->family, proto);
}
/* iface_stat_list_lock guards both the list and the counters. */
spin_lock_bh(&iface_stat_list_lock);
entry = get_iface_entry(el_dev->name);
if (entry == NULL) {
/* Untracked interface: silently ignore the packet. */
IF_DEBUG("qtaguid: iface_stat: %s(%s): not tracked\n",
__func__, el_dev->name);
spin_unlock_bh(&iface_stat_list_lock);
return;
}
IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
el_dev->name, entry);
entry->totals_via_skb[direction].bytes += bytes;
entry->totals_via_skb[direction].packets++;
spin_unlock_bh(&iface_stat_list_lock);
}
static void tag_stat_update(struct tag_stat *tag_entry,
enum ifs_tx_rx direction, int proto, int bytes)
{
@ -1467,18 +1600,31 @@ static int __init iface_stat_init(struct proc_dir_entry *parent_procdir)
parent_procdir);
if (!iface_stat_all_procfile) {
pr_err("qtaguid: iface_stat: init "
" failed to create stat_all proc entry\n");
" failed to create stat_old proc entry\n");
err = -1;
goto err_zap_entry;
}
iface_stat_all_procfile->read_proc = iface_stat_all_proc_read;
iface_stat_all_procfile->read_proc = iface_stat_fmt_proc_read;
iface_stat_all_procfile->data = (void *)1; /* fmt1 */
iface_stat_fmt_procfile = create_proc_entry(iface_stat_fmt_procfilename,
proc_iface_perms,
parent_procdir);
if (!iface_stat_fmt_procfile) {
pr_err("qtaguid: iface_stat: init "
" failed to create stat_all proc entry\n");
err = -1;
goto err_zap_all_stats_entry;
}
iface_stat_fmt_procfile->read_proc = iface_stat_fmt_proc_read;
iface_stat_fmt_procfile->data = (void *)2; /* fmt2 */
err = register_netdevice_notifier(&iface_netdev_notifier_blk);
if (err) {
pr_err("qtaguid: iface_stat: init "
"failed to register dev event handler\n");
goto err_zap_all_stats_entry;
goto err_zap_all_stats_entries;
}
err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk);
if (err) {
@ -1499,6 +1645,8 @@ err_unreg_ip4_addr:
unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk);
err_unreg_nd:
unregister_netdevice_notifier(&iface_netdev_notifier_blk);
err_zap_all_stats_entries:
remove_proc_entry(iface_stat_fmt_procfilename, parent_procdir);
err_zap_all_stats_entry:
remove_proc_entry(iface_stat_all_procfilename, parent_procdir);
err_zap_entry:
@ -1550,27 +1698,6 @@ static struct sock *qtaguid_find_sk(const struct sk_buff *skb,
return sk;
}
/*
 * Return the transport-layer protocol number for the skb, based on the
 * match's address family.  IPv6 walks the extension-header chain with
 * ipv6_find_hdr() and may return a negative error when no transport
 * header is found; unknown families yield IPPROTO_RAW.
 */
static int ipx_proto(const struct sk_buff *skb,
struct xt_action_param *par)
{
int thoff = 0, tproto;
switch (par->family) {
case NFPROTO_IPV6:
tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
if (tproto < 0)
MT_DEBUG("%s(): transport header not found in ipv6"
" skb=%p\n", __func__, skb);
break;
case NFPROTO_IPV4:
tproto = ip_hdr(skb)->protocol;
break;
default:
tproto = IPPROTO_RAW;
}
return tproto;
}
static void account_for_uid(const struct sk_buff *skb,
const struct sock *alternate_sk, uid_t uid,
struct xt_action_param *par)
@ -1630,8 +1757,22 @@ static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par)
goto ret_res;
}
sk = skb->sk;
switch (par->hooknum) {
case NF_INET_PRE_ROUTING:
case NF_INET_POST_ROUTING:
atomic64_inc(&qtu_events.match_calls_prepost);
iface_stat_update_from_skb(skb, par);
/*
* We are done in pre/post. The skb will get processed
* further later.
*/
res = (info->match ^ info->invert);
goto ret_res;
break;
/* default: Fall through and do UID related work */
}
sk = skb->sk;
if (sk == NULL) {
/*
* A missing sk->sk_socket happens when packets are in-flight
@ -1806,8 +1947,10 @@ static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned,
if (*eof)
return 0;
CT_DEBUG("qtaguid: proc ctrl page=%p off=%ld char_count=%d *eof=%d\n",
page, items_to_skip, char_count, *eof);
CT_DEBUG("qtaguid: proc ctrl pid=%u tgid=%u uid=%u "
"page=%p off=%ld char_count=%d *eof=%d\n",
current->pid, current->tgid, current_fsuid(),
page, items_to_skip, char_count, *eof);
spin_lock_bh(&sock_tag_list_lock);
for (node = rb_first(&sock_tag_tree);
@ -1851,6 +1994,7 @@ static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned,
"delete_cmds=%llu "
"iface_events=%llu "
"match_calls=%llu "
"match_calls_prepost=%llu "
"match_found_sk=%llu "
"match_found_sk_in_ct=%llu "
"match_found_no_sk_in_ct=%llu "
@ -1862,6 +2006,7 @@ static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned,
atomic64_read(&qtu_events.delete_cmds),
atomic64_read(&qtu_events.iface_events),
atomic64_read(&qtu_events.match_calls),
atomic64_read(&qtu_events.match_calls_prepost),
atomic64_read(&qtu_events.match_found_sk),
atomic64_read(&qtu_events.match_found_sk_in_ct),
atomic64_read(
@ -2135,7 +2280,9 @@ static int ctrl_cmd_tag(const char *input)
el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */
if (!el_socket) {
pr_info("qtaguid: ctrl_tag(%s): failed to lookup"
" sock_fd=%d err=%d\n", input, sock_fd, res);
" sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
input, sock_fd, res, current->pid, current->tgid,
current_fsuid());
goto err;
}
CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%ld ->sk=%p\n",
@ -2280,7 +2427,9 @@ static int ctrl_cmd_untag(const char *input)
el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */
if (!el_socket) {
pr_info("qtaguid: ctrl_untag(%s): failed to lookup"
" sock_fd=%d err=%d\n", input, sock_fd, res);
" sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
input, sock_fd, res, current->pid, current->tgid,
current_fsuid());
goto err;
}
CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n",
@ -2356,6 +2505,9 @@ static int qtaguid_ctrl_parse(const char *input, int count)
char cmd;
int res;
CT_DEBUG("qtaguid: ctrl(%s): pid=%u tgid=%u uid=%u\n",
input, current->pid, current->tgid, current_fsuid());
cmd = input[0];
/* Collect params for commands */
switch (cmd) {
@ -2532,9 +2684,12 @@ static int qtaguid_stats_proc_read(char *page, char **num_items_returned,
return len;
}
CT_DEBUG("qtaguid:proc stats page=%p *num_items_returned=%p off=%ld "
"char_count=%d *eof=%d\n", page, *num_items_returned,
items_to_skip, char_count, *eof);
CT_DEBUG("qtaguid:proc stats pid=%u tgid=%u uid=%u "
"page=%p *num_items_returned=%p off=%ld "
"char_count=%d *eof=%d\n",
current->pid, current->tgid, current_fsuid(),
page, *num_items_returned,
items_to_skip, char_count, *eof);
if (*eof)
return 0;

View File

@ -202,7 +202,8 @@ struct iface_stat {
/* net_dev is only valid for active iface_stat */
struct net_device *net_dev;
struct byte_packet_counters totals[IFS_MAX_DIRECTIONS];
struct byte_packet_counters totals_via_dev[IFS_MAX_DIRECTIONS];
struct byte_packet_counters totals_via_skb[IFS_MAX_DIRECTIONS];
/*
* We keep the last_known, because some devices reset their counters
* just before NETDEV_UP, while some will reset just before
@ -254,6 +255,8 @@ struct qtaguid_event_counts {
atomic64_t iface_events; /* Number of NETDEV_* events handled */
atomic64_t match_calls; /* Number of times iptables called mt */
/* Number of times iptables called mt from pre or post routing hooks */
atomic64_t match_calls_prepost;
/*
* match_found_sk_*: numbers related to the netfilter matching
* function finding a sock for the sk_buff.

View File

@ -183,7 +183,11 @@ char *pp_iface_stat(struct iface_stat *is)
res = kasprintf(GFP_ATOMIC, "iface_stat@%p{"
"list=list_head{...}, "
"ifname=%s, "
"total={rx={bytes=%llu, "
"total_dev={rx={bytes=%llu, "
"packets=%llu}, "
"tx={bytes=%llu, "
"packets=%llu}}, "
"total_skb={rx={bytes=%llu, "
"packets=%llu}, "
"tx={bytes=%llu, "
"packets=%llu}}, "
@ -198,10 +202,14 @@ char *pp_iface_stat(struct iface_stat *is)
"tag_stat_tree=rb_root{...}}",
is,
is->ifname,
is->totals[IFS_RX].bytes,
is->totals[IFS_RX].packets,
is->totals[IFS_TX].bytes,
is->totals[IFS_TX].packets,
is->totals_via_dev[IFS_RX].bytes,
is->totals_via_dev[IFS_RX].packets,
is->totals_via_dev[IFS_TX].bytes,
is->totals_via_dev[IFS_TX].packets,
is->totals_via_skb[IFS_RX].bytes,
is->totals_via_skb[IFS_RX].packets,
is->totals_via_skb[IFS_TX].bytes,
is->totals_via_skb[IFS_TX].packets,
is->last_known_valid,
is->last_known[IFS_RX].bytes,
is->last_known[IFS_RX].packets,