ct-bugcheck: Add tools to poll for and report ath10k firmware crashes.

This tool can periodically check for ath10k firmware crashes.
If it finds a crash, it will collect the binary crash dump and
some OS-level information (dmesg, lspci, etc.) into a directory
under /tmp.

It then notifies the user about the crash and asks them to report
the bug to the appropriate email address.

This is most useful when used with the ath10k-ct driver and
CT ath10k firmware, but it should also report issues with the stock
ath10k driver and firmware, in case one has appropriate contacts
to debug them.

This tool could be extended later for other modules/bugs/etc.

Signed-off-by: Ben Greear <greearb@candelatech.com>
This commit is contained in:
Ben Greear 2016-08-24 14:20:58 -07:00 committed by Felix Fietkau
parent d66db35a1d
commit 545d86490c
4 changed files with 212 additions and 0 deletions

View file

@ -0,0 +1,52 @@
#
# Copyright (C) 2016 OpenWrt.org
#
# This is free software, licensed under the GNU General Public License v2.
# See /LICENSE for more information.
#
include $(TOPDIR)/rules.mk
include $(INCLUDE_DIR)/kernel.mk
PKG_NAME:=ct-bugcheck
PKG_RELEASE:=2016-07-21
include $(INCLUDE_DIR)/package.mk
# Package metadata for ct-bugcheck: section "utils", category "Utilities",
# a title, a version taken from PKG_RELEASE, and the maintainer contact.
define Package/ct-bugcheck
SECTION:=utils
CATEGORY:=Utilities
TITLE:=Bug checking and reporting utility
VERSION:=$(PKG_RELEASE)
MAINTAINER:=Ben Greear <greearb@candelatech.com>
endef
# Long description of the package. It also tells the user how to enable the
# tool: create /etc/config/bugcheck that sets and exports DO_BUGCHECK=1.
define Package/ct-bugcheck/description
Scripts to check for bugs (like firmware crashes) and package them for reporting.
Currently this script only checks for ath10k firmware crashes.
Once installed, you can enable this tool by creating a file called
/etc/config/bugcheck with the following contents:
DO_BUGCHECK=1
export DO_BUGCHECK
endef
# Prepare step: copy the three files shipped in src/ (bugcheck.sh,
# bugchecker.sh and bugcheck.initd) into the package build directory.
define Build/Prepare
$(CP) src/bugcheck.sh $(PKG_BUILD_DIR)/
$(CP) src/bugchecker.sh $(PKG_BUILD_DIR)/
$(CP) src/bugcheck.initd $(PKG_BUILD_DIR)/
endef
# Compile step: the package consists only of shell scripts, so the recipe
# is just `true`.
define Build/Compile
true
endef
# Install step, relative to the root directory passed as $(1):
# both scripts go to /usr/bin and bugcheck.initd is installed as
# /etc/init.d/bugcheck.
define Package/ct-bugcheck/install
$(INSTALL_DIR) $(1)/usr/bin
$(INSTALL_DIR) $(1)/etc/init.d
$(INSTALL_BIN) $(PKG_BUILD_DIR)/bugcheck.sh $(1)/usr/bin/
$(INSTALL_BIN) $(PKG_BUILD_DIR)/bugchecker.sh $(1)/usr/bin/
$(INSTALL_BIN) $(PKG_BUILD_DIR)/bugcheck.initd $(1)/etc/init.d/bugcheck
endef
# Instantiate the package rules for ct-bugcheck via the BuildPackage template.
$(eval $(call BuildPackage,ct-bugcheck))

View file

@ -0,0 +1,16 @@
#!/bin/sh /etc/rc.common
# Copyright (C) 2016 OpenWrt.org

# rc.common settings: start priority and procd mode.
USE_PROCD=1
START=99

# Program handed to procd.  It only does real work once it has been
# enabled; see the comments at the top of bugchecker.sh.
PROG=/usr/bin/bugchecker.sh

# Register one procd instance whose command is $PROG.
start_service() {
	procd_open_instance
	procd_set_param command "$PROG"
	procd_close_instance
}

View file

@ -0,0 +1,115 @@
#!/bin/sh
# Detect ath10k (and possibly other) bugs, bundle the evidence,
# and tell the user what to do with it.

# Base directory for scratch files.
TMPLOC="/tmp"
# Directory that receives the files of the most recent crash report.
CRASHDIR="${TMPLOC}/bugcheck"
# Set to 1 by the scan loop once a crash has been found in this run.
FOUND_BUG=0

# Uncomment to trace execution:
# set -x
# Write generic system information to $CRASHDIR/info.txt.
#
# Globals:  CRASHDIR (read) - directory that receives info.txt; it must
#           already exist.
# Outputs:  overwrites $CRASHDIR/info.txt with a header line and the date,
#           followed by titled sections: uname, os-release, dmesg output,
#           lspci (only when /usr/bin/lspci is executable), cpuinfo,
#           meminfo, cmdline and lsmod.
# Errors from the individual commands go to stderr and do not stop the
# report from being written.
bugcheck_generic()
{
	# One redirection for the whole report instead of reopening the
	# file for every line.
	{
		echo "LEDE crashlog report"
		date

		echo
		echo "uname"
		uname -a

		# Emitted once; the section used to be written twice.
		echo
		echo "os-release"
		cat /etc/os-release

		echo
		echo "dmesg output"
		dmesg

		if [ -x /usr/bin/lspci ]
		then
			echo
			echo "lspci"
			lspci
		fi

		echo
		echo "cpuinfo"
		cat /proc/cpuinfo

		# This section used to dump /proc/cpuinfo a second time.
		echo
		echo "meminfo"
		cat /proc/meminfo

		echo
		echo "cmdline"
		cat /proc/cmdline

		echo
		echo "lsmod"
		lsmod
	} > "$CRASHDIR/info.txt"
}
# Rotate the crash directories so that an empty $CRASHDIR is available.
#
# Globals:  CRASHDIR (read)
# Effect:   an existing $CRASHDIR is renamed to $CRASHDIR.1, replacing
#           whatever was there before; $CRASHDIR is then (re)created.
#           Only one previous report is kept.
roll_crashes()
{
	# Roll any existing crashes
	if [ -d "$CRASHDIR" ]
	then
		# Clear the backup slot whether it holds a directory or not:
		# a leftover directory would make mv nest the old report
		# inside it, and a leftover regular file would make mv fail,
		# leaving old data mixed into the new report.
		rm -fr "$CRASHDIR.1"
		mv "$CRASHDIR" "$CRASHDIR.1"
	fi

	# Prepare location
	mkdir -p "$CRASHDIR"
}
# ath10k: look at the fw_crash_dump debugfs entry of every wireless phy.
# A successful read is treated as "a crash dump is present"; the dump is
# then stored in a freshly rolled $CRASHDIR together with reporting
# instructions and the generic system information.
for i in /sys/kernel/debug/ieee80211/*/ath10k/fw_crash_dump
do
	# If the glob matched nothing, $i holds the literal pattern.
	[ -e "$i" ] || continue

	#echo "Checking $i"
	# Read errors are discarded rather than redirected into the dump
	# file, so a saved dump never contains error text.
	if cat "$i" > "$TMPLOC/ath10k_crash.bin" 2>/dev/null
	then
		FOUND_BUG=1
		#echo "Found ath10k crash data in $i"
		roll_crashes

		# Directory holding the entry (trailing slash kept); POSIX
		# suffix removal instead of the bash-only ${i/pat/}.
		ADIR=${i%fw_crash_dump}

		# The dump is taken to come from CT firmware when it contains
		# the string "-ct-".
		CTFW=0
		if grep -q -- -ct- "$TMPLOC/ath10k_crash.bin" 2>/dev/null
		then
			CTFW=1
		fi

		{
			echo "Send bug reports to:"
			if [ -f "$ADIR/ct_special" ] || [ "$CTFW" = "1" ]
			then
				# Looks like this is CT firmware or driver...
				echo "greearb@candelatech.com"
				echo "and/or report or check for duplicates here:"
				echo "https://github.com/greearb/ath10k-ct/issues"
			else
				# Not sure who would want these bug reports for upstream...
				echo "https://www.lede-project.org/"
			fi
			echo
			echo "Please attach all files in this directory to bug reports."
		} > "$CRASHDIR/report_to.txt"

		mv "$TMPLOC/ath10k_crash.bin" "$CRASHDIR"

		# Add any more ath10k specific stuff here.

		# And call generic bug reporting logic
		bugcheck_generic
	else
		# Do not leave a stale scratch file behind after a failed read.
		rm -f "$TMPLOC/ath10k_crash.bin"
	fi
done

if [ "$FOUND_BUG" = "1" ]
then
	# Notify LUCI somehow?
	echo "bugcheck.sh found an issue to be reported" > /dev/kmsg
	echo "See $CRASHDIR for details on how to report this" > /dev/kmsg
	# Let calling code know something was wrong.
	exit 1
fi

exit 0

View file

@ -0,0 +1,29 @@
#!/bin/sh
# Periodically call the bugcheck.sh script.
#
# The checker is disabled by default.  To enable it, create an
# /etc/config/bugcheck file with contents like:
# DO_BUGCHECK=1
# export DO_BUGCHECK
# That file is sourced below, so it can override any variable set here.

# Command run on every iteration.
CHECKER=bugcheck.sh
# Value passed to sleep between two runs.
SLEEPFOR=60
# 0 = disabled (default).
DO_BUGCHECK=0

if [ -f /etc/config/bugcheck ]
then
	. /etc/config/bugcheck
fi

# Quoted, POSIX "=" comparison.  An empty or unset value is treated as
# disabled; an unquoted empty value made the test fail with an error,
# after which the loop ran anyway.
if [ "${DO_BUGCHECK:-0}" = "0" ]
then
	exit 0
fi

# The exit status of the checker is not acted upon; it reports findings
# itself.
while true
do
	$CHECKER
	sleep "$SLEEPFOR"
done