/** This class calls its monitor every minute. Killing this process if they don't return **/ publicclassWatchdogextendsThread { ... ...
/* This handler will be used to post message back onto the main thread */ final ArrayList<HandlerChecker> mHandlerCheckers = newArrayList<>(); final HandlerChecker mMonitorChecker; ... ... publicstatic Watchdog getInstance() { if (sWatchdog == null) { sWatchdog = newWatchdog(); // new一个Watchdog对象 }
return sWatchdog; } ... ... privateWatchdog() { super("watchdog"); // Initialize handler checkers for each common thread we want to check. Note // that we are not currently checking the background thread, since it can // potentially hold longer running operations with no guarantees about the timeliness // of operations there.
// The shared foreground thread is the main checker. It is where we // will also dispatch monitor checks and do other work. // 可以看一下FgThread的构造函数,super("android.fg", xxx, true),所以其实就是android.fg线程 mMonitorChecker = newHandlerChecker(FgThread.getHandler(), "foreground thread", DEFAULT_TIMEOUT); mHandlerCheckers.add(mMonitorChecker); //添加MonitorChecker // Add checker for main thread. We only do a quick check since there // can be UI running on the thread. // 添加Looper Checker mHandlerCheckers.add(newHandlerChecker(newHandler(Looper.getMainLooper()), "main thread", DEFAULT_TIMEOUT)); // Add checker for shared UI thread. mHandlerCheckers.add(newHandlerChecker(UiThread.getHandler(), "ui thread", DEFAULT_TIMEOUT)); // And also check IO thread. mHandlerCheckers.add(newHandlerChecker(IoThread.getHandler(), "i/o thread", DEFAULT_TIMEOUT)); // And the display thread. mHandlerCheckers.add(newHandlerChecker(DisplayThread.getHandler(), "display thread", DEFAULT_TIMEOUT));
// Initialize monitor for Binder threads. addMonitor(newBinderThreadMonitor()); }
Monitor Checker 和 Looper Checker 的侧重点不一样:前者预警我们不能长时间持有核心系统服务的对象锁,否则会阻塞很多函数的运行;后者预警我们不能长时间地霸占消息队列,否则其他消息将得不到处理。这两类问题都会导致系统卡住(System Not Responding)。
@Override publicvoidrun() { booleanwaitedHalf=false; while (true) { final ArrayList<HandlerChecker> blockedCheckers; final String subject; finalboolean allowRestart; intdebuggerWasConnected=0; synchronized (this) { longtimeout= CHECK_INTERVAL; // 值为30s // Make sure we (re)spin the checkers that have become idle within // this wait-and-check interval // 2.3.1 调度所有的HandlerChecker,给所有受监控的线程发送消息 for(int i=0; i<mHandlerCheckers.size(); i++) { HandlerCheckerhc= mHandlerCheckers.get(i); hc.scheduleCheckLocked(); }
if (debuggerWasConnected > 0) { debuggerWasConnected--; }
// NOTE: We use uptimeMillis() here because we do not want to increment the time we // wait while asleep. If the device is asleep then the thing that we are waiting // to timeout on is asleep as well and won't have a chance to run, causing a false // positive on when to kill things. // 2.3.2 睡眠一段时间 longstart= SystemClock.uptimeMillis(); while (timeout > 0) { if (Debug.isDebuggerConnected()) { debuggerWasConnected = 2; } try { wait(timeout); // 线程休眠且释放锁 } catch (InterruptedException e) { Log.wtf(TAG, e); } if (Debug.isDebuggerConnected()) { debuggerWasConnected = 2; } timeout = CHECK_INTERVAL - (SystemClock.uptimeMillis() - start); }
// 2.3.3 检查是否有线程或者服务出了问题 finalintwaitState= evaluateCheckerCompletionLocked(); if (waitState == COMPLETED) { // The monitors have returned; reset waitedHalf = false; continue; } elseif (waitState == WAITING) { // still waiting but within their configured intervals; back off and recheck continue; } elseif (waitState == WAITED_HALF) { if (!waitedHalf) { // We've waited half the deadlock-detection interval. Pull a stack // trace and wait another half. ArrayList<Integer> pids = newArrayList<Integer>(); pids.add(Process.myPid()); ActivityManagerService.dumpStackTraces(true, pids, null, null, NATIVE_STACKS_OF_INTEREST); waitedHalf = true; } continue; }
// If we got here, that means that the system is most likely hung. // First collect stack traces from all threads of the system process. // Then kill this process so that the system will restart. // 走到这里意味着系统hung了,首先收集系统进程所有线程的stack trace,然后kill进程以重启。 EventLog.writeEvent(EventLogTags.WATCHDOG, subject);
ArrayList<Integer> pids = newArrayList<Integer>(); pids.add(Process.myPid()); if (mPhonePid > 0) pids.add(mPhonePid); // Pass !waitedHalf so that just in case we somehow wind up here without having // dumped the halfway stacks, we properly re-initialize the trace file. finalFilestack= ActivityManagerService.dumpStackTraces( !waitedHalf, pids, null, null, NATIVE_STACKS_OF_INTEREST);
// Give some extra time to make sure the stack traces get written. // The system's been hanging for a minute, another second or two won't hurt much. // 线程休眠2秒钟以确保trace输出完毕 SystemClock.sleep(2000);
// Only kill the process if the debugger is not attached. if (Debug.isDebuggerConnected()) { debuggerWasConnected = 2; } if (debuggerWasConnected >= 2) { Slog.w(TAG, "Debugger connected: Watchdog is *not* killing the system process"); } elseif (debuggerWasConnected > 0) { Slog.w(TAG, "Debugger was connected: Watchdog is *not* killing the system process"); } elseif (!allowRestart) { Slog.w(TAG, "Restart not allowed: Watchdog is *not* killing the system process"); } else { Slog.w(TAG, "*** WATCHDOG KILLING SYSTEM PROCESS: " + subject); for (int i=0; i<blockedCheckers.size(); i++) { Slog.w(TAG, blockedCheckers.get(i).getName() + " stack trace:"); StackTraceElement[] stackTrace = blockedCheckers.get(i).getThread().getStackTrace(); for (StackTraceElement element: stackTrace) { Slog.w(TAG, " at " + element); } } // 2.3.5 保存日志,判断是否需要杀掉系统进程 Slog.w(TAG, "*** GOODBYE!"); // 杀死SystemServer Process.killProcess(Process.myPid()); System.exit(10); }