Go1.8实现watchdog功能,实现依赖启动服务程序

前端之家收集整理的这篇文章主要介绍了Go1.8实现watchdog功能,实现依赖启动服务程序,前端之家小编觉得挺不错的,现在分享给大家,也给大家做个参考。
  1. package main
  2.  
  3. import (
  4. "flag"
  5. "fmt"
  6. "log"
  7. "os"
  8. "os/signal"
  9. "os/user"
  10. "path"
  11. "strconv"
  12. "strings"
  13. "sync"
  14. "syscall"
  15. "time"
  16.  
  17. "github.com/czxichen/command/watchdog"
  18. conf "github.com/dlintw/goconf"
  19. )
  20.  
  21. //+build windows,linux
  22.  
  // logDir is the directory in which the per-service log files are created.
  const (
  	logDir = "./watchdog"
  )
  24.  
  25. func newProc(svc *Service,null,pw *os.File) *os.ProcAttr {
  26. return &os.ProcAttr{Dir: svc.path,Files: []*os.File{null,pw,pw}}
  27. }
  28.  
  // setPriority is a no-op placeholder: it ignores pid and priority and
  // always returns errno 0 (success).
  func setPriority(pid, priority uintptr) syscall.Errno {
  	var ok syscall.Errno // zero value, i.e. no error
  	return ok
  }
  32.  
  // logpath is the -log_path flag: where the watchdog writes its own log.
  // It defaults to the empty string.
  var logpath = flag.String("log_path", "", "Specify log path")

  // configFile is the -config flag: the configuration file to read.
  var configFile = flag.String("config", "watchdog.ini", "Watchdog configuration file")
  37.  
  38. func cfgOpt(cfg *conf.ConfigFile,section,option string) string {
  39. if !cfg.HasOption(section,option) {
  40. return ""
  41. }
  42. s,err := cfg.GetString(section,option)
  43. if err != nil {
  44. log.Fatalf("Failed to get %s for %s: %v",option,err)
  45. }
  46. return s
  47. }
  48.  
  49. func svcOpt(cfg *conf.ConfigFile,service,option string,required bool) string {
  50. opt := cfgOpt(cfg,option)
  51. if opt == "" && required {
  52. log.Fatalf("Service %s has missing %s option",option)
  53. }
  54. return opt
  55. }
  56.  
  // signalNames gives the symbolic name of each signal the watchdog handles.
  var signalNames = map[syscall.Signal]string{
  	syscall.SIGINT:  "SIGINT",
  	syscall.SIGQUIT: "SIGQUIT",
  	syscall.SIGTERM: "SIGTERM",
  }

  // signalName returns the symbolic name of s when it is listed in
  // signalNames, and "SIG <number>" otherwise.
  func signalName(s syscall.Signal) string {
  	name, known := signalNames[s]
  	if !known {
  		return fmt.Sprintf("SIG %d", s)
  	}
  	return name
  }
  66.  
  // Shutdowner is implemented by anything that can be asked to shut down.
  type Shutdowner interface {
  	// Shutdown requests that the implementation begin shutting down.
  	Shutdown()
  }
  70.  
  71. func shutdownHandler(server Shutdowner) {
  72. sigc := make(chan os.Signal, 3)
  73. signal.Notify(sigc,syscall.SIGINT,syscall.SIGQUIT,syscall.SIGTERM)
  74. go func() {
  75. for s := range sigc {
  76. name := s.String()
  77. if sig,ok := s.(syscall.Signal); ok {
  78. name = signalName(sig)
  79. }
  80. log.Printf("Received %v,initiating shutdown...",name)
  81. server.Shutdown()
  82. }
  83. }()
  84. }
  85.  
  // restartDelay is the pause between one run of a service ending and the
  // next attempt.
  var restartDelay = 2 * time.Second

  // restartBackoff is multiplied by a service's failure count to get the
  // extra delay applied before restarting a failing service.
  var restartBackoff = 5 * time.Second

  // restartBackoffMax is the upper bound on that failure back-off delay.
  var restartBackoffMax = 60 * time.Second
  91.  
  92. type Watchdog struct {
  93. services map[string]*Service
  94. shutdown chan bool
  95. }
  96.  
  97. func NewWatchdog() *Watchdog {
  98. return &Watchdog{
  99. services: make(map[string]*Service),shutdown: make(chan bool),}
  100. }
  101.  
  102. //关闭服务
  103. func (w *Watchdog) Shutdown() {
  104. select {
  105. case w.shutdown <- true:
  106. default:
  107. }
  108. }
  109.  
  110. //添加服务,如果存在
  111. func (w *Watchdog) AddService(name,binary string) (*Service,error) {
  112. if _,ok := w.services[name]; ok {
  113. return nil,fmt.Errorf("Service %q already exists",name)
  114. }
  115.  
  116. svc := newService(name,binary)
  117. w.services[name] = svc
  118.  
  119. return svc,nil
  120. }
  121.  
  122. //启动服务
  123. func (w *Watchdog) Walk() {
  124. log.Printf("Seesaw watchdog starting...")
  125.  
  126. w.mapDependencies()
  127.  
  128. for _,svc := range w.services {
  129. go svc.run()
  130. }
  131. <-w.shutdown
  132. for _,svc := range w.services {
  133. go svc.stop()
  134. }
  135. for _,svc := range w.services {
  136. stopped := <-svc.stopped
  137. svc.stopped <- stopped
  138. }
  139. }
  140.  
  141. //设置依赖关系
  142. func (w *Watchdog) mapDependencies() {
  143. for name := range w.services {
  144. svc := w.services[name]
  145. for depName := range svc.dependencies {
  146. dep,ok := w.services[depName]
  147. if !ok {
  148. log.Fatalf("Failed to find dependency %q for service %q",depName,name)
  149. }
  150. svc.dependencies[depName] = dep //依赖谁,依赖启动后才会启动自身
  151. dep.dependents[svc.name] = svc //谁依赖它,依赖它的服务退出后,才退出本身
  152. }
  153. }
  154. }
  155.  
  // prioProcess is the default priority, 0.
  const (
  	prioProcess = 0
  )
  158.  
  // Service describes one supervised program together with its runtime state.
  type Service struct {
  	// name identifies the service; binary is the executable that is
  	// started; path is used as the child's working directory.
  	name, binary, path string
  	// args are the arguments passed after the program name.
  	args []string

  	// uid and gid are filled in by SetUser.
  	uid, gid uint32
  	// priority is the value set through SetPriority.
  	priority int

  	// dependencies are the services this one waits for before starting;
  	// dependents are the services that must stop before this one stops.
  	dependencies, dependents map[string]*Service

  	// termTimeout is how long stop waits after SIGTERM before sending SIGKILL.
  	termTimeout time.Duration

  	// lock guards process.
  	lock    sync.Mutex
  	process *os.Process

  	// done is signalled when run returns; shutdown tells run to exit;
  	// started and stopped publish the corresponding state to other services.
  	done, shutdown, started, stopped chan bool

  	// failures counts failed runs since the last clean exit; restarts counts
  	// every launch attempt.
  	failures, restarts uint64

  	// Timestamps of the most recent failure and launch attempt.
  	lastFailure, lastRestart time.Time
  }
  189.  
  190. //初始化一个Service.
  191. func newService(name,binary string) *Service {
  192. return &Service{
  193. name: name,binary: binary,args: make([]string, 0),dependencies: make(map[string]*Service),dependents: make(map[string]*Service),done: make(chan bool),shutdown: make(chan bool, 1),started: make(chan bool,stopped: make(chan bool,termTimeout: 5 * time.Second,}
  194. }
  195.  
  196. //给这个服务添加依赖.
  197. func (svc *Service) AddDependency(name string) {
  198. svc.dependencies[name] = nil
  199. }
  200.  
  201. //为服务添加启动参数.
  202. func (svc *Service) AddArgs(args string) {
  203. svc.args = strings.Fields(args)
  204. }
  205.  
  206. //为进程设置优先级,Windows下面无效.
  207. func (svc *Service) SetPriority(priority int) error {
  208. if priority < -20 || priority > 19 {
  209. return fmt.Errorf("Invalid priority %d - must be between -20 and 19",priority)
  210. }
  211. svc.priority = priority
  212. return nil
  213. }
  214.  
  215. func (svc *Service) SetTermTimeout(tt time.Duration) {
  216. svc.termTimeout = tt
  217. }
  218.  
  219. func (svc *Service) SetUser(username string) error {
  220. u,err := user.Lookup(username)
  221. if err != nil {
  222. return err
  223. }
  224. uid,err := strconv.Atoi(u.Uid)
  225. if err != nil {
  226. return err
  227. }
  228. gid,err := strconv.Atoi(u.Gid)
  229. if err != nil {
  230. return err
  231. }
  232. svc.uid = uint32(uid)
  233. svc.gid = uint32(gid)
  234. return nil
  235. }
  236.  
  237. func (svc *Service) run() {
  238. //如果存在依赖,要等依赖全部启动完毕之后才会自动自身.
  239. for _,dep := range svc.dependencies {
  240. log.Printf("Service %s waiting for %s to start",svc.name,dep.name)
  241. select {
  242. case started := <-dep.started:
  243. dep.started <- started
  244. case <-svc.shutdown:
  245. goto done
  246. }
  247. }
  248.  
  249. for {
  250. //如果启动失败,怎等待时间会延长,最大不超过restartBackoffMax时间
  251. //程序启动必须是阻塞的,不然会重复运行
  252. if svc.failures > 0 {
  253. delay := time.Duration(svc.failures) * restartBackoff
  254. if delay > restartBackoffMax {
  255. delay = restartBackoffMax
  256. }
  257. log.Printf("Service %s has Failed %d times - delaying %s before restart",svc.failures,delay)
  258.  
  259. select {
  260. case <-time.After(delay):
  261. case <-svc.shutdown:
  262. goto done
  263. }
  264. }
  265.  
  266. svc.restarts++
  267. svc.lastRestart = time.Now()
  268. svc.runOnce()
  269.  
  270. select {
  271. case <-time.After(restartDelay):
  272. case <-svc.shutdown:
  273. goto done
  274. }
  275. }
  276. done:
  277. svc.done <- true
  278. }
  279.  
  280. //为服务创建日志文件
  281. func (svc *Service) logFile() (*os.File,error) {
  282. logName := svc.name + ".log"
  283.  
  284. if err := os.MkdirAll(logDir, 0666); err != nil {
  285. if !os.IsExist(err) {
  286. return nil,err
  287. }
  288. }
  289. f,err := os.Create(path.Join(logDir,logName))
  290. if err != nil {
  291. return nil,err
  292. }
  293. fmt.Fprintf(f,"Log file for %s (stdout/stderr)\n",svc.name)
  294. fmt.Fprintf(f,"Created at: %s\n",time.Now().Format("2006/01/02 15:04:05"))
  295. return f,nil
  296. }
  297.  
  298. //运行程序
  299. func (svc *Service) runOnce() {
  300. args := make([]string,len(svc.args)+1)
  301. args[0] = svc.name
  302. copy(args[1:],svc.args)
  303.  
  304. fmt.Println("Args:",args)
  305. null,err := os.Open(os.DevNull)
  306. if err != nil {
  307. log.Printf("Service %s - Failed to open %s: %v",os.DevNull,err)
  308. return
  309. }
  310.  
  311. lfile,err := svc.logFile()
  312. if err != nil {
  313. log.Printf("Service %s - Failed to create log file: %v",err)
  314. null.Close()
  315. return
  316. }
  317.  
  318. attr := newProc(svc,lfile)
  319.  
  320. log.Printf("Starting service %s...",svc.name)
  321. proc,err := os.StartProcess(svc.binary,args,attr)
  322. if err != nil {
  323. log.Printf("Service %s Failed to start: %v",err)
  324. svc.lastFailure = time.Now()
  325. svc.failures++
  326. null.Close()
  327. return
  328. }
  329.  
  330. null.Close()
  331. lfile.Close()
  332. svc.lock.Lock()
  333. svc.process = proc
  334. svc.lock.Unlock()
  335.  
  336. if err := setPriority(uintptr(proc.Pid),uintptr(svc.priority)); err != 0 {
  337. log.Printf("Failed to set priority to %d for service %s: %v",svc.priority,err)
  338. }
  339. select {
  340. case svc.started <- true:
  341. default:
  342. }
  343.  
  344. state,err := svc.process.Wait()
  345. if err != nil {
  346. log.Printf("Service %s wait Failed with %v",err)
  347. svc.lastFailure = time.Now()
  348. svc.failures++
  349. return
  350. }
  351. if !state.Success() {
  352. log.Printf("Service %s exited with %v",state)
  353. svc.lastFailure = time.Now()
  354. svc.failures++
  355. return
  356. }
  357.  
  358. svc.failures = 0
  359. log.Printf("Service %s exited normally.",svc.name)
  360. }
  361.  
  362. //给进程发送信号
  363. func (svc *Service) signal(sig os.Signal) error {
  364. svc.lock.Lock()
  365. defer svc.lock.Unlock()
  366. if svc.process == nil {
  367. return nil
  368. }
  369. return svc.process.Signal(sig)
  370. }
  371.  
  372. //停止服务
  373. func (svc *Service) stop() {
  374. log.Printf("Stopping service %s...",svc.name)
  375. //等待依赖它的进程退出完毕之后再退出自己.
  376. for _,dep := range svc.dependents {
  377. log.Printf("Service %s waiting for %s to stop",dep.name)
  378. stopped := <-dep.stopped
  379. dep.stopped <- stopped
  380. }
  381.  
  382. svc.shutdown <- true
  383. //首先给进程发送退出信号,如果超时没有退出,则直接发送Kill信号.
  384. svc.signal(syscall.SIGTERM)
  385. select {
  386. case <-svc.done:
  387. case <-time.After(svc.termTimeout):
  388. svc.signal(syscall.SIGKILL)
  389. <-svc.done
  390. }
  391. log.Printf("Service %s stopped",svc.name)
  392. svc.stopped <- true
  393. }
  394.  
  395. func main() {
  396. flag.Parse()
  397. if *logpath == "" {
  398. *logpath = os.Args[0] + ".log"
  399. }
  400. logFile,err := os.Create(*logpath)
  401. if err != nil {
  402. log.Fatalf("Create log file error:%s\n",err.Error())
  403. }
  404. defer logFile.Close()
  405. log.SetOutput(logFile)
  406.  
  407. cfg,err := conf.ReadConfigFile(*configFile)
  408. if err != nil {
  409. log.Fatalf("Failed to read config file %q: %v",*configFile,err)
  410. }
  411.  
  412. fido := watchdog.NewWatchdog()
  413.  
  414. shutdownHandler(fido)
  415. for _,name := range cfg.GetSections() {
  416. if name == "default" {
  417. continue
  418. }
  419.  
  420. binary := svcOpt(cfg,name,"binary",true)
  421. args := svcOpt(cfg,"args",false)
  422.  
  423. svc,err := fido.AddService(name,binary)
  424. if err != nil {
  425. log.Fatalf("Failed to add service %q: %v",err)
  426. }
  427. svc.AddArgs(args)
  428. if dep := svcOpt(cfg,"dependency",false); dep != "" {
  429. svc.AddDependency(dep)
  430. }
  431. if opt := svcOpt(cfg,"priority",false); opt != "" {
  432. prio,err := strconv.Atoi(opt)
  433. if err != nil {
  434. log.Fatalf("Service %s has invalid priority %q: %v",opt,err)
  435. }
  436. if err := svc.SetPriority(prio); err != nil {
  437. log.Fatalf("Failed to set priority for service %s: %v",err)
  438. }
  439. }
  440. if opt := svcOpt(cfg,"term_timeout",false); opt != "" {
  441. tt,err := time.ParseDuration(opt)
  442. if err != nil {
  443. log.Fatalf("Service %s has invalid term_timeout %q: %v",err)
  444. }
  445. svc.SetTermTimeout(tt)
  446. }
  447.  
  448. if user := svcOpt(cfg,"user",false); user != "" {
  449. if err := svc.SetUser(user); err != nil {
  450. log.Fatalf("Failed to set user for service %s: %v",err)
  451. }
  452. }
  453. }
  454.  
  455. fido.Walk()
  456. }

猜你在找的设计模式相关文章