Changes between Version 8 and Version 9 of NuWa_Slave

Show
Ignore:
Timestamp:
09/02/10 17:06:03 (14 years ago)
Author:
blyth (IP: 140.112.102.77)
Comment:

--

Legend:

Unmodified
Added
Removed
Modified
  • NuWa_Slave

    v8 v9  
    317317}}} 
    318318  
    319  
    320  
    321  
    322   
    323  
    324  
    325  
    326  
    327  
     319  
     320== Investigating Issues == 
     321 
     322The primary duty is to isolate the cause and report the problem to the author or responsible person in the form of a Trac ticket that  
     323enables the investigator to rapidly reproduce the issue. 
     324 
     325While investigating, remember to stop the slave to avoid interference and resource competition from additional builds starting ... 
     326e.g. if using supervisord :  
     327{{{ 
     328[blyth@cms01 dybgaudi]$ supervisorctl 
     329dybslv                           RUNNING    pid 28651, uptime 1 day, 22:27:01 
     330C> stop dybslv 
     331dybslv: stopped 
     332}}} 
     333 
     334=== attach to python nuwa.py process with gdb === 
     335 
     336Start the failing test : 
     337{{{ 
     338[blyth@cms01 MDC10b]$ nosetests tests/test_mdc10b.py:test_dby0 
     339Warning in <TEnvRec::ChangeValue>: duplicate entry <Library.vector<short>=vector.dll> for level 0; ignored 
     340Run MDC10b.runLED_Muon.FullChain with double-pulsing of LEDs and no muons to produces 50 readouts ... 
     341}}} 
     342  
     343Attach gdb to the process and continue '''c''' : 
     344{{{ 
     345[blyth@cms01 dybgaudi]$ gdb `which python` $(pgrep -f $(which nuwa.py)) 
     346... 
     347Loaded symbols for /data/env/local/dyb/trunk/NuWa-trunk/dybgaudi/InstallArea/i686-slc4-gcc34-dbg/lib/libG4DataHelpers.so 
     3480xb6687b23 in ParticlePropertySvc::anti (this=0xaa28798, pp=0xaa66a98) at ../src/ParticlePropertySvc/ParticlePropertySvc.cpp:445 
     349445         const ParticleProperty* ap = *it ; 
     350(gdb) 
     351}}} 
     352 
     353Unfortunately, with this approach gdb sometimes gets '''Killed''' for being '''Out of Memory'''.   
     354 
     355=== running the command under gdb === 
     356 
     357Grab the command from the source of the test (if simple) or from the process table : 
     358{{{ 
     359ps --no-headers -o command  -p $(pgrep -f $(which nuwa.py)) > cmd 
     360}}} 
     361 
     362Edit the cmd file, fixing up any missing quotes and prefixing it with the gdb command : '''set args''' 
     363 
     364Allowing : 
     365{{{ 
     366[blyth@cms01 dybgaudi]$ gdb `which python` -x cmd 
     367GNU gdb Red Hat Linux (6.3.0.0-1.162.el4rh) 
     368Copyright 2004 Free Software Foundation, Inc. 
     369... 
     370}}} 
     371 
     372 
     373Capture the backtrace with '''bt''' when you meet problems : 
     374{{{ 
     375ElecSimProc                           INFO Processing hit collections 
     376ToolSvc.EsIdealFeeTool                INFO Processing 73 pmt pulses. 
     377ToolSvc.TsMultTriggerTool             INFO Max multiplicity for DayaBayAD1 is 44 
     378*** glibc detected *** malloc(): memory corruption: 0x0fe95d10 *** 
     379 
     380Program received signal SIGABRT, Aborted. 
     381[Switching to Thread -1208318272 (LWP 17858)] 
     3820x00a1e7a2 in _dl_sysinfo_int80 () from /lib/ld-linux.so.2 
     383(gdb) 
     384 
     385(gdb) bt 
     386#0  0x00a1e7a2 in _dl_sysinfo_int80 () from /lib/ld-linux.so.2 
     387#1  0x00a5f915 in raise () from /lib/tls/libc.so.6 
     388#2  0x00a61379 in abort () from /lib/tls/libc.so.6 
     389#3  0x00a93e1a in __libc_message () from /lib/tls/libc.so.6 
     390#4  0x00a9b473 in _int_malloc () from /lib/tls/libc.so.6 
     391#5  0x00a9d0f1 in malloc () from /lib/tls/libc.so.6 
     392#6  0x04fa911e in operator new () from /usr/lib/libstdc++.so.6 
     393#7  0x032762ca in __gnu_cxx::new_allocator<std::_Rb_tree_node<std::pair<char const* const, DybDaq::FeeTraits*> > >::allocate (this=0x32798c4, __n=1) at /usr/lib/gcc/i386-redhat-linux/3.4.6/../../../../include/c++/3.4.6/ext/new_allocator.h:81 
     394#8  0x03276232 in std::_Rb_tree<char const*, std::pair<char const* const, DybDaq::FeeTraits*>, std::_Select1st<std::pair<char const* const, DybDaq::FeeTraits*> >, std::less<char const*>, std::allocator<std::pair<char const* const, DybDaq::FeeTraits*> > >::_M_get_node (this=0x32798c4) at /usr/lib/gcc/i386-redhat-linux/3.4.6/../../../../include/c++/3.4.6/bits/stl_tree.h:356 
     395#9  0x03276159 in std::_Rb_tree<char const*, std::pair<char const* const, DybDaq::FeeTraits*>, std::_Select1st<std::pair<char const* const, DybDaq::FeeTraits*> >, std::less<char const*>, std::allocator<std::pair<char const* const, DybDaq::FeeTraits*> > >::_M_create_node (this=0x32798c4, __x=@0xbfe81c88) at /usr/lib/gcc/i386-redhat-linux/3.4.6/../../../../include/c++/3.4.6/bits/stl_tree.h:365 
     396#10 0x03275ce5 in std::_Rb_tree<char const*, std::pair<char const* const, DybDaq::FeeTraits*>, std::_Select1st<std::pair<char const* const, DybDaq::FeeTraits*> >, std::less<char const*>, std::allocator<std::pair<char const* const, DybDaq::FeeTraits*> > >::_M_insert (this=0x32798c4, __x=0x0, __p=0xfe95b88, __v=@0xbfe81c88) at /usr/lib/gcc/i386-redhat-linux/3.4.6/../../../../include/c++/3.4.6/bits/stl_tree.h:809 
     397#11 0x03275ac9 in std::_Rb_tree<char const*, std::pair<char const* const, DybDaq::FeeTraits*>, std::_Select1st<std::pair<char const* const, DybDaq::FeeTraits*> >, std::less<char const*>, std::allocator<std::pair<char const* const, DybDaq::FeeTraits*> > >::insert_unique (this=0x32798c4, __v=@0xbfe81c88) at /usr/lib/gcc/i386-redhat-linux/3.4.6/../../../../include/c++/3.4.6/bits/stl_tree.h:929 
     398#12 0x0327583f in std::map<char const*, DybDaq::FeeTraits*, std::less<char const*>, std::allocator<std::pair<char const* const, DybDaq::FeeTraits*> > >::insert (this=0x32798c4, __x=@0xbfe81c88) 
     399    at /usr/lib/gcc/i386-redhat-linux/3.4.6/../../../../include/c++/3.4.6/bits/stl_map.h:360 
     400#13 0x032755cf in DybDaq::FeeTraits::defaultTraits () at ../src/FeeTraits.cc:52 
     401#14 0xb5880e3c in DayaBay::DaqReadoutPmtCrate::channel (this=0xfe97a80, channelId=@0xbfe81dc0) at ../src/DaqReadoutPmtCrate.cc:170 
     402#15 0xb5884bd5 in DayaBay::ReadoutPmtCrate::daqReadout (this=0xfe97780, run=0, event=0) at ../src/ReadoutPmtCrate.cc:77 
     403#16 0xaeb14500 in SingleLoader::execute (this=0xab6ec28) at ../src/SingleLoader.cc:112 
     404#17 0x03f95d2c in Algorithm::sysExecute (this=0xab6ec28) at ../src/Lib/Algorithm.cpp:558 
     405#18 0xaeb1f6fc in DybAlgorithm<DayaBay::ReadoutHeader>::sysExecute (this=0xab6ec28) at /data/env/local/dyb/trunk/NuWa-trunk/dybgaudi/InstallArea/include/DybAlg/DybAlgorithmImp.h:59 
     406#19 0x01825d45 in GaudiSequencer::execute (this=0xab6bc00) at ../src/lib/GaudiSequencer.cpp:100 
     407#20 0xb58d3823 in Stage::nextElement (this=0xab6ae78, pIStgData=@0xbfe8248c, erase=true) at ../src/Stage.cc:48 
     408#21 0xb58c0a4e in Sim15::execute (this=0xaae7608) at ../src/Sim15.cc:121 
     409Killed 
     410}}} 
     411 
     412 
     413Report findings in Trac tickets such as #565 
     414 
     415 
     416 
     417 
     418 
     419  
     420  
     421 
     422 
     423 
     424  
     425 
     426 
     427 
     428 
     429