Alec Matusis
matusis at yahoo.com
Tue Jan 22 21:49:33 EST 2008
> (gdb) thread apply all bt

Thread 1 (Thread 1111562592 (LWP 3812)):
#0  0x00002aaaab2c94df in sem_wait () from /lib64/tls/libpthread.so.0
#1  0x00002aaaac204dcd in PyThread_acquire_lock (lock=0x5b7820, waitflag=1) at thread_pthread.h:298
#2  0x00002aaaac1f7008 in PyImport_ImportModuleEx (name=0x2aaaad7195c4 "errno", globals=0x2aaaada331c0, locals=<value optimized out>, fromlist=0x2aaaad75e998) at import.c:239
#3  0x00002aaaac230883 in builtin___import__ (self=<value optimized out>, args=<value optimized out>) at bltinmodule.c:45
#4  0x00002aaaac19c91c in PyCFunction_Call (func=0x2aaaad6f9440, arg=0x2aaab0a68c58, kw=<value optimized out>) at methodobject.c:93
#5  0x00002aaaac177470 in PyObject_Call (func=<value optimized out>, arg=<value optimized out>, kw=<value optimized out>) at abstract.c:1751
#6  0x00002aaaac1cb58e in PyEval_CallObjectWithKeywords (func=0x2aaaad6f9440, arg=0x2aaab0a68c58, kw=0x0) at ceval.c:3419
#7  0x00002aaaac1cda14 in PyEval_EvalFrame (f=0x8fe4e0) at ceval.c:2020
#8  0x00002aaaac1d2d7e in PyEval_EvalCodeEx (co=0x2aaaad75fb20, globals=<value optimized out>, locals=<value optimized out>, args=<value optimized out>, argcount=2, kws=0xb71220, kwcount=0, defs=0x2aaaad761e68, defcount=1, closure=0x0) at ceval.c:2730
#9  0x00002aaaac1d0714 in PyEval_EvalFrame (f=0xb71070) at ceval.c:3639
#10 0x00002aaaac1d26d7 in PyEval_EvalFrame (f=0xa54890) at ceval.c:3629
#11 0x00002aaaac1d26d7 in PyEval_EvalFrame (f=0x891e10) at ceval.c:3629
#12 0x00002aaaac1d2d7e in PyEval_EvalCodeEx (co=0x2aaab0a78f10, globals=<value optimized out>, locals=<value optimized out>, args=<value optimized out>, argcount=4, kws=0x0, kwcount=0, defs=0x2aaab0a5bde8, defcount=2, closure=0x0) at ceval.c:2730
#13 0x00002aaaac22acc7 in function_call (func=0x2aaab0aa3050, arg=0x2aaab0a68520, kw=0x0) at funcobject.c:548
#14 0x00002aaaac177470 in PyObject_Call (func=<value optimized out>, arg=<value optimized out>, kw=<value optimized out>) at abstract.c:1751
#15 0x00002aaaac183871 in instancemethod_call (func=<value optimized out>, arg=0x2aaab0a68520, kw=0x0) at classobject.c:2431
#16 0x00002aaaac177470 in PyObject_Call (func=<value optimized out>, arg=<value optimized out>, kw=<value optimized out>) at abstract.c:1751
#17 0x00002aaaac1cb58e in PyEval_CallObjectWithKeywords (func=0x2aaab00c1d20, arg=0x2aaab0aa1280, kw=0x0) at ceval.c:3419
#18 0x00002aaaac18449e in PyInstance_New (klass=<value optimized out>, arg=0x2aaab0aa1280, kw=0x0) at classobject.c:575
#19 0x00002aaaac177470 in PyObject_Call (func=<value optimized out>, arg=<value optimized out>, kw=<value optimized out>) at abstract.c:1751
#20 0x00002aaaac1cf5b1 in PyEval_EvalFrame (f=0x8fe7c0) at ceval.c:3755
#21 0x00002aaaac1d2d7e in PyEval_EvalCodeEx (co=0x2aaab0a9f810, globals=<value optimized out>, locals=<value optimized out>, args=<value optimized out>, argcount=2, kws=0x2aaaada67ec0, kwcount=0, defs=0x2aaab0a5a728, defcount=2, closure=0x0) at ceval.c:2730
#22 0x00002aaaac1d0714 in PyEval_EvalFrame (f=0x2aaaada67cf0) at ceval.c:3639
#23 0x00002aaaac1d2d7e in PyEval_EvalCodeEx (co=0x2aaaad762880, globals=<value optimized out>, locals=<value optimized out>, args=<value optimized out>, argcount=1, kws=0x987808, kwcount=0, defs=0x2aaaadc040f8, defcount=2, closure=0x0) at ceval.c:2730
#24 0x00002aaaac1d0714 in PyEval_EvalFrame (f=0x987650) at ceval.c:3639
#25 0x00002aaaac1d2d7e in PyEval_EvalCodeEx (co=0x2aaab00fc8f0, globals=<value optimized out>, locals=<value optimized out>, args=<value optimized out>, argcount=1, kws=0x7c29e8, kwcount=0, defs=0x2aaab0095a88, defcount=2, closure=0x0) at ceval.c:2730
#26 0x00002aaaac1d0714 in PyEval_EvalFrame (f=0x7c27b0) at ceval.c:3639
#27 0x00002aaaac1d26d7 in PyEval_EvalFrame (f=0xb04790) at ceval.c:3629
#28 0x00002aaaac1d2d7e in PyEval_EvalCodeEx (co=0x2aaaaec4c960, globals=<value optimized out>, locals=<value optimized out>, args=<value optimized out>, argcount=1, kws=0x2aaaadd5cd48, kwcount=0, defs=0x0, defcount=0, closure=0x0) at ceval.c:2730
#29 0x00002aaaac1d0714 in PyEval_EvalFrame (f=0x2aaaadd5cb60) at ceval.c:3639
#30 0x00002aaaac1d26d7 in PyEval_EvalFrame (f=0x2aaaaddc8b90) at ceval.c:3629
#31 0x00002aaaac1d2d7e in PyEval_EvalCodeEx (co=0x2aaaaec5e570, globals=<value optimized out>, locals=<value optimized out>, args=<value optimized out>, argcount=0, kws=0x2aaaadd67568, kwcount=7, defs=0x0, defcount=0, closure=0x0) at ceval.c:2730
#32 0x00002aaaac1d0714 in PyEval_EvalFrame (f=0x2aaaadd67340) at ceval.c:3639
#33 0x00002aaaac1d2d7e in PyEval_EvalCodeEx (co=0x2aaaaec5e730, globals=<value optimized out>, locals=<value optimized out>, args=<value optimized out>, argcount=2, kws=0x0, kwcount=0, defs=0x0, defcount=0, closure=0x0) at ceval.c:2730
#34 0x00002aaaac22acc7 in function_call (func=0x2aaaaf50fed8, arg=0x2aaab0158fc8, kw=0x0) at funcobject.c:548
#35 0x00002aaaac177470 in PyObject_Call (func=<value optimized out>, arg=<value optimized out>, kw=<value optimized out>) at abstract.c:1751
#36 0x00002aaaac183871 in instancemethod_call (func=<value optimized out>, arg=0x2aaab0158fc8, kw=0x0) at classobject.c:2431
#37 0x00002aaaac177470 in PyObject_Call (func=<value optimized out>, arg=<value optimized out>, kw=<value optimized out>) at abstract.c:1751
#38 0x00002aaaac17828d in PyObject_CallMethod (o=<value optimized out>, name=<value optimized out>, format=0x2aaab0a63190 "\001") at abstract.c:1840
#39 0x00002aaaac17603a in python_handler (req=0x853098, phase=0x2aaaac23583f "PythonHandler") at mod_python.c:1652
#40 0x00000000004328e1 in ap_run_handler (r=0x853098) at config.c:157
#41 0x00000000004359a9 in ap_invoke_handler (r=0x853098) at config.c:372
#42 0x000000000043fb2a in ap_process_request (r=0x853098) at http_request.c:258
#43 0x000000000043d1fd in ap_process_http_connection (c=0x2aaaada478e0) at http_core.c:184
#44 0x0000000000439621 in ap_run_process_connection (c=0x2aaaada478e0) at connection.c:43
#45 0x00000000004443e2 in worker_thread (thd=0x5a9d20, dummy=<value optimized out>) at worker.c:544
#46 0x00002aaaaaf7417b in dummy_worker (opaque=<value optimized out>) at thread.c:142
#47 0x00002aaaab2c584f in start_thread () from /lib64/tls/libpthread.so.0
#48 0x00002aaaab591703 in clone () from /lib64/tls/libc.so.6
#49 0x0000000000000000 in ?? ()
#50 0x0000000000000000 in ?? ()

> When moving to worker MPM did you go over your code and made sure it
> was thread safe in the way it manipulated global objects in modules?
> Are all the third party modules you use thread safe?

We eliminated all global objects, so at least there were no errors in the
application logs. We are using three third-party modules: MySQLdb (compiled
in thread-safe mode), PIL, and Cheetah templates.

> Generally it is really bad to do stuff at time of import because the
> global Python import lock is held at that time and while that is held
> any other threads which want to import a module can't.

I have to look into this more; we have a very simple custom importer:

def _import( module_path ):
    '''this is a custom import function.
    expects a dotted-notation module name
    imports the module efficiently, pulls out the class named the same
    as the module from the module, returns it'''
    try:
        runme = __import__( module_path, globals(), locals(), [])
        for comp in module_path.split('.')[1:]:
            runme = getattr(runme, comp)
    except ImportError,e:
        raise Exception,'Unable to load: '+module_path+' '+str(e)
    #get the last part
    class_name = module_path.split('.')[-1]
    try:
        runme = getattr(runme, class_name )
    except AttributeError:
        raise Exception,'Module does not contain class '+class_name
    return runme

In publisher, we have:

runme = _import( module_path )
flow_instance = runme(req)
flow_instance.dispatch()
response_buffer = flow_instance.process_template()
req.send_http_header()
req.write(response_buffer)
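One way to keep __import__() (and therefore the interpreter's global import
lock) off the per-request path would be to cache the class that _import
resolves, guarded by an ordinary lock. This is only a sketch written against
the _import shown above, not the code deployed on this server; the helper
name _import_cached is made up for illustration:

import threading

_class_cache = {}                    # module_path -> resolved class
_class_cache_lock = threading.Lock()

def _import_cached( module_path ):
    # fast path: dict reads are atomic under the GIL
    try:
        return _class_cache[module_path]
    except KeyError:
        pass
    # slow path: only the first request for a given module_path imports it
    _class_cache_lock.acquire()
    try:
        if module_path not in _class_cache:
            _class_cache[module_path] = _import( module_path )
        return _class_cache[module_path]
    finally:
        _class_cache_lock.release()

After the first request for a given module_path, later calls never enter the
import machinery at all, so they cannot queue up behind the import lock the
way the sem_wait/PyImport_ImportModuleEx frames above show.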
> -----Original Message-----
> From: Graham Dumpleton [mailto:graham.dumpleton at gmail.com]
> Sent: Tuesday, January 22, 2008 6:36 PM
> To: Alec Matusis
> Cc: mod_python at modpython.org
> Subject: Re: [mod_python] remnant 'orphan' apache subprocesses
>
> You may need to get stack traces from all threads:
>
> thread apply all bt
>
> What this one thread shows is that some Python code in your
> application is doing complicated stuff at global level in a module. In
> other words, the code is being run at time of import of the module
> rather than as a result of a specific request occurring.
>
> Generally it is really bad to do stuff at time of import because the
> global Python import lock is held at that time and while that is held
> any other threads which want to import a module can't. This means that
> if the operation being performed blocks, it will effectively block all
> the other threads when they in turn need to import something. Worse is
> if that first thread deadlocked in some way as it would stay that way
> forever.
>
> When moving to worker MPM did you go over your code and made sure it
> was thread safe in the way it manipulated global objects in modules?
> Are all the third party modules you use thread safe?
> > Graham > > On 23/01/2008, Alec Matusis <matusis at yahoo.com> wrote: > > > gdb /usr/bin/httpd 32427 > > > > (gdb) where > > #0 0x00002aaaab2c94df in sem_wait () from /lib64/tls/libpthread.so.0 > > #1 0x00002aaaac204dcd in PyThread_acquire_lock (lock=0x5b7820, > waitflag=1) > > at thread_pthread.h:298 > > #2 0x00002aaaac1f7008 in PyImport_ImportModuleEx > (name=0x2aaaad7195c4 > > "errno", globals=0x2aaaada331c0, > > locals=<value optimized out>, fromlist=0x2aaaad75e998) at > import.c:239 > > #3 0x00002aaaac230883 in builtin___import__ (self=<value optimized > out>, > > args=<value optimized out>) at bltinmodule.c:45 > > #4 0x00002aaaac19c91c in PyCFunction_Call (func=0x2aaaad6f9440, > > arg=0x2aaab0a68c58, kw=<value optimized out>) at methodobject.c:93 > > #5 0x00002aaaac177470 in PyObject_Call (func=<value optimized out>, > > arg=<value optimized out>, kw=<value optimized out>) > > at abstract.c:1751 > > #6 0x00002aaaac1cb58e in PyEval_CallObjectWithKeywords > > (func=0x2aaaad6f9440, arg=0x2aaab0a68c58, kw=0x0) at ceval.c:3419 > > #7 0x00002aaaac1cda14 in PyEval_EvalFrame (f=0x8fe4e0) at > ceval.c:2020 > > #8 0x00002aaaac1d2d7e in PyEval_EvalCodeEx (co=0x2aaaad75fb20, > > globals=<value optimized out>, locals=<value optimized out>, > > args=<value optimized out>, argcount=2, kws=0xb71220, kwcount=0, > > defs=0x2aaaad761e68, defcount=1, closure=0x0) at ceval.c:2730 > > #9 0x00002aaaac1d0714 in PyEval_EvalFrame (f=0xb71070) at > ceval.c:3639 > > #10 0x00002aaaac1d26d7 in PyEval_EvalFrame (f=0xa54890) at > ceval.c:3629 > > #11 0x00002aaaac1d26d7 in PyEval_EvalFrame (f=0x891e10) at > ceval.c:3629 > > #12 0x00002aaaac1d2d7e in PyEval_EvalCodeEx (co=0x2aaab0a78f10, > > globals=<value optimized out>, locals=<value optimized out>, > > args=<value optimized out>, argcount=4, kws=0x0, kwcount=0, > > defs=0x2aaab0a5bde8, defcount=2, closure=0x0) at ceval.c:2730 > > #13 0x00002aaaac22acc7 in function_call (func=0x2aaab0aa3050, > > arg=0x2aaab0a68520, kw=0x0) at funcobject.c:548 > > #14 0x00002aaaac177470 in PyObject_Call (func=<value optimized out>, > > arg=<value optimized out>, kw=<value optimized out>) > > at abstract.c:1751 > > #15 0x00002aaaac183871 in instancemethod_call (func=<value optimized > out>, > > arg=0x2aaab0a68520, kw=0x0) at classobject.c:2431 > > #16 0x00002aaaac177470 in PyObject_Call (func=<value optimized out>, > > arg=<value optimized out>, kw=<value optimized out>) > > at abstract.c:1751 > > #17 0x00002aaaac1cb58e in PyEval_CallObjectWithKeywords > > (func=0x2aaab00c1d20, arg=0x2aaab0aa1280, kw=0x0) at ceval.c:3419 > > #18 0x00002aaaac18449e in PyInstance_New (klass=<value optimized > out>, > > arg=0x2aaab0aa1280, kw=0x0) at classobject.c:575 > > #19 0x00002aaaac177470 in PyObject_Call (func=<value optimized out>, > > arg=<value optimized out>, kw=<value optimized out>) > > at abstract.c:1751 > > #20 0x00002aaaac1cf5b1 in PyEval_EvalFrame (f=0x8fe7c0) at > ceval.c:3755 > > #21 0x00002aaaac1d2d7e in PyEval_EvalCodeEx (co=0x2aaab0a9f810, > > globals=<value optimized out>, locals=<value optimized out>, > > args=<value optimized out>, argcount=2, kws=0x2aaaada67ec0, > kwcount=0, > > defs=0x2aaab0a5a728, defcount=2, closure=0x0) > > at ceval.c:2730 > > #22 0x00002aaaac1d0714 in PyEval_EvalFrame (f=0x2aaaada67cf0) at > > ceval.c:3639 > > #23 0x00002aaaac1d2d7e in PyEval_EvalCodeEx (co=0x2aaaad762880, > > globals=<value optimized out>, locals=<value optimized out>, > > args=<value optimized out>, argcount=1, kws=0x987808, kwcount=0, > > defs=0x2aaaadc040f8, defcount=2, closure=0x0) 
at ceval.c:2730 > > #24 0x00002aaaac1d0714 in PyEval_EvalFrame (f=0x987650) at > ceval.c:3639 > > #25 0x00002aaaac1d2d7e in PyEval_EvalCodeEx (co=0x2aaab00fc8f0, > > globals=<value optimized out>, locals=<value optimized out>, > > args=<value optimized out>, argcount=1, kws=0x7c29e8, kwcount=0, > > defs=0x2aaab0095a88, defcount=2, closure=0x0) at ceval.c:2730 > > #26 0x00002aaaac1d0714 in PyEval_EvalFrame (f=0x7c27b0) at > ceval.c:3639 > > #27 0x00002aaaac1d26d7 in PyEval_EvalFrame (f=0xb04790) at > ceval.c:3629 > > #28 0x00002aaaac1d2d7e in PyEval_EvalCodeEx (co=0x2aaaaec4c960, > > globals=<value optimized out>, locals=<value optimized out>, > > args=<value optimized out>, argcount=1, kws=0x2aaaadd5cd48, > kwcount=0, > > defs=0x0, defcount=0, closure=0x0) at ceval.c:2730 > > #29 0x00002aaaac1d0714 in PyEval_EvalFrame (f=0x2aaaadd5cb60) at > > ceval.c:3639 > > #30 0x00002aaaac1d26d7 in PyEval_EvalFrame (f=0x2aaaaddc8b90) at > > ceval.c:3629 > > #31 0x00002aaaac1d2d7e in PyEval_EvalCodeEx (co=0x2aaaaec5e570, > > globals=<value optimized out>, locals=<value optimized out>, > > args=<value optimized out>, argcount=0, kws=0x2aaaadd67568, > kwcount=7, > > defs=0x0, defcount=0, closure=0x0) at ceval.c:2730 > > #32 0x00002aaaac1d0714 in PyEval_EvalFrame (f=0x2aaaadd67340) at > > ceval.c:3639 > > #33 0x00002aaaac1d2d7e in PyEval_EvalCodeEx (co=0x2aaaaec5e730, > > globals=<value optimized out>, locals=<value optimized out>, > > args=<value optimized out>, argcount=2, kws=0x0, kwcount=0, > defs=0x0, > > defcount=0, closure=0x0) at ceval.c:2730 > > #34 0x00002aaaac22acc7 in function_call (func=0x2aaaaf50fed8, > > arg=0x2aaab0158fc8, kw=0x0) at funcobject.c:548 > > #35 0x00002aaaac177470 in PyObject_Call (func=<value optimized out>, > > arg=<value optimized out>, kw=<value optimized out>) > > at abstract.c:1751 > > #36 0x00002aaaac183871 in instancemethod_call (func=<value optimized > out>, > > arg=0x2aaab0158fc8, kw=0x0) at classobject.c:2431 > > #37 0x00002aaaac177470 in PyObject_Call (func=<value optimized out>, > > arg=<value optimized out>, kw=<value optimized out>) > > at abstract.c:1751 > > #38 0x00002aaaac17828d in PyObject_CallMethod (o=<value optimized > out>, > > name=<value optimized out>, format=0x2aaab0a63190 "\001") > > at abstract.c:1840 > > #39 0x00002aaaac17603a in python_handler (req=0x853098, > phase=0x2aaaac23583f > > "PythonHandler") at mod_python.c:1652 > > > > > -----Original Message----- > > > From: Graham Dumpleton [mailto:graham.dumpleton at gmail.com] > > > Sent: Tuesday, January 22, 2008 6:05 PM > > > To: Alec Matusis > > > Cc: mod_python at modpython.org > > > Subject: Re: [mod_python] remnant 'orphan' apache subprocesses > > > > > > As root, attach to the process by its pid and generate a full stack > > > trace. > > > > > > gdb /usr/bin/httpd 32427 > > > > > > ... > > > > > > $ where > > > > > > FWIW, for these processes to have a PPID of 1, then some code > running > > > in the process must have tried to daemonise the process, or some > > > measure of it, for example by calling setsid(). This would > > > disassociate it with proper Apache parent process. > > > > > > Graham > > > > > > On 23/01/2008, Alec Matusis <matusis at yahoo.com> wrote: > > > > Here is another potentially useful cue: > > > > 32427 is this orphan process. > > > > > > > > # strace -p 32427 > > > > Process 32427 attached - interrupt to quit > > > > futex(0x5b7820, FUTEX_WAIT, 0, NULL > > > > > > > > and it's just stuck there. 
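To illustrate the PPID-of-1 point quoted above: a process is reparented to
init (PPID 1) when its parent exits, which is exactly what deliberate
daemonisation via fork()/setsid() arranges. The following is a minimal
standalone sketch of that mechanism only; it is not taken from, and not
claimed to be in, the application discussed here:

import os, time

pid = os.fork()
if pid > 0:
    os._exit(0)      # parent exits immediately
os.setsid()          # child detaches into its own session
time.sleep(60)       # during this window, ps -ef shows the child with PPID 1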
> > > > > > > > > -----Original Message----- > > > > > From: Graham Dumpleton [mailto:graham.dumpleton at gmail.com] > > > > > Sent: Tuesday, January 22, 2008 5:49 PM > > > > > To: Alec Matusis > > > > > Cc: mod_python at modpython.org > > > > > Subject: Re: [mod_python] remnant 'orphan' apache subprocesses > > > > > > > > > > BTW, originally you said: > > > > > > > > > > """I have been investigating a memory leak that occurs on an > > > apache > > > > > server > > > > > since we switched to worker MPM. > > > > > I found that the source of it are apache subprocesses that lose > > > track > > > > > of > > > > > their parent and never exit:""" > > > > > > > > > > If the processes are truly zombie processes, then they > shouldn't > > > > > actually consume any resources except for the entry in the > process > > > > > table. Ie., they are an process accounting artifact. > > > > > > > > > > So, strictly speaking they shouldn't be causing any memory > leaks, > > > or > > > > > more correctly consuming memory which isn't released. > > > > > > > > > > Graham > > > > > > > > > > On 23/01/2008, Alec Matusis <matusis at yahoo.com> wrote: > > > > > > > > [Tue Jan 22 10:44:45 2008] [notice] child pid 8798 exit > > > signal > > > > > > > Segmentation > > > > > > > > fault (11) > > > > > > > > ... > > > > > > > > Fatal Python error: Inconsistent interned string state. > > > > > > > > > > > > > > This is corruption of memory used by Python. > > > > > > > > > > > > > > What version of mod_python are you using? > > > > > > > > > > > > Apache/2.2.6 (Unix) mod_python/3.3.1 Python/2.4.1 > > > > > > > > > > > > But since there is only one such log entry, I'd expect at > most 1 > > > > > zombie as a > > > > > > result of this? I have 6. > > > > > > > > > > > > > -----Original Message----- > > > > > > > From: Graham Dumpleton [mailto:graham.dumpleton at gmail.com] > > > > > > > Sent: Tuesday, January 22, 2008 5:15 PM > > > > > > > To: Alec Matusis > > > > > > > Cc: mod_python at modpython.org > > > > > > > Subject: Re: [mod_python] remnant 'orphan' apache > subprocesses > > > > > > > > > > > > > > On 23/01/2008, Alec Matusis <matusis at yahoo.com> wrote: > > > > > > > > > What do you get if you use a program like lsof or > ofiles to > > > > > work > > > > > > > out > > > > > > > > > what open resources the zombie process may still be > holding > > > on > > > > > to? > > > > > > > > > > > > > > > > There are 6 zombie sub processes now; executing lsof -p > pid > > > takes > > > > > > > ages, and > > > > > > > > it brings the load average up from 8.0 to 23+ on this > > > machine- so > > > > > I > > > > > > > am > > > > > > > > afraid to wait long enough to get a result. > > > > > > > > #ps -ef | grep httpd > > > > > > > > root 16197 1 0 Jan21 ? 00:00:15 > > > > > > > > nobody 23095 16197 0 08:44 ? 00:00:06 > > > > > > > > nobody 29548 1 0 13:14 ? 00:00:00 > > > > > > > > nobody 3812 1 0 13:57 ? 00:00:00 > > > > > > > > nobody 4161 1 0 13:59 ? 00:00:00 > > > > > > > > nobody 20110 1 0 15:43 ? 00:00:00 > > > > > > > > nobody 25399 1 0 16:17 ? 00:00:00 > > > > > > > > nobody 28722 1 0 16:38 ? 00:00:00 > > > > > > > > nobody 28971 16197 5 16:40 ? 00:00:20 > > > > > > > > nobody 29189 16197 7 16:42 ? 00:00:21 > > > > > > > > nobody 29327 16197 7 16:42 ? 00:00:18 > > > > > > > > nobody 29453 16197 6 16:43 ? 00:00:13 > > > > > > > > nobody 29496 16197 10 16:43 ? 00:00:20 > > > > > > > > nobody 29539 16197 9 16:43 ? 00:00:19 > > > > > > > > nobody 29639 16197 11 16:44 ? 00:00:14 > > > > > > > > nobody 29713 16197 11 16:45 ? 
00:00:12 > > > > > > > > nobody 29804 16197 5 16:45 ? 00:00:05 > > > > > > > > nobody 29857 16197 10 16:45 ? 00:00:09 > > > > > > > > nobody 29902 16197 10 16:45 ? 00:00:08 > > > > > > > > nobody 29945 16197 11 16:46 ? 00:00:07 > > > > > > > > nobody 29998 16197 11 16:46 ? 00:00:06 > > > > > > > > nobody 30058 16197 16 16:47 ? 00:00:01 > > > > > > > > > > > > > > > > note that those zombie sub processes seem to have had > > > 00:00:00 > > > > > run > > > > > > > time, > > > > > > > > unlike normal sub processes. > > > > > > > > 3 entries in apache error logs this time: > > > > > > > > > > > > > > > > [Tue Jan 22 10:44:45 2008] [notice] child pid 8798 exit > > > signal > > > > > > > Segmentation > > > > > > > > fault (11) > > > > > > > > ... > > > > > > > > Fatal Python error: Inconsistent interned string state. > > > > > > > > > > > > > > This is corruption of memory used by Python. > > > > > > > > > > > > > > What version of mod_python are you using? > > > > > > > > > > > > > > > [Tue Jan 22 14:03:16 2008] [notice] child pid 4008 exit > > > signal > > > > > > > Aborted (6) > > > > > > > > > > > > > > > > is there a faster way to see what it is holding on to? > > > > > > > > > > > > > > I know you said the applications weren't spawning sub > > > processes, > > > > > but > > > > > > > if your system has 'ptree' try that. In other words, see if > > > those > > > > > > > daemon processes have children which still exist. This was > in > > > part > > > > > > > what I was hoping to see, ie., pipes to child processes. > The > > > other > > > > > > > thing I was looking for was stuck file accesses to NFS > mounted > > > > > > > filesystems or something. Alternative to ptree is just to > look > > > at > > > > > > > parent child relationships in ps output. > > > > > > > > > > > > > > Graham > > > > > > > > > > > > > > > > -----Original Message----- > > > > > > > > > From: Graham Dumpleton > [mailto:graham.dumpleton at gmail.com] > > > > > > > > > Sent: Monday, January 21, 2008 10:27 PM > > > > > > > > > To: Alec Matusis > > > > > > > > > Cc: mod_python at modpython.org > > > > > > > > > Subject: Re: [mod_python] remnant 'orphan' apache > > > subprocesses > > > > > > > > > > > > > > > > > > What do you get if you use a program like lsof or > ofiles to > > > > > work > > > > > > > out > > > > > > > > > what open resources the zombie process may still be > holding > > > on > > > > > to? > > > > > > > > > > > > > > > > > > Are you absolutely sure that that zombie process is > from > > > the > > > > > > > current > > > > > > > > > Apache instance and not perhaps an earlier instance of > > > Apache? > > > > > > > > > > > > > > > > > > Graham > > > > > > > > > > > > > > > > > > On 22/01/2008, Alec Matusis <matusis at yahoo.com> wrote: > > > > > > > > > > > Are CGI scripts used anywhere at all on your Apache > web > > > > > site? > > > > > > > > > > > > > > > > > > > > No, only mod_python and serving static files. > > > > > > > > > > > > > > > > > > > > > -----Original Message----- > > > > > > > > > > > From: Graham Dumpleton > > > [mailto:graham.dumpleton at gmail.com] > > > > > > > > > > > Sent: Monday, January 21, 2008 10:07 PM > > > > > > > > > > > To: Alec Matusis > > > > > > > > > > > Cc: mod_python at modpython.org > > > > > > > > > > > Subject: Re: [mod_python] remnant 'orphan' apache > > > > > subprocesses > > > > > > > > > > > > > > > > > > > > > > Are CGI scripts used anywhere at all on your Apache > web > > > > > site? 
> > > > > > > > > > > > > > > > > > > > > > On 22/01/2008, Alec Matusis <matusis at yahoo.com> > wrote: > > > > > > > > > > > > > What version of Apache are you using? > > > > > > > > > > > > > > > > > > > > > > > > 2.2.6 > > > > > > > > > > > > > > > > > > > > > > > > > What Python web application are you running on > top > > > of > > > > > > > > > mod_python, a > > > > > > > > > > > > > self built one or one that uses one of the > larger > > > web > > > > > > > > > frameworks? > > > > > > > > > > > > > > > > > > > > > > > > Only self-built stuff, nothing complicated. > > > > > > > > > > > > > > > > > > > > > > > > > Does your application create sub processes in > any > > > way > > > > > to > > > > > > > > > perform > > > > > > > > > > > > > additional work? > > > > > > > > > > > > > > > > > > > > > > > > No sub processes and no threads, except that we > use > > > > > MySQLdb > > > > > > > > > module > > > > > > > > > > > (which > > > > > > > > > > > > might create threads?). > > > > > > > > > > > > > > > > > > > > > > > > I noticed a warning in the error log: > > > > > > > > > > > > /live/scripts/_pro.py:100: Warning: Rows matched: > 1 > > > > > Changed: > > > > > > > 1 > > > > > > > > > > > Warnings: 1 > > > > > > > > > > > > (this is a mysql warning), but I would not think > this > > > is > > > > > > > > > relevant... > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > -----Original Message----- > > > > > > > > > > > > > From: Graham Dumpleton > > > > > [mailto:graham.dumpleton at gmail.com] > > > > > > > > > > > > > Sent: Monday, January 21, 2008 9:22 PM > > > > > > > > > > > > > To: Alec Matusis > > > > > > > > > > > > > Cc: mod_python at modpython.org > > > > > > > > > > > > > Subject: Re: [mod_python] remnant 'orphan' > apache > > > > > > > subprocesses > > > > > > > > > > > > > > > > > > > > > > > > > > What version of Apache are you using? > > > > > > > > > > > > > > > > > > > > > > > > > > What Python web application are you running on > top > > > of > > > > > > > > > mod_python, a > > > > > > > > > > > > > self built one or one that uses one of the > larger > > > web > > > > > > > > > frameworks? > > > > > > > > > > > > > > > > > > > > > > > > > > Does your application create sub processes in > any > > > way > > > > > to > > > > > > > > > perform > > > > > > > > > > > > > additional work? > > > > > > > > > > > > > > > > > > > > > > > > > > Graham > > > > > > > > > > > > > > > > > > > > > > > > > > On 22/01/2008, Alec Matusis <matusis at yahoo.com> > > > wrote: > > > > > > > > > > > > > > I have been investigating a memory leak that > > > occurs > > > > > on > > > > > > > an > > > > > > > > > apache > > > > > > > > > > > > > server > > > > > > > > > > > > > > since we switched to worker MPM. > > > > > > > > > > > > > > I found that the source of it are apache > > > subprocesses > > > > > > > that > > > > > > > > > lose > > > > > > > > > > > track > > > > > > > > > > > > > of > > > > > > > > > > > > > > their parent and never exit: > > > > > > > > > > > > > > > > > > > > > > > > > > > > root at web10 ~> ps -ef | grep httpd > > > > > > > > > > > > > > root 16197 1 0 02:00 ? > 00:00:09 > > > > > > > > > > > > > > /usr/local/encap/httpd/bin/httpd -f > > > > > > > /p2/web/conf/web10.conf - > > > > > > > > > k > > > > > > > > > > > start > > > > > > > > > > > > > > nobody 17750 1 0 17:53 ? 
> 00:00:00 > > > > > > > > > > > > > > /usr/local/encap/httpd/bin/httpd -f > > > > > > > /p2/web/conf/web10.conf - > > > > > > > > > k > > > > > > > > > > > start > > > > > > > > > > > > > > nobody 5112 16197 4 20:02 ? > 00:00:16 > > > > > > > > > > > > > > /usr/local/encap/httpd/bin/httpd -f > > > > > > > /p2/web/conf/web10.conf - > > > > > > > > > k > > > > > > > > > > > start > > > > > > > > > > > > > > nobody 5159 16197 4 20:02 ? > 00:00:15 > > > > > > > > > > > > > > /usr/local/encap/httpd/bin/httpd -f > > > > > > > /p2/web/conf/web10.conf - > > > > > > > > > k > > > > > > > > > > > start > > > > > > > > > > > > > > nobody 5300 16197 4 20:03 ? > 00:00:14 > > > > > > > > > > > > > > /usr/local/encap/httpd/bin/httpd -f > > > > > > > /p2/web/conf/web10.conf - > > > > > > > > > k > > > > > > > > > > > start > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > in this output, apache child pid 17750 has > pid 1 > > > as a > > > > > > > parent, > > > > > > > > > and > > > > > > > > > > > it > > > > > > > > > > > > > is one > > > > > > > > > > > > > > of those 'zombie children'. > > > > > > > > > > > > > > Pids 5112, 5159, 5300 were normal (parent is > pid > > > > > 16197), > > > > > > > and > > > > > > > > > > > they > > > > > > > > > > > > > exited > > > > > > > > > > > > > > after MaxRequestsPerChild was reached. > > > > > > > > > > > > > > > > > > > > > > > > > > > > Does anybody have any advice on this? I > cannot > > > > > correlate > > > > > > > this > > > > > > > > > to > > > > > > > > > > > > > anything, > > > > > > > > > > > > > > there's nothing interesting in the server > error > > > log. > > > > > > > > > > > > > > These 'zombies' appear at a rate of 2-3 per > day; > > > this > > > > > > > apache > > > > > > > > > > > serves > > > > > > > > > > > > > about > > > > > > > > > > > > > > 350 requests per second. > > > > > > > > > > > > > > > > > > > > > > > > > > > > This Apache configuration is > > > > > > > > > > > > > > > > > > > > > > > > > > > > ServerLimit 40 > > > > > > > > > > > > > > ThreadLimit 70 > > > > > > > > > > > > > > > > > > > > > > > > > > > > StartServers 10 > > > > > > > > > > > > > > MaxClients 1600 > > > > > > > > > > > > > > MinSpareThreads 75 > > > > > > > > > > > > > > MaxSpareThreads 200 > > > > > > > > > > > > > > ThreadsPerChild 40 > > > > > > > > > > > > > > MaxRequestsPerChild 10000 > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > _______________________________________________ > > > > > > > > > > > > > > Mod_python mailing list > > > > > > > > > > > > > > Mod_python at modpython.org > > > > > > > > > > > > > > > > > > > http://mailman.modpython.org/mailman/listinfo/mod_python > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > > >
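On the third-party thread-safety question raised above: MySQLdb is thread
safe at the module level (threadsafety level 1), but a single connection
object should not be shared between threads, which matters under worker MPM.
A minimal sketch of a connection-per-thread pattern follows; the connection
parameters are placeholders, not this site's configuration:

import threading
import MySQLdb

_local = threading.local()

def get_connection():
    # one connection per worker thread; never share a connection object
    conn = getattr(_local, 'conn', None)
    if conn is None:
        conn = MySQLdb.connect(host='localhost', user='user',
                               passwd='password', db='database')
        _local.conn = conn
    return conn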