[mod_python] Mapping a URI to pages dynamically.

ganapathy murali krishnan gmurali at cs.uchicago.edu
Mon May 19 15:59:32 EST 2003

Sorry for not replying earlier... Here is the code we use.
I have removed the pieces of the code which deal with other stuff.
I hope I have not removed too many things...
This code is due to Dustin Mitchell, who set up our site.

We use translate.py as our translate handler, which, as you can
see, creates req.url, an object holding lots of information about the
requested URL. The check_dont_handle function lets us decline to
handle certain file types or certain subtrees of the DOCROOT.

Hope you find the code useful.

- Murali

------ translate.py ------------
from mod_python import apache, util
from utils import urls
import os, string, config

# This module is responsible for translating the URL space as designed
# into a filename, which Apache will use to control further processing
# of the request.
# It does lots of other things too, basically to process the request
# into a nice, Python-ish format.

def transhandler(req):
    """Translate the request URI into a filename and queue the dispatcher.

    On success the following attributes are attached to ``req``:

      url       -- a urls.URL instance describing the requested URL
      secure    -- result of is_secure(req)
      filename  -- copied from req.url.filename (Apache reads this)
      modpath   -- copied from req.url.modpath
      form_data -- util.FieldStorage for the request, blank values kept

    and "handlers.dispatch" is registered as the PythonHandler.

    Returns apache.OK on success, or apache.HTTP_NOT_FOUND if anything
    goes wrong while translating.  Requests excluded through the
    DontHandle option never get that far: check_dont_handle() raises
    apache.SERVER_RETURN for them.

    NOTE(review): the posted listing had lost its ``try:``/``except`` lines,
    the check_dont_handle() call, the logging call and the tail of the
    FieldStorage(...) argument list.  They are reconstructed here from the
    surviving comments; confirm against the original site code.
    """
    # First check if we're configured not to handle this request.  This
    # must stay outside the try block so that the SERVER_RETURN it raises
    # is not turned into a 404.
    check_dont_handle(req)

    # Otherwise begin handling this normally
    try:
        # We use the URL class to calculate all of the parts of the URL
        # we were given
        req.url = urls.URL(req)

        # Check if this is a secure connection
        req.secure = is_secure(req)

        # Copy some important values into req
        req.filename, req.modpath = req.url.filename, req.url.modpath

        # Grab any relevant form data,
        # NOTE(review): any arguments after keep_blank_values were lost
        # from the listing.
        req.form_data = util.FieldStorage(req, keep_blank_values=1)

        # and register the next handler
        req.add_handler("PythonHandler", "handlers.dispatch")

    except apache.SERVER_RETURN:
        # A deliberate early return from something we called; pass it on.
        raise
    except Exception:
        # And handle any errors during that time by logging and sending the
        # usual Apache 404 message.
        import sys
        exc_type, exc_value = sys.exc_info()[:2]
        apache.log_error(
            "transhandler: cannot translate %r: %s: %s"
            % (req.uri, exc_type, exc_value),
            apache.APLOG_ERR)
        return apache.HTTP_NOT_FOUND

    return apache.OK

def check_dont_handle(req):
    """Supports the PythonOption DontHandle Apache configuration option:

    PythonOption DontHandle initial-uri[:initial-uri[:initial-uri...]]

    which causes the Python site to decline to handle any requests beginning
    with one of the specified initial-uri's.  In that case, normal Apache
    behavior will apply.

    Returns None when the request should be handled.  Raises
    apache.SERVER_RETURN carrying apache.DECLINED when req.uri starts with
    one of the configured prefixes.
    """
    options = req.get_options()
    if not options.has_key('DontHandle'):
        return

    uri = req.uri
    for prefix in options['DontHandle'].split(':'):
        # An empty prefix (empty option value, or a stray ':') would match
        # every URI and decline the whole site, so ignore it.
        if prefix and uri.startswith(prefix):
            # Same meaning as ``raise apache.SERVER_RETURN, apache.DECLINED``
            # but spelled in a way every Python version accepts.
            raise apache.SERVER_RETURN(apache.DECLINED)

def is_secure(req):
    """Tell whether the request was served on the HTTPS port.

    The decision is based only on the port number of ``req.server``: the
    result is True exactly when that port equals 443.
    """
    # Port comparison seems the best way to detect this.
    port = req.server.port
    return port == 443

-----end of translate.py ---------

------ urls.py ---------------

import config
from mod_python import util
import os, string, copy
import re

# This module is responsible for translating the URL space as designed
# into its components:
# http://py.cs.uchicago.edu/path/to/program/args1/args2?internal+data
# ----   ------------------ --------------- ----------- -------------
# |      server             \ script_path   \ args_path \ internal
# \ scheme

# All of these fields are member variables of the URL class. Further,
# args_path is available in list form as url.arguments.
# url.internal is a dictionary representing the value of internal as
# a standard URL query string.  url.special contains the characters
# from internal if it contains no '=', and is empty otherwise.

# the module sets the following fields in req:
#  filename -- the filename of the file which will produce the final
#     response.  Apache cares about this.
#  modpath -- a list of nested Python modules which will succeed in
#     representing the script.
# It's important to note that the latter two options define the file
# which will produce the final page, while the annotations on the URL
# are produced regardless of the Python program that will eventually
# produce the data.
# In particular, Apache is depending on this module to provide
# req.filename.  Apache config commands (e.g. <Directory>) will look
# at this result to determine what to do next.  To help out down the
# road, we also keep track of a module path to get to the relevant .py
# file, and the portion of the URI which specified that file.

# Python script files are:
#   'docroot'/'script_path'.py
# or
#   'docroot'/'script_path'/index.py
# And this program will find the longest possible match among those
# possibilities.

class URL:
    """Object to represent the URL for a given hit on the site.

    Attributes set by the constructor:

      scheme      -- 'https' when the server port is 443, otherwise 'http'
      method      -- the request method (e.g. 'GET', 'POST', 'HEAD')
      server      -- req.server.server_hostname
      filename    -- path of the .py file chosen to serve the request
      modpath     -- config.docmod extended with one name per path
                     component consumed (plus 'index' for an index.py)
      script_path -- the part of the URI that selected the file
      arguments   -- list of the non-empty URI components left over
      args_path   -- the arguments re-joined with '/'

    NOTE(review): the posted listing had lost several lines (``else:``,
    ``break``, the second half of two ``if`` conditions, and the call to
    _parse_path).  They are reconstructed here from the surviving comments.
    The query-string attributes described in the module header (internal,
    special) were not part of the listing and are not set.
    """

    def __init__(self, req):
        # get the scheme (e.g., http or https)
        if req.server.port == 443:
            self.scheme = 'https'
        else:
            self.scheme = 'http'

        # Get the method (e.g., 'GET', 'POST', 'HEAD')
        self.method = req.method

        # server name (just to be safe)
        self.server = req.server.server_hostname

        # parse the path, finding the longest part that still refers to a
        # Python script.
        self._parse_path(req.uri)

    # ----
    # Helper functions

    def _parse_path(self, path):
        """Find the script file that the URI path refers to.

        Walks the path one component at a time below config.docroot.  At
        each level both 'name'.py and 'name'/index.py are tried, and the
        last (that is, longest) candidate that exists wins.  Sets
        self.filename, self.modpath, self.script_path, self.arguments and
        self.args_path.

        Raises LookupError when no script matches at any level.
        """
        # Strip that initial slash
        path = path[1:]

        # Break the path into components
        components = []
        if path:
            components = path.split('/')

        # We operate on 4-tuples:
        current = (
            config.docroot,                   # filename
            config.docmod,                    # modpath
            "/",                              # script_path
            components)                       # arguments
        FILENAME = 0
        MODPATH = 1
        SCRIPT_PATH = 2
        ARGUMENTS = 3

        best = None

        # Loop over those components
        while True:
            # Try two variations of current, in order of length.
            # 'filename'.py
            fn = current[FILENAME] + '.py'
            if self._intree(fn, config.docroot) and os.path.isfile(fn):
                best = (fn, current[MODPATH],
                        current[SCRIPT_PATH], current[ARGUMENTS])

            # Anything that is not a directory cannot be descended into.
            if not os.path.isdir(current[FILENAME]):
                break

            # If it's a directory we can try 'filename'/index.py
            fn = os.path.join(current[FILENAME], 'index.py')
            if self._intree(fn, config.docroot) and os.path.isfile(fn):
                best = (fn, current[MODPATH] + ('index',),
                        current[SCRIPT_PATH], current[ARGUMENTS])

            # And since it's a directory, we can try the next level of
            # directory nesting -- if there is anything left to consume.
            if not current[ARGUMENTS]:
                break

            # Get the first component (that used to be an argument)
            comp = current[ARGUMENTS][0]
            # Break out on any funny business
            if comp in ('..', '.', ''):
                break

            # Ignore '.php', for compatibility
            if comp.endswith('.php'):
                comp = comp[:-4]

            # Move to a new 'current'
            current = (
                os.path.join(current[FILENAME], comp),
                current[MODPATH] + (comp,),
                os.path.join(current[SCRIPT_PATH], comp),
                current[ARGUMENTS][1:])

        if best is None:
            # Fail with a clear message instead of an unpacking error.
            raise LookupError("no Python script matches /%s" % (path,))

        (self.filename, self.modpath,
         self.script_path, self.arguments) = best

        # and touch up the arguments a little bit...
        # filter out any empty elements (always producing a real list)
        self.arguments = [arg for arg in self.arguments if arg]
        # and include a slash in the args_path only if necessary
        if self.script_path[-1] == '/':
            self.args_path = '/'.join(self.arguments)
        else:
            self.args_path = '/' + '/'.join(self.arguments)

    # Returns boolean indicating if PATH is in the directory tree rooted
    # at ROOT.  Also checks that PATH is absolute, and invariant under
    # normpath(), catching things like '../' and './'.  This class uses
    # this function as a sanity check.
    def _intree(self, path, root):
        if path != os.path.abspath(path) or path != os.path.normpath(path):
            return False
        root = os.path.normpath(root)
        if path == root:
            return True
        # Compare against ROOT plus a separator so that siblings which merely
        # share the prefix ('/var/www.py', '/var/www2') are not accepted.
        if root.endswith(os.sep):
            prefix = root
        else:
            prefix = root + os.sep
        return path.startswith(prefix)

---- end of urls.py -----

