#include "CamHCM.h"
#include "Scene.h"
#include "Image.h"
#include "Sample.h"
#include "Shader.h"
#include "CoveragePyramid.h"
#include "TriageMask.h"
#include "List.h"
#include "Tessellation.h"
#include "Octree.h"
#include "Triangle.h"
#include "Light.h"
#include <stdio.h>

static Timer *CurFrameTimer;
static Timer *CurCamTimer;

//
//  Attribute index definitions
//
int CamHCM::kSubpixel;
int CamHCM::kCommand;

//
//  Attribute initialization
//
void CamHCM::initializeAttributes()
{
	Cam::initializeAttributes();

	kSubpixel = addAttr( new AttributeBool("subpixel", CONSTANT, true) );
	kCommand = addAttr( new AttributeBool("command", CONSTANT, false) );
}

//
//  Class constructor - start with visibility structures uninitialized.  Initialize
//                      the attributes for this instance of the class
//
CamHCM::CamHCM()
: tess(NULL), image(NULL), coveragePyramid(NULL), faceOrder(NULL), 
  octree(NULL), ssPoints(NULL)
{
	CamHCM::initializeAttributes();
}

//
//	Pre-render function.  This function is called before any rendering is done.
//  It tessellates the scene and initializes the visibility and image data
//	structures.
//
//	This camera tessellates the entire scene and stores it in a single Tessellation
//  data structure.  This is required so that a global visibility structure can be
//  created.
//
//
void CamHCM::preProcess( Scene& scene )
{
	// set up main camera timer
	CurCamTimer = MainTimer->newChild( "HCM Camera Timer" );
	CurCamTimer->start();

	// set up preprocess timer
	Timer *HCMpreTimer = CurCamTimer->newChild( "HCM preprocess" );
	HCMpreTimer->start();

	// get current attribute values
	vec2 imageRes = attrImageRes();
	vec3 bgColor = attrBgColor();
	bool subpixel = attrSubpixel();
	vec3 origin = attrOrigin();
	vec2 animRange = attrAnimRange();
	
	// first, set the current time to the start of the animation range
	scene.setCurTime( animRange[0] );

	// call the parent preProcess method
	Cam::preProcess( scene );

	// evaluate the geometry transforms at the current time and compute the scene
	// bounding box
	scene.updateGeometry();
	scene.buildBB();

	//*******************START IMAGE SETUP*****************************************
	//
	//  Here we initialize the Image structure that will store the rendered image
	//
	cout << endl << "Setting up image..." << endl;

	Timer *HCMsetupTimer = HCMpreTimer->newChild( "HCM Image/coverage pyramid setup" );
	HCMsetupTimer->start();

	// round the image size to the nearest square resolution where the
	// width/height are powers of 8
	double res = MAX(imageRes[VX], imageRes[VY]);
	int pow = 1<<fastDblToInt((3.0*ROUND((log10(res)/log10(2.0))/3.0)));
	imageRes = vec2( (double)pow );

	// initialize the image (no z-buffer is needed)
	image = new Image( imageRes, bgColor );

	//set up debugging info, if necessary
	#ifdef DEBUG_RENDERDUDE
		vec4 debugRegion = attrDebugRegion();
		if( !(debugRegion == vec4(-1.0)) ) {
			image->setDebugRegion( debugRegion );
		}
	#endif

	cout << "Done image setup." << endl;

	// *******************END IMAGE SETUP******************************************



	//*******************START COVERAGE PYRAMID SETUP******************************
	//
	//  Here we initialize the CoveragePyramid structure which represents visibility
	//  information in screenspace
	//
	cout << endl << "Setting up coverage pyramid..." << endl;

	// we need to supply the resolution and say whether we wish to resolve visibility
	// once per pixel or at the subpixel level (64 samples per pixel)
	coveragePyramid = new CoveragePyramid( pow, subpixel );

	HCMsetupTimer->stop();
	cout << "Done coverage pyramid setup." << endl;
	//*******************END COVERAGE PYRAMID SETUP*********************************



	//*******************START TESSELLATION*****************************************
	//
	//  Here we tessellate all the geometry in the scene, storing the whole thing in
	//  a single Tessellation structure.  The tessellation is performed in worldspace,
	//  which is why object animation is not supported.
	//
	cout << endl << "Tessellating..." << endl;

	Timer *HCMTessTimer = HCMpreTimer->newChild( "HCM tessellation" );
	HCMTessTimer->start();

	// have the objects tessellate themselves into worldspace.  Require that the local
	// tessellations compute plane equations for each face.
	TessOptions localOptions = kTessPlaneEqs;
	scene.localTessellate( localOptions, WORLD );

	// merge the local tessellations into one big global tessellation.  
	TessOptions globalOptions = kTessPlaneEqs | kTessObjIds;
	tess = Tessellation::mergeLocalTessellations( scene.objList, globalOptions, true );

	HCMTessTimer->stop();
	cout << "Done Tessellation.  Number of triangles = " << tess->numTriangles() << endl;
	//*******************END TESSELLATION*********************************************



	//*******************START WORLDSPACE VISIBILITY COMPUTATION***********************
	//
	//  Here we construct a visiblity structure that will permit strict front-to-back
	//  traversal of the tessellation.  First we organize the faces into an octree which
	//  only has faces in its leaf nodes, then we organize each leaf node into a BSP tree.
	//

	// create the octree, using the scene bounding box as the size of the initial node
	cout << endl << "Creating octree..." << endl;
	Timer *OctreeTimer = HCMpreTimer->newChild( "Octree Creation" );
	OctreeTimer->start();

	octree = new Octree( tess, scene.getBB() );

	OctreeTimer->stop();
	cout << "Done octree - number of triangles = " << tess->numTriangles() << endl;

	// organize the leaf nodes into BSP trees
	cout << endl << "Creating BSP..." << endl;
	Timer *BspTimer = HCMpreTimer->newChild( "BSP Creation" );
	BspTimer->start();

	octree->constructBSP();

	BspTimer->stop();
	cout << "Done BSP - number of triangles = " << tess->numTriangles() << endl;

	//*******************END WORLDSPACE VISIBILITY COMPUTATION***********************


	// allocate space for the front-to-back face ordering
	faceOrder = new List<int>( tess->size() );

	// set up light list
	lights = &(scene.lightList);

	// allocate space for screenspace vertex projections
	int numVertices = tess->vList.size();
	ssPoints = new List<vec3>( numVertices );
	ssPoints->resize(numVertices);

	//
	//	If the camera is not animated, we do not need to recompute our face ordering
	//  and screenspace projectionts at every frame (in the preFrameProcess() function), 
	//  so we just do them once here.
	//
	if( !camAnimated() ) {

		// compute the front-to-back face ordering for the current eyepoint
		cout << endl << "Computing ordering..." << endl;
		faceOrder->resize(0);
		octree->orderFaces( attrOrigin(), (*faceOrder) );
		cout << "Done computing ordering." << endl;

		// project all the points into screenspace
		cout << endl << "Computing screenspace points..." << endl;
		worldToScreen( tess->vList, (*ssPoints) );
		cout << "Done computing screenspace points." << endl;
	}

	HCMpreTimer->stop();
}


//
//	Pre-frame function, gets called before each frame is rendered.  
//	Here we clear the image and the coverage pyramid.  If the camera
//  is animated, we recompute our screenspace projections and 
//  front-to-back face ordering
//
//	If the subpixel visibility option is turned on, we use the image
//  as an accumulation buffer, so we need to initialize the image to
//  black, regardless of the actual background colour.  In this case,
//  the background image is blended in after all the faces have been 
//  rendered.
//
void CamHCM::preFrameProcess( Scene& scene )
{
	// set up the timer for this frame
	char timerName[50];
	sprintf( timerName, "Frame %d render", (int)scene.getCurTime() );
	CurFrameTimer = CurCamTimer->newChild( timerName );
	CurFrameTimer->start();

	Cam::preFrameProcess(scene);

	// clear the image before we render the frame
	if( attrSubpixel() ) {
		// clear to black for subpixel rendering
		image->clear(vec3(0.0));
	} else {
		// clear to background colour
		image->clear( attrBgColor() );
	}

	// clear the coverage pyramid
	coveragePyramid->clear();

	// if the camera is animated, recompute screenspace projections
	// and face ordering
	if( camAnimated() ) 
	{
		// get the WS camera position and compute a front-to-back 
		// ordering for that position
		cout << endl << "Computing ordering..." << endl;
		vec3 camWsOrigin = cumulativeTform * attrOrigin();
		faceOrder->resize(0);
		octree->orderFaces( camWsOrigin, (*faceOrder) );
		cout << "Done computing ordering." << endl;

		// recompute screenspace vertices and face ordering
		cout << "Starting screenspace conversion..." << endl;
		worldToScreen( tess->vList, (*ssPoints) );
		cout << "Done screenspace conversion..." << endl;
	}
}


//
//	Post-frame function, gets called after each frame is rendered.
//  Here we render the background (if necessary), and write the image to file
//
void CamHCM::postFrameProcess( Scene& scene )
{
	// blend in the background, if necessary
	vec3 bgColor = attrBgColor();

	// we only need to render the background if it is not black, and if subpixel
	// visibility is enabled (otherwise the background pixels are already the correct
	// colour
	if( (bgColor != vec3(0.0f)) && attrSubpixel() ) {
		renderBG( bgColor, 0, 0, 0 );
	}

	// write the image to file
	char fileName[100];
	sprintf(fileName, "%s.%d.ppm", attrImageName(), (int)scene.getCurTime());
	image->write( fileName );

	CurFrameTimer->stop();
}


//
//	Post-render function, gets called after all frames have been rendered.  Here we
//  clean up the data structures.
//
void CamHCM::postProcess( Scene& scene )
{
	delete tess;
	delete faceOrder;
	delete octree;
	delete image;
	delete coveragePyramid;
	delete ssPoints;

	CurCamTimer->stop();
}


//
//  Frame render function, gets called to render each frame
//
void CamHCM::renderFrame( Scene& scene )
{
	// get current attribute values
	ShadingType interpolateShading = attrShadingType();
	vec2 imageRes = attrImageRes();
	vec3 origin = attrOrigin();

	// how many faces?
	int numFaces = faceOrder->size();

	// triangles for the current face
	Triangle curTess[MAX_VERTICES];

	// sample to be reused for shading 
	Sample& vSample = Attribute::curSample();
	vSample.eye = origin;


	// render each face in front to back order
	for( int i = 0; i < numFaces; i++ ) {
	
		int curFace = (*faceOrder)[i];

		#ifdef DEBUG_RENDERDUDE
			image->setDebugFace( curFace );
		#endif

		// level, x, y of smallest pyramid element enclosing bounding box
		int startLevel, xL, xH, yL, yH;

		// get the size and vertex indices of the current face
		int *vIndices = tess->fList.getList(curFace);
		int faceSize = tess->fList.getListSize(curFace);

		// compute the screenspace bounding box of the current face
		BoundingBox ssBoundingBox;
		for( int j = 0; j < faceSize; j++ ) {
			ssBoundingBox.expand( (*ssPoints)[vIndices[j]] );
		}

		// get the upper/lower bounds of the screenspace bounding box
		vec3 low = ssBoundingBox.lowBound();
		vec3 high = ssBoundingBox.highBound();
		
		// if per-face or flat shading is enabled, we can compute the face colour here
		if( interpolateShading == kPerFace ) {
			
			// for per-face shading, calculate a random face colour
			curFaceColor[0] = ((double)rand())/RAND_MAX;
			curFaceColor[1] = ((double)rand())/RAND_MAX;
			curFaceColor[2] = ((double)rand())/RAND_MAX;
			curFaceColor.clamp(0.0,1.0);

		} else if( interpolateShading == kFlat ) {

			// for flat shading, use white as face colour
			curFaceColor = vec3(1.0,1.0,1.0);
		}

		// discard outside view frustum.  TBD - clip to view frustum
		if( high[VX] <= 0.0 || low[VX] >= imageRes[VX] || high[VY] <= 0.0 || low[VY] >= imageRes[VY] ) {
			continue;
		}

		// find the smallest enclosing screenspace square(s) for this face
		coveragePyramid->enclosingSquares( ssBoundingBox, startLevel, xL, xH, yL, yH );

		// try to do a fast check for occlusion by testing the face's bounding box
		bool occluded = true;
		for( int x = xL; x <= xH; x++ ) {
			for( int y = yL; y <= yH; y++ ) {
				occluded = occluded & ( !coveragePyramid->isBoundingBoxVisible( ssBoundingBox, startLevel, x, y ) );
				if( !occluded ) break;
			}
			if( !occluded ) break;
		}

		// only render the current face if its bounding box was not occluded
		if( !occluded ) {

			// determine whether this face is backfacing, we need to flip the vertex order if it is
			vec3 eyeToFace = (tess->vList[vIndices[0]])-origin;
			bool reverse = ( eyeToFace * vec3(tess->eList[curFace],VW) > 0.0 );

			// for per-face and flat shading, we render each face as a whole
			if( (interpolateShading == kPerFace) || (interpolateShading == kFlat) ) {
				for( int x = xL; x <= xH; x++ ) {
					for( int y = yL; y <= yH; y++ ) {
						renderFace( curFace, reverse, startLevel, x, y );
					}
				}

			} else {

				// we need to do interpolative shading, so we break the face into triangles 
				// (for barycentric interpolation).  Tessellate using the worldspace vertices 
				// and normals in the tess data.
				int numTriangles = tess->tessellateFace( curFace, reverse, NULL, (*ssPoints), NULL, curTess ); 

				// get the shader for the current face
				Shader *shaderPtr = tess->getFaceShape(curFace)->getShader();

				// get reference to the sample that we will be using (all nodes in the shading
				// network will use this same sample)
				Sample& curSample = Attribute::curSample();

				// render each triangle of the current face
				for( int curTri = 0; curTri < numTriangles; curTri++ ) {
					int v;  

					// do some shading type-specific setup work before rendering each triangle
					switch( interpolateShading ) {

					case kPerVertex:

						// set up the affineTri interpolation data structure for the current triangle
						curTriData.setup( curTess[curTri] );

						// compute the vertex colours
						for( v = 0; v < 3; v++ ) {
							curTess[curTri].fillSample( v, vSample );
							vertexColors[v] = shaderPtr->computeColor( scene.lightList ).clamp(0.0,1.0);
						}
						break;


					case kPerPixel:

						// just initialize the interpolation data structure, we will shade at each pixel
						curTriData.setup( curTess[curTri] );
						break;

					}

					// render each triangle into the region
					for( int x = xL; x <= xH; x++ ) {
						for( int y = yL; y <= yH; y++ ) {
							renderTri( curTess[curTri], shaderPtr, startLevel, x, y );
						}
					}
				}
			}
		}
	}
}


//
//  This function renders a single face as a whole.  This is used only by shading
//  modes that assign a constant colour to each face, namely flat and per-face modes.
//
//	The rendering starts at position (x,y) at the specified level in the coverage
//  pyramid, where level 0 is the peak of the pyramid ( (0,0) at level 0 represents the
//  entire image).
//
void CamHCM::renderFace( int face, bool reverse, int level, int x, int y )
{
	ShadingType interpolateShading = attrShadingType();

	// only use for per-face shading
	DB_ASSERT( ((interpolateShading == kPerFace) || (interpolateShading == kFlat)), 
				"ERROR - wrong shading model for face rendering" );
	

	Bitmask W;				// current write mask
	Bitmask A;				// current active mask
	TriageMask faceTmask;	// triage mask for the current face
	vec3 currentColor;		// colour of current sample

	// determine the region of the image into which we are rendering
	vec4 region = coveragePyramid->imageRange( level, x, y );

	if( level == coveragePyramid->maxLevel() ) {

		// if this is the lowest level of the coverage pyramid, we write pixels here.  If
		// subpixel rendering is on, then this region represents a single pixel, otherwise
		// it represents an 8x8 block of pixels. Either way, the visibility of the current
		// region is represented by a Bitmask (not a TriageMask) in the coverage pyramid

		// get the bitmask for the current face
		Bitmask bmask = tess->bitMask( face, reverse, *ssPoints, region, false );

		// tile the current face into the coverage pyramid at the appropriate level.  The
		// results are the W and A masks for the current face.
		coveragePyramid->tilePolygon( level, x, y, TriageMask(bmask,~bmask), W, A );

		// we now write the appropriate pixels
		if( attrSubpixel() ) {

			// for subpixel rendering, estimate the pixel coverage from the bitcount in the
			// W mask and use this value to weight the face colour.  Increment the pixel color
			// by this amount
			float covFrac = (float)W.numBits() / 64.0f;
			vec3 incr = covFrac*curFaceColor;

			#ifdef DEBUG_RENDERDUDE
				if( image->regionDebug() ) {
					image->setDebugMask( W );
				}
			#endif

			// increment the current pixel
			image->incrementPixel( x, y, incr );

		} else {

			// subpixel rendering is off, so the current region is an 8x8 block of
			// pixels in the image.  Fill in the appropriate pixels with the face colour

			// lower left corner of the region
			int startX = fastDblToInt(region[XL]);
			int startY = fastDblToInt(region[YL]);

			// current pixel in the region
			int curX = 0;
			int curY = 0;

			// how many pixels to fill?
			int pixelsToFill = W.numBits();

			// scan through the region, left to right, top to bottom, until we have
			// filled in all required pixels
			while( pixelsToFill > 0 ) {

				// fill in the current pixel, if necessary
				if( W.testBit(curX,curY) ) {
					image->setPixel( startX+curX, startY+curY, curFaceColor );
					pixelsToFill--;
				}

				// increment position to next pixel
				curX++;			// next col
				curY+=(curX/8); // bump to next row if necessary
				curX %= 8;		// wrap back to start if necessary
			}
		}

	} else {
		
		// we are not at the lowest level of the coverage pyramid, so we tile the face,
		// write the regions that are covered, and recurse on the active regions

		// lower left corner of the region
		int startX = fastDblToInt(region[XL]);
		int startY = fastDblToInt(region[YL]);

		// the subregion currently being considered
		int curX, curY;

		// number of pixels to increment when moving through the region
		int incr = fastDblToInt(((region[XU]-region[XL])/8.0));

		// get the triage mask for the face in this region and tile it into the coverage
		// pyramid
		TriageMask tmask = tess->triageMask( face, reverse, *ssPoints, region, true );
		coveragePyramid->tilePolygon( level, x, y, tmask, W, A );

		int numActive = A.numBits();
		int numWritable = W.numBits();

		// loop through the write mask looking for writable regions to write
		curX = curY = 0;
		while( numWritable ) {

			// check to see if the current subregion is writable
			if( W.testBit(curX,curY) ) {

				// set the pixels in the current region
				image->setPixels( startX+(curX*incr), startY+(curY*incr), 
								  startX+((curX+1)*incr)-1, startY+((curY+1)*incr)-1, 
								  curFaceColor );

				// fill in the coverage mask below this region - TBD - should be part of 
				// tilePolygon()
				coveragePyramid->fillBelow( level+1, 8*x+curX, 8*y+curY );

				numWritable--;
			}

			// advance to the next subregion
			curX++;
			curY += (curX/8);
			curX %= 8;
		}
				

		// loop through the active mask looking for active regions to recurse on
		curX = curY = 0;
		while( numActive ) {

			// check to see if the current subregion is active
			if( A.testBit(curX,curY) ) {

				// if it is, recurse on it
				renderFace( face, reverse, level+1, 8*x+curX, 8*y+curY );
				numActive--;
			}

			// advance to the next subregion
			curX++;
			curY += (curX/8);
			curX %= 8;
		}
	}

}

//
//  This function is used in the interpolative shading modes to render a single triangle,
//  starting from position (x,y) at the specified level in the coverage pyramid.
//
void CamHCM::renderTri( Triangle& tri, Shader *shader, int level, int x, int y )
{
	Bitmask W;				// write mask for current region
	Bitmask A;				// active mask for current region
	TriageMask triTmask;	// triage mask for current tri in current region

	vec2 screenSamplePoint;	// point at which shading samples are taken
	vec3 sampleCoords;		// barycentric coordinates of sample position
	Sample& currentSample = Attribute::curSample();	// sample used for all shading
	vec3 currentColor;		// color of current sample

	// fill in the eye position in the shading sample 
	vec3 origin = attrOrigin();
	currentSample.eye = origin;

	// figure out what region of the image we are rendering into
	vec4 region = coveragePyramid->imageRange( level, x, y );

	if( level == coveragePyramid->maxLevel() ) {

		// if this is the lowest level of the pyramid, then the visibility of the
		// current region is represented by a bitmask, so we composite the triangle
		// mask with the current mask to determine which samples of the triangle are
		// visible.

		// get the bitmask for the triangle in the current region
		Bitmask bmask = tri.bitMask( region, false );

		// tile the triangle into the region
		coveragePyramid->tilePolygon( level, x, y, TriageMask(bmask,~bmask), W, A );

		if( attrSubpixel() ) {

			// if subpixel rendering is enabled, the current region is a single pixel, so
			// calculate the color of the triangle, weight the color by the triangle's 
			// coverage, and increment the pixel color by this amount

			switch( attrShadingType() ) {

			case kPerVertex:

				// For per-vertex shading, use the centre of the pixel as the color of
				// the fragment.  Interpolate this colour from the already computed
				// colors at the vertices.

				// choose the sample point and compute the affine coordinates of the point
				// (in screenspace)
				screenSamplePoint = vec2( x+0.5, y+0.5 );
				sampleCoords = curTriData.computeAC( screenSamplePoint );

				// use the affine coordinates to interpolate the already computed vertex colours
				currentColor = sampleCoords[0]*vertexColors[0] +
							   sampleCoords[1]*vertexColors[1] +
							   sampleCoords[2]*vertexColors[2];
				currentColor.clamp(0.0,1.0);

				break;

			case kPerPixel:

				// for per-pixel shading, take an actual shading sample at the centre of
				// the pixel

				// choose the sample point and compute the affine coordinates at the point
				screenSamplePoint = vec2( x+0.5, y+0.5 );
				sampleCoords = curTriData.computeAC( screenSamplePoint );

				// interpolate the shading parameters at the sample point
				tri.fillSample( sampleCoords, currentSample );

				// take a shading sample at the sample point
				currentColor = shader->computeColor( *lights ).clamp(0.0,1.0);
				break;
			}

			// estimate the pixel coverage fraction by the number of bits set in the
			// mask
			float covFrac = (float)W.numBits() / 64.0f;

			// weight the color by the coverage and increment the pixel value
			vec3 incr = covFrac*currentColor;
			image->incrementPixel( x, y, incr );

		} else {

			// we are at the lowest level of the pyramid, but subpixel rendering is not
			// turned on, so the current region is an 8x8 block of pixels.  Now we are
			// only taking one visibility sample per pixel, and we still do shading at the
			// centre of the pixel

			// bottom-left corder of the region
			int startX = fastDblToInt(region[XL]);
			int startY = fastDblToInt(region[YL]);

			// current position in the region
			int curX = 0;
			int curY = 0;

			// loop through the set bits in the write mask and write the corresponding
			// pixels
			int numWritable = W.numBits();
			while( numWritable ) {

				// if the current pixel is to be written, set its color
				if( W.testBit(curX,curY) ) {

						switch( attrShadingType() ) {

						case kPerVertex:

							// for per-vertex shading, same as above, interpolate the triangle
							// colors at the centre of the pixel from the vertex colors of the
							// triangle

							screenSamplePoint = vec2( startX+curX+0.5, startY+curY+0.5 );
							sampleCoords = curTriData.computeAC( screenSamplePoint );

							currentColor = sampleCoords[0]*vertexColors[0] +
										   sampleCoords[1]*vertexColors[1] +
										   sampleCoords[2]*vertexColors[2];
							currentColor.clamp(0.0,1.0);

							break;

						case kPerPixel:

							// for per-pixel shading, same as above, interpolate the shading
							// parameters at the centre of the pixel and take a shading sample

							screenSamplePoint = vec2( startX+curX+0.5, startY+curY+0.5 );
							sampleCoords = curTriData.computeAC( screenSamplePoint );

							tri.fillSample( sampleCoords, currentSample );
							currentColor = shader->computeColor( *lights ).clamp(0.0,1.0);

							break;
						}

						// set the pixel
						image->setPixel( startX+curX, startY+curY, currentColor );

						// one less pixel to be written
						numWritable--;
				}

				// advance to the next subregion
				curX++;
				curY += (curX/8);
				curX %= 8;
			}
		}

	} else {
		
		// we are not at the lowest level of the pyramid, so tile the triangle into the 
		// current region, shade the regions that are writable, and recurse on the regions
		// that are active

		// bottom left of the region
		int startX = fastDblToInt(region[XL]);
		int startY = fastDblToInt(region[YL]);

		// distance between subregions
		int incr = fastDblToInt(((region[XU]-region[XL])/8.0));

		int xL, xH, yL, yH;		// pixel boundaries of region to be filled in
		int curX, curY;			// current pixel in region

		// get the triage mask for the triangle in the current region
		TriageMask tmask = tri.triageMask( region, true );

		// tile the triangle into the current region
		coveragePyramid->tilePolygon( level, x, y, tmask, W, A );


		// loop through the writable subregions
		int curRegionX = 0;
		int curRegionY = 0;

		int numWritable = W.numBits();
		int numActive = A.numBits();

		while( numWritable ) {

			if( W.testBit(curRegionX, curRegionY) ) {

				// fill in the coverage pyramid below this region
				coveragePyramid->fillBelow( level+1, 8*x+curRegionX, 8*y+curRegionY );

				// one less region writable
				numWritable--;

				// determine the pixel range of the writable region
				xL = startX+(curRegionX*incr);
				xH = xL+incr;
				yL = startY+(curRegionY*incr);
				yH = yL+incr;

				// loop through all the pixels in the writable region and fill them in
				switch( attrShadingType() ) {

				case kPerVertex:

					// loop through the pixels in the range.  Calculate each pixel color
					// by interpolating the color at the centre of the pixel from the colors
					// of the triangle vertices

					for( curX = xL; curX < xH; curX++ ) {
						for( curY = yL; curY < yH; curY++ ) {

							// compute centre of pixel and its affine coords
							screenSamplePoint = vec2( curX+0.5, curY+0.5 );
							sampleCoords = curTriData.computeAC( screenSamplePoint );

							// interpolate the pixel centre color
							currentColor =	sampleCoords[0]*vertexColors[0] +
											sampleCoords[1]*vertexColors[1] +
											sampleCoords[2]*vertexColors[2];
							currentColor.clamp(0.0,1.0);

							// set the pixel to the color
							image->setPixel( curX, curY, currentColor );
						}
					}

					break;

					case kPerPixel:
				
						// loop through the pixels in the range.  For each, interpolate the
						// shading parameters at the centre of the pixel and take a shading
						// sample there

						for( curX = xL; curX < xH; curX++ ) {
							for( curY = yL; curY < yH; curY++ ) {

								// compute centre of pixel and its affine coords
								screenSamplePoint = vec2( curX+0.5, curY+0.5 );
								sampleCoords = curTriData.computeAC( screenSamplePoint );

								// compute the shading sample data and calculate the color
								tri.fillSample( sampleCoords, currentSample );
								currentColor = shader->computeColor( *lights ).clamp(0.0,1.0);
								image->setPixel( curX, curY, currentColor );
							}
						}

						break;
				}
			}

			// advance to the next subregion
			curRegionX++;
			curRegionY += (curRegionX/8);
			curRegionX %= 8;
		}
			

		// loop through the active subregions
		curRegionX = 0;
		curRegionY = 0;

		while( numActive ) {

			if( A.testBit(curRegionX, curRegionY) ) {
				
				// current subregion is active, so recurse on it
				renderTri( tri, shader, level+1, 8*x+curRegionX, 8*y+curRegionY );

				// one less active region
				numActive--;
			}

			// advance to the next subregion
			curRegionX++;
			curRegionY += (curRegionX/8);
			curRegionX %= 8;

		}
	}
}

//
//  This function renders the constant-color background as if it were
//  a plane perpendicular to the camera and covering the entire image.
//  We must do it this way when subpixel rendering is enables, otherwise
//  blending of subpixel fragments won't be correct.
//
void CamHCM::renderBG( vec3& bgColor, int level, int x, int y )
{
	// the mask for the background plane is always full, since it fills
	// the entire image, so precompute some full masks.
	static Bitmask fullBitmask(FULL);
	static TriageMask fullTriageMask(FULL);

	Bitmask W, A;	// writable, active masks for current region
	vec4 region;	// image coordinates of current region


	if( level == coveragePyramid->maxLevel() ) {

		// this is the lowest level of the pyramid

		// tile the bg into the current region
		coveragePyramid->tilePolygon( level, x, y, fullTriageMask, W, A );

		if( attrSubpixel() ) {

			// if subpixel rendering is on, the region represents a single
			// pixel, so blend in the bg color
			float covFrac = (float)W.numBits() / 64.0f;
			vec3 incr = covFrac*bgColor;

			image->incrementPixel( x, y, incr );

		} else {

			// the current region is an 8x8 block of pixels, so fill in 
			// the writable pixels
			region = coveragePyramid->imageRange( level, x, y );

			int startX = fastDblToInt(region[XL]);
			int startY = fastDblToInt(region[YL]);

			for( int i = 0; i < 8; i++ ) {
				for( int j = 0; j < 8; j++ ) {

					if( W.testBit(i,j) ) {
						image->setPixel( startX+i, startY+j, bgColor );
					}
				}
			}
		}

	} else {

		// we are not at the lowest level of the pyramid, so tile the poly into the 
		// current region, write the writable regions, and recurse on the active
		// regions
		region = coveragePyramid->imageRange( level, x, y );
		
		int startX = fastDblToInt(region[XL]);
		int startY = fastDblToInt(region[YL]);
		int incr = fastDblToInt(((region[XU]-region[XL])/8.0));

		// tile the poly
		coveragePyramid->tilePolygon( level, x, y, fullTriageMask, W, A );

		int numWritable = W.numBits();
		int numActive = A.numBits();

		// current subregion
		int curX = 0;
		int curY = 0;

		// write the writable subregions
		while( numWritable ) {

			// if the current subregion is writable, fill it in with the
			// background color
			if( W.testBit(curX,curY) ) {
				image->setPixels( startX+(curX*incr), startY+(curY*incr), 
								  startX+((curX+1)*incr)-1, startY+((curY+1)*incr)-1, 
								  bgColor );

				numWritable--;
			}

			// advance to the next subregion
			curX++;
			curY += (curX/8);
			curX %= 8;
		}

		// recurse on the active regions
		curX = curY = 0;
		while( numActive ) {

			// recurse on current subregion if it is active
			if( A.testBit(curX,curY) ) {
				renderBG( bgColor, level+1, 8*x+curX, 8*y+curY );
				numActive--;
			}

			// advance to the next subregion
			curX++;
			curY += (curX/8);
			curX %= 8;
		}
	}
}




