Tuesday, May 10, 2011

Head Pose Estimation by using POSIT in OpenCV

I wanted to use OpenCV functions to do "Head Pose Estimation", and I found an interesting post by "Roy" at http://www.morethantechnical.com/2010/03/19/quick-and-easy-head-pose-estimation-with-opencv-w-code.
In his post, OpenCV's solvePnP is used to estimate the camera pose (rotation and translation) that maps 3D model points to their 2D projections in the image. But OpenCV also provides a POSIT implementation, along with a very nice POSIT tutorial at http://opencv.willowgarage.com/wiki/Posit. So I decided to take Roy's data and feed it to OpenCV's POSIT function.
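In outline, POSIT needs only two calls: build a CvPOSITObject from the 3D model points, then hand cvPOSIT the corresponding 2D image points and a focal length to recover the rotation matrix and translation vector. A minimal sketch (names are placeholders; the full program below shows the real values):

CvPOSITObject* posit = cvCreatePOSITObject(&modelPoints[0], (int)modelPoints.size());
float rotation[9], translation[3];
CvTermCriteria criteria = cvTermCriteria(CV_TERMCRIT_EPS | CV_TERMCRIT_ITER, 100, 1.0e-4f);
cvPOSIT(posit, &imagePoints[0], FOCAL_LENGTH, criteria, rotation, translation);
cvReleasePOSITObject(&posit);

The complete program: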

#include <iostream>
#include <vector>
#include <cxcore.h>
#include <cv.h>
#include <highgui.h>


using namespace std;
using namespace cv;

#define FOCAL_LENGTH 1000
#define CUBE_SIZE 10
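// FOCAL_LENGTH is an assumed focal length in pixels (the camera is not calibrated);
// CUBE_SIZE is left over from the original POSIT tutorial's cube model and is unused here.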

static const char* IMAGE_DATA_PATH = "/Users/mhrinc/Desktop/image_data/";
#define IMAGE_EXTENSION ".jpeg"
#define DATA_EXTENSION ".txt"

std::vector<CvPoint3D32f> modelPoints;
std::vector<CvPoint3D32f> glassesPoints;

void getImagePath(char* result,const char* imageFileName) {
    sprintf(result,"%s%s%s", IMAGE_DATA_PATH, imageFileName,IMAGE_EXTENSION);
}
void getDataPath(char* result,const char* dataFileName) {
    sprintf(result,"%s%s%s", IMAGE_DATA_PATH, dataFileName,DATA_EXTENSION);
}

vector<const char* > listOfImages() {
    vector<const char* > arrayOfImages;
    //Man
    
    arrayOfImages.push_back("man/barack_obama_1");
    arrayOfImages.push_back("man/barack_obama_2");
    arrayOfImages.push_back("man/bill-gates_1");
    arrayOfImages.push_back("man/bill-gates_2");
    arrayOfImages.push_back("man/david_beckham_1");
    arrayOfImages.push_back("man/david_beckham_2");
    arrayOfImages.push_back("man/nguyen_tan_dung_1");
    arrayOfImages.push_back("man/nguyen_tan_dung_2");
    arrayOfImages.push_back("man/bill_clinton_1");
    arrayOfImages.push_back("man/bill_clinton_2");
    //Woman
    
    arrayOfImages.push_back("woman/britney_spears_1");
    arrayOfImages.push_back("woman/cheryl_cole_2");
    arrayOfImages.push_back("woman/jang_nara_1");
    arrayOfImages.push_back("woman/kim_tae_hee_2");
    arrayOfImages.push_back("woman/britney_spears_2");
    arrayOfImages.push_back("woman/harry_clinton_1");
    arrayOfImages.push_back("woman/jang_nara_2");
    arrayOfImages.push_back("woman/michelle_obama_1");
    arrayOfImages.push_back("woman/cheryl_cole_1");
    arrayOfImages.push_back("woman/harry_clinton_2");
    arrayOfImages.push_back("woman/kim_tae_hee_1");
    arrayOfImages.push_back("woman/michelle_obama_2");
    arrayOfImages.push_back("woman/Angelina_Jolie_0002");
    arrayOfImages.push_back("woman/Angelina_Jolie_0003");
    arrayOfImages.push_back("woman/Angelina_Jolie_0004");
    arrayOfImages.push_back("woman/Angelina_Jolie_0005");
    arrayOfImages.push_back("woman/Angelina_Jolie_0006");
    arrayOfImages.push_back("woman/Angelina_Jolie_0007");
    arrayOfImages.push_back("woman/Angelina_Jolie_0008");
    
    return arrayOfImages;
}

void loadNext(CvPOSITObject* positObject, const char* imageFileName) {
    int _w, _h;
    char buf[256] = {0};
    getDataPath(&buf[0], imageFileName);
    cout << "Data:"<< buf << endl;
    vector<CvPoint2D32f > points;
    FILE* f = fopen(buf, "r");
    if (!f) {
        cout << "Cannot open data file: " << buf << endl;
        return;
    }
    fscanf(f, "%i", &_w);
    fscanf(f, "%i", &_h);
    cout << "width:" << _w << ",height:" << _h << endl;
    float _width = _w/2.0;
    float _height = _h/2.0;
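    // POSIT expects image coordinates relative to the image center, with the y axis
    // pointing up, so each (x, y) read from the file is shifted and flipped below.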
    for(int i=0;i<7;i++) {
        int x,y;
        fscanf(f,"%d",&x);
        fscanf(f,"%d",&y);
        cout << x<<","<<y<<",";
        points.push_back(cvPoint2D32f(-_width+x,_height - y));
    }
    cout << endl;
    fclose(f);
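    // Note: each data file holds 7 feature points, but cvPOSIT reads the point count
    // from the CvPOSITObject, so only the first 5 (matching the model points) are used.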
    
    getImagePath(&buf[0], imageFileName);
    cout << "Image:"<< buf << endl;
    
    Mat img = imread(buf);
    for(unsigned int i=0;i<points.size();i++) {
        CvPoint2D32f p = cvPoint2D32f(points[i].x+_width,_height-points[i].y);
        circle(img,p,2,Scalar(255,0,255),CV_FILLED);
    }

    //Estimate the pose
    CvMatr32f rotation_matrix = new float[9];
    CvVect32f translation_vector = new float[3];    
    CvTermCriteria criteria = cvTermCriteria(CV_TERMCRIT_EPS | CV_TERMCRIT_ITER, 100, 1.0e-4f);
    cvPOSIT( positObject, &points[0], FOCAL_LENGTH, criteria, rotation_matrix, translation_vector );

    cout << "\n-.- SOURCE IMAGE POINTS -.-\n";
    for ( size_t p=0; p<points.size(); p++ )
        cout << points[p].x << ", " << points[p].y << " \n";
    
    cout << "\n-.- ESTIMATED ROTATION\n";
    for ( size_t p=0; p<3; p++ )
        cout << rotation_matrix[p*3] << " | " << rotation_matrix[p*3+1] << " | " << rotation_matrix[p*3+2] << "\n";
    
    cout << "\n-.- ESTIMATED TRANSLATION\n";
    cout << translation_vector[0] << " | " << translation_vector[1] << " | " << translation_vector[2] << "\n";
    
    //Project the model points with the estimated pose
    std::vector<CvPoint2D32f> projectedPoints;
    for ( size_t  p=0; p<glassesPoints.size(); p++ )
    {
        CvPoint3D32f point3D;
        point3D.x = rotation_matrix[0] * glassesPoints[p].x + 
        rotation_matrix[1] * glassesPoints[p].y +
        rotation_matrix[2] * glassesPoints[p].z +
        translation_vector[0];
        point3D.y = rotation_matrix[3] * glassesPoints[p].x + 
        rotation_matrix[4] * glassesPoints[p].y +
        rotation_matrix[5] * glassesPoints[p].z +
        translation_vector[1];
        point3D.z = rotation_matrix[6] * glassesPoints[p].x + 
        rotation_matrix[7] * glassesPoints[p].y +
        rotation_matrix[8] * glassesPoints[p].z +
        translation_vector[2];
        CvPoint2D32f point2D = cvPoint2D32f( 0.0, 0.0 );
        if ( point3D.z != 0 )
        {
            point2D.x = FOCAL_LENGTH * point3D.x / point3D.z; 
            point2D.y = FOCAL_LENGTH * point3D.y / point3D.z;    
        }
        projectedPoints.push_back( point2D );
    }
    cout << "\n-.- PROJECTED POINTS -.-\n";
    for ( size_t p=0; p<projectedPoints.size(); p++ )
        cout << projectedPoints[p].x << ", " << projectedPoints[p].y << " \n";
    for(unsigned int i=0;i<projectedPoints.size();i++) {
        CvPoint2D32f p = cvPoint2D32f(projectedPoints[i].x+_width,_height-projectedPoints[i].y);
        circle(img,p,2,Scalar(255,0,0),CV_FILLED);
    }
    
    delete [] rotation_matrix;
    delete [] translation_vector;
    imshow("1", img);
    cvWaitKey(0);
}    

int main(int argc, char** argv)
{
    cout << "OpenCV POSIT tutorial" << endl;
    cout << "by Javier Barandiaran(jbarandiaran@gmail.com)" << endl;
    
    //Create the model points
    float xOffset = 36.9522f;
    float yOffset = -39.3518f;
    float zOffset = -47.1217f;
    modelPoints.push_back(cvPoint3D32f(xOffset+-36.9522f,yOffset+39.3518f,zOffset+47.1217f));    //l eye
    modelPoints.push_back(cvPoint3D32f(xOffset+35.446f,yOffset+38.4345f,zOffset+47.6468f));        //r eye
    modelPoints.push_back(cvPoint3D32f(xOffset+-0.0697709f,yOffset+18.6015f,zOffset+87.9695f)); //nose
    modelPoints.push_back(cvPoint3D32f(xOffset+-27.6439f,yOffset+-29.6388f,zOffset+73.8551f));    //l mouth
    modelPoints.push_back(cvPoint3D32f(xOffset+28.7793f,yOffset+-29.2935f,zOffset+72.7329f));    //r mouth
    
    //Four corner points of the "glasses", offset outward from the eye positions
    float xx = 20, yy = 15;
    glassesPoints.push_back(cvPoint3D32f(xOffset+-36.9522f - xx,yOffset+39.3518f + yy,zOffset+47.1217f));    //l eye, top corner
    glassesPoints.push_back(cvPoint3D32f(xOffset+-36.9522f - xx,yOffset+39.3518f - yy,zOffset+47.1217f));    //l eye, bottom corner
    glassesPoints.push_back(cvPoint3D32f(xOffset+35.446f + xx,yOffset+38.4345f + yy,zOffset+47.6468f));        //r eye, top corner
    glassesPoints.push_back(cvPoint3D32f(xOffset+35.446f + xx,yOffset+38.4345f - yy,zOffset+47.6468f));        //r eye, bottom corner
//    modelPoints.push_back(Point3f(-87.2155f,15.5829f,-45.1352f));    //l ear
//    modelPoints.push_back(Point3f(85.8383f,14.9023f,-46.3169f));    //r ear
    cout << "\n-.- SOURCE MODEL POINTS -.-\n";
    for ( size_t  p=0; p<modelPoints.size(); p++ )
        cout << modelPoints[p].x << ", " << modelPoints[p].y << ", " << modelPoints[p].z << "\n";
    
    //Create the POSIT object with the model points
    CvPOSITObject* positObject;
    positObject = cvCreatePOSITObject( &modelPoints[0], (int)modelPoints.size() );
    
    //Do processing
    size_t imageIndex = 0;
    vector<const char* > images = listOfImages();
    while (imageIndex < images.size()) {
        const char* imageFileName = images.at(imageIndex);
        loadNext(positObject, imageFileName);
        imageIndex++;
    }

    cvReleasePOSITObject(&positObject);
    return 0;
}

Building FFmpeg for iPhone

1. Check out FFmpeg source code 
2. Check out gas-preprocessor 
3. Run configure command 

./configure --cc=/Developer/Platforms/iPhoneOS.platform/Developer/usr/bin/arm-apple-darwin10-gcc-4.2.1 --as='gas-preprocessor.pl /Developer/Platforms/iPhoneOS.platform/Developer/usr/bin/arm-apple-darwin10-gcc-4.2.1' --sysroot=/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS3.2.sdk --enable-cross-compile --target-os=darwin --arch=arm --cpu=cortex-a8 --enable-pic --prefix=$HOME/Downloads/ffmpeg/iphone_build 
4. Create a project in Xcode 
5. Add static libraries to Frameworks 
6. Add libz.1.2.3 
7. Add include path and library link path to XCode project 
8. Write some example code (a minimal sketch follows this list)
9. Build and run on a real iPhone device
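For step 8, here is a minimal sketch that exercises the static libraries: it opens a media file and prints how many streams it contains. The file name is a placeholder, and the calls follow the old libavformat API that shipped at the time (av_open_input_file and friends were later superseded by avformat_open_input):

extern "C" {
#include "libavformat/avformat.h"
}
#include <cstdio>

int main(int argc, char** argv) {
    av_register_all();                      // register all container formats and codecs
    AVFormatContext* ctx = NULL;
    if (av_open_input_file(&ctx, "test.mov", NULL, 0, NULL) != 0) {
        printf("Could not open file\n");
        return 1;
    }
    if (av_find_stream_info(ctx) < 0) {     // read a few packets to discover the streams
        printf("Could not read stream info\n");
        return 1;
    }
    printf("Number of streams: %u\n", ctx->nb_streams);
    av_close_input_file(ctx);
    return 0;
}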


Enable AMR encoder/decoder
1. Download opencore-amr
2. Run command

./configure --cc=/Developer/Platforms/iPhoneOS.platform/Developer/usr/bin/arm-apple-darwin10-gcc-4.2.1 --as='gas-preprocessor.pl /Developer/Platforms/iPhoneOS.platform/Developer/usr/bin/arm-apple-darwin10-gcc-4.2.1' --sysroot=/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS3.2.sdk --enable-cross-compile --target-os=darwin --arch=arm --cpu=cortex-a8 --enable-pic --prefix=$HOME/Downloads/ffmpeg/iphone_build --enable-libopencore-amrnb --enable-version3 --extra-cflags=-I/Users/mhrinc/Downloads/ffmpeg --extra-ldflags=-L/Users/mhrinc/Downloads/ffmpeg/opencore-amrnb --disable-doc --disable-ffserver --disable-ffprobe --disable-swscale --disable-postproc --disable-avfilter --disable-dxva2 --disable-vdpau --disable-vaapi --disable-rdft --disable-mdct --disable-lpc --disable-huffman --disable-golomb --disable-fft --disable-dct --disable-aandct --disable-decoders --disable-encoders --disable-hwaccels --disable-muxers --disable-demuxers --disable-parsers --disable-bsfs --disable-protocols --disable-indevs --disable-outdevs --disable-filters \ 
--enable-decoder=wavpack --enable-encoder=libopencore_amrnb \ 
--enable-muxer=wav --enable-muxer=amr \ 
--enable-demuxer=wav --enable-demuxer=amr \ 
--enable-parser=wav --enable-parser=amr
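Once the build finishes, a quick way to verify the AMR support is to convert a WAV file with the resulting ffmpeg binary (file names are placeholders; AMR-NB requires 8 kHz mono, hence the resample flags):

./ffmpeg -i input.wav -ar 8000 -ac 1 output.amr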

Face features detection with OpenCV

The object detection framework proposed by Paul Viola is implemented in OpenCV. OpenCV also ships with several trained classifiers (working with Haar-like features), such as frontal face, profile face, eyes, nose, and mouth. Each classifier is trained on many sample views of a particular object, all scaled to the same size, using both positive and negative images.
In this post, we will use built-in OpenCV functions to detect some features on the face. First, we detect all faces present in an image. Then, within each detected face, we run object detection for the eyes, nose, and mouth. Since the human face is a rigid object and the eyes, nose, and mouth sit in predictable regions of it, we can localize these features by setting a "Region of Interest" (ROI) on each face, which also reduces computation.
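In outline, the ROI trick looks like this (a sketch only; the real parameters are in the full listing below):

cvSetImageROI(img, featureROI);   // restrict detection to a sub-rectangle of the face
CvSeq* objects = cvHaarDetectObjects(img, cascade, storage,
                                     1.1, 3, 0, cvSize(25, 15));
cvResetImageROI(img);             // restore the full image afterwards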

Here is the complete code:
#include <iostream>
#include "stdio.h"
#include "opencv2/opencv.hpp"

using namespace std;
using namespace cv;

CvHaarClassifierCascade *cascade_f;
CvHaarClassifierCascade *cascade_e;
CvHaarClassifierCascade *cascade_n;
CvHaarClassifierCascade *cascade_m;
CvMemStorage   *storage;

#define NUM_OF_FEATURES 5

CvPoint2D32f frontal_frame_features[NUM_OF_FEATURES];

int detectFaces(IplImage *img, CvRect *face);
int detectFaceFeatures(IplImage *img, CvRect *face);
int tracking();

static const double pi = 3.14159265358979323846;

int main(int argc, char** argv)
{
    IplImage *img;
    const char *file1 = "C:/c-dev/OpenCV-2.2.0/data/haarcascades/haarcascade_frontalface_alt.xml";
    const char *file2 = "C:/c-dev/OpenCV-2.2.0/data/haarcascades/haarcascade_eye.xml";
    const char *file3 = "C:/c-dev/OpenCV-2.2.0/data/haarcascades/haarcascade_mcs_nose.xml";
    const char *file4 = "C:/c-dev/OpenCV-2.2.0/data/haarcascades/haarcascade_mcs_mouth.xml";

    cascade_f = (CvHaarClassifierCascade*)cvLoad(file1, 0, 0, 0);
    cascade_e = (CvHaarClassifierCascade*)cvLoad(file2, 0, 0, 0);
    cascade_n = (CvHaarClassifierCascade*)cvLoad(file3, 0, 0, 0);
    cascade_m = (CvHaarClassifierCascade*)cvLoad(file4, 0, 0, 0);

    /* setup memory storage, needed by the object detector */
    storage = cvCreateMemStorage(0);

    /* load image */
    img = cvLoadImage("lena.jpg", CV_LOAD_IMAGE_COLOR);

    assert(cascade_f && cascade_e && cascade_n && cascade_m && storage);

    const char* name = "Features-Detection";
    cvNamedWindow(name, 1);

    CvRect face;
    if (detectFaces(img, &face)) {
        if (detectFaceFeatures(img, &face)) {
            cout << "Features detected" << endl;
        }
    }
    cvShowImage(name, img);
    cvWaitKey(0);
    cvDestroyWindow(name);
    cvReleaseImage(&img);
    cvReleaseMemStorage(&storage);

    return 0;
}

int detectFaces(IplImage *img, CvRect *face) {
    /* detect faces */
    CvSeq *faces = cvHaarDetectObjects(
            img, cascade_f, storage,
            1.1, 2, CV_HAAR_DO_CANNY_PRUNING, cvSize(50, 50));

    if (faces->total == 0) {
        return 0;
    }
    /* keep only the first face found */
    CvRect* _face = (CvRect*)cvGetSeqElem(faces, 0);
    face->x = _face->x;
    face->y = _face->y;
    face->width = _face->width;
    face->height = _face->height;
    return 1;
}

int detectFaceFeatures(IplImage *img, CvRect *face)
{
    int i;
    bool hasEyes = false;
    bool hasNose = false;
    bool hasMouth = false;
    cvRectangle(img,
            cvPoint(face->x, face->y),
            cvPoint(face->x + face->width, face->y + face->height),
            CV_RGB(255, 0, 0), 1, 8, 0);

    /* Set the Regions of Interest: eyes sit in the upper part of the face,
       the nose in the middle, and the mouth in the lower part */
    CvRect eyeROI = cvRect(face->x, face->y + (face->height/5.5), face->width, face->height/3.0);
    CvRect noseROI = cvRect(face->x, face->y + (face->height/2.5), face->width, face->height/3.0);
    CvRect mouthROI = cvRect(face->x, face->y + (face->height/1.5), face->width, face->height/2.5);

    CvRect *r;
    /* detect eyes */
    cvSetImageROI(img, eyeROI);
    CvSeq* eyes = cvHaarDetectObjects(
            img, cascade_e, storage,
            1.15, 3, 0, cvSize(25, 15));
    cvResetImageROI(img);
    /* detect nose */
    cvSetImageROI(img, noseROI);
    CvSeq* noses = cvHaarDetectObjects(
            img, cascade_n, storage,
            1.1, 3, 0, cvSize(25, 15));
    cvResetImageROI(img);
    /* detect mouth */
    cvSetImageROI(img, mouthROI);
    CvSeq* mouths = cvHaarDetectObjects(
            img, cascade_m, storage,
            1.1, 3, 0, cvSize(30, 30));
    cvResetImageROI(img);
    /* draw a rectangle for each eye found */
    for( i = 0; i < (eyes ? eyes->total : 0); i++ ) {
        r = (CvRect*)cvGetSeqElem( eyes, i );
        /* add the ROI offset back to get full-image coordinates */
        int x1 = r->x + eyeROI.x;
        int y1 = r->y + eyeROI.y;
        int x2 = x1 + r->width;
        int y2 = y1 + r->height;
        int xc = (x1 + x2)/2;
        int yc = (y1 + y2)/2;
        cvRectangle(img,
                cvPoint(x1, y1),
                cvPoint(x2, y2),
                CV_RGB(255, 0, 0), 1, 8, 0);
        cvCircle(img, cvPoint(xc, yc), 2, CV_RGB(255,0,0), 2, 8, 0);
        if (i < 2) {
            /* keep only the first two eyes; more would overrun the feature array */
            frontal_frame_features[i] = cvPoint2D32f((float)xc, (float)yc);
        }
        if (i == 1) {
            hasEyes = true;
        }
    }
    /* draw a rectangle for each nose found */
    for( i = 0; i < (noses ? noses->total : 0); i++ ) {
        r = (CvRect*)cvGetSeqElem( noses, i );
        int x1 = r->x + noseROI.x;
        int y1 = r->y + noseROI.y;
        int x2 = x1 + r->width;
        int y2 = y1 + r->height;
        int xc = (x1 + x2)/2;
        int yc = (y1 + y2)/2;
        cvRectangle(img,
                cvPoint(x1, y1),
                cvPoint(x2, y2),
                CV_RGB(0, 255, 0), 1, 8, 0);
        cvCircle(img, cvPoint(xc, yc), 2, CV_RGB(255,0,0), 2, 8, 0);
        hasNose = true;
        frontal_frame_features[2] = cvPoint2D32f((float)xc, (float)yc);
    }
    /* draw a rectangle for each mouth found, and mark the two mouth corners */
    for( i = 0; i < (mouths ? mouths->total : 0); i++ ) {
        int margin_left = 10;
        int margin_right = 0;
        r = (CvRect*)cvGetSeqElem( mouths, i );
        int x1 = r->x + mouthROI.x;
        int y1 = r->y + mouthROI.y;
        int x2 = x1 + r->width;
        int y2 = y1 + r->height;
        int x1c = x1 + margin_left;
        int y1c = (y1 + y2)/2;
        int x2c = x2 - margin_right;
        int y2c = (y1 + y2)/2 - 5;
        cvRectangle(img,
                cvPoint(x1, y1),
                cvPoint(x2, y2),
                CV_RGB(0, 0, 255), 1, 8, 0);
        cvCircle(img, cvPoint(x1c, y1c), 2, CV_RGB(255,0,0), 2, 8, 0);
        cvCircle(img, cvPoint(x2c, y2c), 2, CV_RGB(255,0,0), 2, 8, 0);
        hasMouth = true;
        frontal_frame_features[3] = cvPoint2D32f(x1c, y1c);
        frontal_frame_features[4] = cvPoint2D32f(x2c, y2c);
    }
    return (hasEyes && hasNose && hasMouth) ? 1 : 0;
}


And the result:

Monday, May 9, 2011

OpenCV with Eclipse CDT and MinGW

Prerequisites:
  1. CMake  http://www.cmake.org/ 
  2. MinGW with MSYS http://www.mingw.org/ 
  3. OpenCV source code http://sourceforge.net/projects/opencvlibrary/files/opencv-win/2.2/ 
  4. Eclipse CDT http://www.eclipse.org/cdt/
 Let's say you have downloaded OpenCV and extracted it to C:/c-dev/OpenCV-2.2.0.
  1. Open MSYS and go to OpenCV folder
    cd C:/c-dev/OpenCV-2.2.0
  2. Run CMake command to generate "Build files"
    cmake -G "Unix Makefiles" .
  3.  Build OpenCV
    make
    make install
  4. Add bin directory to your PATH
    C:\c-dev\OpenCV-2.2.0\bin
After a successful build you will find the DLL files in the "bin" folder and the import libraries (".dll.a") in the "lib" folder. Now you are ready to write your first OpenCV program.
  • Create new project

  • Add header path

  • Add library path

  • Add libraries (typical OpenCV 2.2 library names are shown below)

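For OpenCV 2.2 the import libraries carry a version suffix, so the names to add should look like the following (adjust the suffix if you build a different version):

opencv_core220 opencv_imgproc220 opencv_highgui220 opencv_objdetect220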

Time to code

#include <iostream>
#include "opencv2/opencv.hpp"

using namespace std;

int main(int argc, char** argv) {
    cout << "Hello OpenCV" << endl;
    IplImage* img = cvLoadImage("lena.jpg", CV_LOAD_IMAGE_COLOR);
    if (img) {
        cvShowImage("Hello OpenCV", img);
        cvWaitKey(0);
        cvDestroyAllWindows();
        cvReleaseImage(&img);
    }
    return 0;
}
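If you want to test outside Eclipse, the same program can also be built from the MSYS shell; the paths here are illustrative and depend on where make install put the headers and libraries:

g++ hello.cpp -I/usr/local/include -L/usr/local/lib -lopencv_core220 -lopencv_highgui220 -o hello.exe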