#ifndef _theplu_yat_statistics_fisher_
#define _theplu_yat_statistics_fisher_
// $Id: Fisher.h 1000 2007-12-23 20:09:15Z jari $
/*
Copyright (C) 2004, 2005 Peter Johansson
Copyright (C) 2006 Jari Häkkinen, Peter Johansson
Copyright (C) 2007 Peter Johansson
This file is part of the yat library, http://trac.thep.lu.se/yat
The yat library is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The yat library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.
*/
#include "Score.h"
#include
#include
namespace theplu {
namespace yat {
namespace utility {
// Forward declaration only: no declaration visible in this header
// refers to utility::vector.
// NOTE(review): possibly a leftover -- confirm before removing.
class vector;
}
namespace statistics {
/**
@brief Fisher's exact test.
Fisher's Exact test is a procedure that you can use for data
in a two by two contingency table: \f[ \begin{tabular}{|c|c|}
\hline a&b \tabularnewline \hline c&d \tabularnewline \hline
\end{tabular} \f] Fisher's Exact Test is based on exact
probabilities from a specific distribution (the hypergeometric
distribution). There's really no lower bound on the amount of
data that is needed for Fisher's Exact Test. You do have to
have at least one data value in each row and one data value in
each column. If an entire row or column is zero, then you
don't really have a 2 by 2 table. But you can use Fisher's
Exact Test when one of the cells in your table has a zero in
it. Fisher's Exact Test is also very useful for highly
imbalanced tables. If one or two of the cells in a two by two
table have numbers in the thousands and one or two of the
other cells has numbers less than 5, you can still use
Fisher's Exact Test. For very large tables (where all four
entries in the two by two table are large), your computer may
take too much time to compute Fisher's Exact Test. In these
situations, though, you might as well use the Chi-square test
because a large sample approximation (that the Chi-square test
relies on) is very reasonable. If all elements are larger than
10 a Chi-square test is reasonable to use.
@note The statistic assumes that each column sum and each row sum
is fixed. Just because you have a 2x2 table, this assumption does
not necessarily match your experimental setup. See e.g. Barnard's
test for an alternative.
*/
class Fisher
{
public:
  ///
  /// @brief Default constructor.
  ///
  Fisher(void);

  ///
  /// @brief Destructor.
  ///
  /// Declared virtual so that classes derived from Fisher are
  /// destroyed correctly through a pointer to Fisher.
  ///
  virtual ~Fisher(void);

  ///
  /// @return Chi2 score
  ///
  double Chi2(void) const;

  /**
     @brief Calculates the expected values under the null hypothesis.

     The results are handed back through the reference arguments:

     \f$ a' = \frac{(a+c)(a+b)}{a+b+c+d} \f$,
     \f$ b' = \frac{(a+b)(b+d)}{a+b+c+d} \f$,
     \f$ c' = \frac{(a+c)(c+d)}{a+b+c+d} \f$,
     \f$ d' = \frac{(b+d)(c+d)}{a+b+c+d} \f$

     @param a expected value for the upper left cell
     @param b expected value for the upper right cell
     @param c expected value for the lower left cell
     @param d expected value for the lower right cell
  */
  void expected(double& a, double& b, double& c, double& d) const;

  // Counts and sizes are declared as plain `unsigned int`. That is the
  // type the BSD typedef `u_int` names, so signatures are unchanged,
  // but the class no longer depends on the non-standard typedef from
  // <sys/types.h>.

  ///
  /// If all elements in the table are at least minimum_size(), a
  /// Chi2 approximation is used for p-value calculation.
  ///
  /// @return reference to minimum_size
  ///
  unsigned int& minimum_size(void);

  ///
  /// If all elements in the table are at least minimum_size(), a
  /// Chi2 approximation is used for p-value calculation.
  ///
  /// @return const reference to minimum_size
  ///
  const unsigned int& minimum_size(void) const;

  ///
  /// If oddsratio is larger than unity, the two-sided p-value is
  /// equal to 2*p_value_one_sided(). If oddsratio is smaller than
  /// unity, the two-sided p-value is equal to
  /// 2*(1-p_value_one_sided()). If oddsratio is unity, the two-sided
  /// p-value is equal to unity.
  ///
  /// If all elements in the table are at least minimum_size(), a
  /// Chi2 approximation is used.
  ///
  /// @return 2-sided p-value
  ///
  double p_value() const;

  ///
  /// The one-sided p-value is the probability to get a larger (or
  /// equal) oddsratio.
  ///
  /// If all elements in the table are at least minimum_size(), a
  /// Chi2 approximation is used.
  ///
  /// @return One-sided p-value
  ///
  double p_value_one_sided() const;

  /**
     @brief Calculates the odds ratio of a 2x2 table.

     For the table
     \f[ \begin{tabular}{|c|c|}
     \hline a&b \tabularnewline \hline c&d \tabularnewline \hline
     \end{tabular} \f]
     the odds ratio is \f$ \frac{ad}{bc} \f$.

     NOTE(review): this is the only non-const member that takes the
     table cells, so it presumably also stores them for the p-value
     functions -- confirm against the implementation.

     @param a upper left cell
     @param b upper right cell
     @param c lower left cell
     @param d lower right cell

     @return odds ratio.

     @throw If table is invalid a runtime_error is thrown. A table
     is invalid if a row or column sum is zero.
  */
  double oddsratio(const unsigned int a, const unsigned int b,
                   const unsigned int c, const unsigned int d);

private:
  // NOTE(review): presumably tells whether the exact calculation
  // (rather than the Chi2 approximation) is to be used -- confirm
  // against the implementation.
  bool calculate_p_exact() const;

  // two-sided p-value, approximative calculation
  double p_value_approximative(void) const;
  // two-sided p-value, exact calculation
  double p_value_exact(void) const;

  // Cells of the 2x2 table, named as in the documentation above
  // (presumably set by oddsratio() -- confirm).
  unsigned int a_;
  unsigned int b_;
  unsigned int c_;
  unsigned int d_;
  // Value exposed through minimum_size().
  unsigned int minimum_size_;
  // NOTE(review): presumably the most recently calculated odds
  // ratio -- confirm against the implementation.
  double oddsratio_;
};
}}} // of namespace statistics, yat, and theplu
#endif