Talk:Ratio estimator: Difference between revisions
No edit summary |
pointed out that Lahiri estimator is biased |
||
Line 21: | Line 21: | ||
| nocat = |
| nocat = |
||
}} 19:42, 1 July 2016 (UTC) |
}} 19:42, 1 July 2016 (UTC) |
||
I indicated that the Lahiri estimator is biased and recommended that the Midzuno-Sen technique be used exclusively. See code below. |
|||
<syntaxhighlight lang="R"> |
|||
# Lahiri algorithm, own implementation. Jan Galkowski. |
|||
# empirical_bayesian@ieee.org, 3rd July 2016 |
|||
# Last changed 3rd July 2016 |
|||
is.natural<- function(x) |
|||
{ |
|||
x<- (0 < x) & (x == floor(x)) |
|||
return(x) |
|||
} |
|||
lahiri.sampling<- function(x, n, per=10) |
|||
{ |
|||
stopifnot(is.natural(per)) |
|||
stopifnot(all(is.natural(x))) |
|||
M<- sum(x) |
|||
stopifnot( is.natural(n) ) |
|||
N<- length(x) |
|||
y.i<- rep(NA,n) |
|||
y<- rep(NA,n) |
|||
for (k in (1:n)) |
|||
{ |
|||
j<- sample(N, 1) |
|||
z<- sample(M, 1) |
|||
while( z > x[j] ) |
|||
{ |
|||
j<- sample(N, 1) |
|||
z<- sample(M,1) |
|||
} |
|||
y.i[k]<- j |
|||
y[k]<- x[j] |
|||
if (0 == k%%per) |
|||
{ |
|||
cat(sprintf("Lahiri sampling: Did %.0f\n", k)) |
|||
} |
|||
} |
|||
return(list(indices=y.i, sizes=y)) |
|||
} |
|||
lahiri.Midzuno.Sen.sampling<- function(x, n) |
|||
{ |
|||
# Called this by Sarndahl, Swensson, and Wretman |
|||
stopifnot(all(is.natural(x))) |
|||
stopifnot( is.natural(n) ) |
|||
N<- length(x) |
|||
y.i<- rep(NA,n) |
|||
y<- rep(NA,n) |
|||
p<- x/sum(x) |
|||
y.i[1]<- sample.int(N, 1, prob=p) |
|||
y[1]<- x[y.i[1]] |
|||
y.i[2:N]<- sample((1:N)[-y.i[1]], (N-1), replace=FALSE) |
|||
y[2:N]<- x[y.i[2:N]] |
|||
return(list(indices=y.i, sizes=y)) |
|||
} |
|||
# Test. |
|||
# General sample from a Gamma distribution with shape 2 and scale 10, |
|||
# meaning it has a mean of 20, and make sure it consists of positive |
|||
# integers. |
|||
X<- ceiling(rgamma(10000, shape=2, scale=10)) |
|||
# Empirical mean and median: |
|||
cat(sprintf("Mean[X]: %.3f, Median[X]: %.3f\n", mean(X), median(X))) |
|||
# Lahiri (runs for a while): |
|||
L<- lahiri.sampling(X, 100, per=20) |
|||
# Lahiri-Midzuno-Sen: |
|||
LMS<- lahiri.Midzuno.Sen.sampling(X, 100) |
|||
cat(sprintf("Lahiri Mean[X]: %.3f, Lahiri Median[X]: %.3f\n", mean(L$sizes), median(L$sizes))) |
|||
cat(sprintf("Lahari-Midzuno-Sen Mean[X]: %.3f, Lahiri-MidzunoSen Median[X]: %.3f\n", mean(LMS$sizes), median(LMS$sizes))) |
|||
</syntaxhighlight> |
|||
empirical_bayesian@ieee.org {{userbox |
|||
| border-c = #80a0a2 |
|||
| id-c = #ffffff |
|||
| info-c = #eaf2f3 |
|||
| id-s = |
|||
| info-op = text-align:center; |
|||
| id = [[Image:Fisher iris versicolor sepalwidth.svg|64px]] |
|||
| info = This user is a [[:Category:WikiProject Statistics members|member]] of '''[[WP:WikiProject Statistics|WikiProject Statistics]]'''. |
|||
| usercategory = WikiProject Statistics members |
|||
| nocat = |
|||
}} 15:39, 3 July 2016 (UTC) |
Revision as of 15:40, 3 July 2016
Statistics C‑class Mid‑importance | ||||||||||
|
This article is very poorly written, and no assumptions are stated to justify the following claims:
- "θy is known to be asymptotically normally distributed." - asymptotics requires a parameter going to infinity, which is never stated. Is the resulting normality a result of the Central Limit Theorem? If so, an independence assumption must be made, as well as a finite-variance assumption.
- "E(x*1/y) = E(x)*E(1/y)" - this requires independence of x & y, which is never stated.
--65.209.72.194 (talk) 15:02, 25 July 2014 (UTC)
I bet the description of Lahiri's method in this article is wrong. I don't know Lahiri's method but I'm guessing it's just the Midzuno-Sen method using rejection sampling. If that's correct I would move the description of Midzuno-Sen before the description of Lahiri and replace the description of the Lahiri method with a brief statement that it's Midzuno-Sen using rejection sampling to pick the first item. 2620:0:1003:1019:24E6:C515:AC70:A1BB (talk) 18:30, 3 September 2015 (UTC)
I have corrected the application of Lahiri's method and fixed poor citations of a couple of other references. The Lahiri method is based upon the textbook by Lohr, cited. Incidentally, Lahiri's method is not limited to ratio estimators but is a general sampling technique.
empirical_bayesian@ieee.org
This user is a member of WikiProject Statistics. |
19:42, 1 July 2016 (UTC)
I indicated that the Lahiri estimator is biased and recommended that the Midzuno-Sen technique be used exclusively. See code below.
# Lahiri algorithm, own implementation. Jan Galkowski.
# empirical_bayesian@ieee.org, 3rd July 2016
# Last changed 3rd July 2016
# Elementwise test for natural numbers.
#
# Arguments:
#   x  numeric vector (or scalar) to test.
#
# Value: a logical vector shaped like x that is TRUE where the entry is
#   strictly positive and has no fractional part. NA entries yield NA.
is.natural<- function(x)
{
  positive <- x > 0
  whole <- floor(x) == x
  positive & whole
}
# Lahiri's acceptance-rejection scheme: draws n units with replacement,
# each trial proposing a unit uniformly at random and accepting it with
# probability proportional to its size.
#
# Arguments:
#   x    non-empty vector of unit sizes; every entry must be a positive
#        whole number.
#   n    number of draws to make (a single positive whole number).
#   per  a progress line is printed after every `per` completed draws
#        (a single positive whole number).
#
# Value: a list with components
#   indices  positions in x of the selected units (length n),
#   sizes    the corresponding entries of x (length n).
#
# Stops with an error if any argument fails validation.
lahiri.sampling<- function(x, n, per=10)
{
  stopifnot(length(per) == 1L, is.natural(per))
  stopifnot(length(n) == 1L, is.natural(n))
  # An empty x leaves nothing to propose and would otherwise fail with an
  # obscure zero-length condition error inside the loop.
  stopifnot(length(x) > 0L, all(is.natural(x)))
  N <- length(x)
  # Any bound >= max(x) gives the same selection probabilities. Using the
  # tightest bound (rather than sum(x)) raises the acceptance rate per
  # trial from 1/N to mean(x)/max(x).
  M <- max(x)
  y.i <- integer(n)
  for (k in seq_len(n))
  {
    repeat
    {
      j <- sample.int(N, 1)   # candidate unit, uniform over 1..N
      z <- sample.int(M, 1)   # uniform threshold over 1..M
      if (z <= x[j]) break    # accept with probability x[j] / M
    }
    y.i[k] <- j
    if (0 == k %% per)
    {
      cat(sprintf("Lahiri sampling: Did %.0f\n", k))
    }
  }
  return(list(indices=y.i, sizes=x[y.i]))
}
# Lahiri-Midzuno-Sen sampling: the first unit is drawn with probability
# proportional to size, and the remaining n - 1 units are drawn by simple
# random sampling without replacement from the units not yet selected.
# Called this by Sarndahl, Swensson, and Wretman.
#
# Arguments:
#   x  vector of unit sizes; every entry must be a positive whole number.
#   n  sample size, a single positive whole number no larger than
#      length(x).
#
# Value: a list with components
#   indices  positions in x of the n distinct selected units, the
#            size-proportional draw first,
#   sizes    the corresponding entries of x.
#
# Stops with an error if any argument fails validation.
lahiri.Midzuno.Sen.sampling<- function(x, n)
{
  stopifnot(length(n) == 1L, is.natural(n))
  stopifnot(all(is.natural(x)))
  N <- length(x)
  # Sampling without replacement cannot return more units than exist.
  stopifnot(n <= N)
  first <- sample.int(N, 1, prob = x / sum(x))
  rest <- seq_len(N)[-first]
  if (n > 1)
  {
    # Index into `rest` instead of calling sample(rest, ...): sample()
    # treats a length-one numeric vector v as 1:v, which could re-select
    # the first unit when only one candidate remains.
    others <- rest[sample.int(length(rest), n - 1)]
  }
  else
  {
    others <- integer(0)
  }
  y.i <- c(first, others)
  return(list(indices=y.i, sizes=x[y.i]))
}
# Test.
# Generate a sample from a Gamma distribution with shape 2 and scale 10,
# meaning it has a mean of 20, and round up so that it consists of
# positive integers.
X <- ceiling(rgamma(10000, shape = 2, scale = 10))
# Empirical mean and median of the generated population:
cat(sprintf("Mean[X]: %.3f, Median[X]: %.3f\n", mean(X), median(X)))
# Lahiri (rejection sampling; may run for a while). Progress is reported
# every 20 draws.
L <- lahiri.sampling(X, 100, per = 20)
# Lahiri-Midzuno-Sen:
LMS <- lahiri.Midzuno.Sen.sampling(X, 100)
# Mean and median of the sizes returned by each sampler.
cat(sprintf("Lahiri Mean[X]: %.3f, Lahiri Median[X]: %.3f\n", mean(L$sizes), median(L$sizes)))
cat(sprintf("Lahiri-Midzuno-Sen Mean[X]: %.3f, Lahiri-Midzuno-Sen Median[X]: %.3f\n", mean(LMS$sizes), median(LMS$sizes)))
empirical_bayesian@ieee.org
This user is a member of WikiProject Statistics. |
15:39, 3 July 2016 (UTC)