***

This program is designed for readers interested in reproducing the results in the paper: "The Value of 
Crowdsourced Earnings Forecasts"

Please note that some files labelled "mylib" need to first be collected by the authors. Below is a list of datasets used:

1) Mylib.Estimize - Estimize forecast data provided by Leigh Drogen at Estimize
2) Mylib.Detail forecasts - Detailed earnings forecasts provided by IBES
3) Mylib.Ret11_13 & MylibVolume_Shrout11_13 - daily returns, prices, volume and shares outstanding from CRSP
4) Mylib.Compustat10_14 - annual book value of equity (CEQ) from Compustat
5) Mylib.IBES_Ticker_permno - a file that matches IBES-Ticker with CRSP Permno available here: http://www.bhwang.com/research.html
6) Mylib.IBES Consensus - summary consensus forecast provided by IBES
7) Mylib.cumday - a file that counts the cumulative number of trading days from January 1, 2013. (Dec 30, 2011 =0, Jan 3, 2012 =1, Jan 4, 2012 =2, Dec 31, 2013 = 502)
8) Mylib.Detail_recs - a filte that contains detailed analyst recommendations provided by IBES

* other "mylib" files are constructed within the program;


****
I have split the analysis into six parts:

Part 1 - Constructs the two main Estimize samples as reported in Panels A and B of Table 1.
Part 2 - Provides summary statistics on firm characteristics associated with Estimize and IBES Samples (TABLE 2)
Part 3 - Provides summary statistics at the individual forecast level (Tables 3 & 4, and Figure 1)
Part 4 - Reports results for Consensus Forecast Accuracy Tests (Tables 5-8)
Part 5 - Reports results from the Earnings Response Coefficient Tests (Table 9 and Figure 2)
Part 6 - Reports results from the Price Impact around Forecast Revision Tests (Table 10 and Figure 3)




****

								Part 1: Construction of Estimize and ESTIMIZE-IBES Matched Sample (Results in Table 1)

****





*start with estimize data provided by Leigh Drogen of Estimize;

data estimize;
set mylib.estimize;
**** limit dataset to variables used in the analysis;
keep ticker fiscal_year fiscal_quarter epsactual epswallstreet eps  estdate username actdate flagged bio1 bio2 bio3 ;
run; 




*dates are currently in numeric format - need to covert to dates;


data estimize;
set estimize;
format est_date yymmdd10.;
format ACT_date yymmdd10.;

EST_date=input(trim(ESTdate),YYMMDD10.);
ACT_date=input(trim(ACTdate),YYMMDD10.);


est_year = year(est_date);
act_year = year (act_date);
weekday = weekday(est_date);

*make a new day variable that adds 1 if the date is on Sunday and 2 if the date is on Saturday;
est_date2 = est_date;
if weekday =1 then est_date2 = est_date2 +1;
if weekday =7 then est_date2 = est_date2 +2;
FORECAST_AGE = ACT_dATE - EST_DATE2;
run;




proc sort data=estimize;
by act_year;
run;

*limiting sample to forecasts that were issued in 2012 or 2013 and for which the earnings announcement date was in 2012 or 2013;
data estimize2;
set estimize;
if 2012<=est_year<=2013;
if 2012<=act_year <=2013;
run;


proc sort nodupkey data=estimize2 out=firms;
by ticker;
run;

proc sort nodupkey data=estimize2 out=firm_quarters;
by ticker act_date;
run;
proc sort nodupkey data=estimize2 out=contributors;
by username;
run;


*will delete observations Flagged by Estimize as inaccurate in main tests;


data estimize3;
set estimize2;
if flagged = 'TRUE' then delete;
run;


*deleting observations where all information is the same;

PROC SORT NODUPKEY DATA=ESTIMIZE3 out=estimize4;
BY fiscal_year fiscal_quarter ticker  EST_dATE2 act_date epsactual forecast_age USERNAME  eps;
RUN;


*if an analyst issues multiple forecasts on the same day - combine into one average forecast;

proc univariate noprint data=estimize4;
BY fiscal_year fiscal_quarter ticker  EST_dATE2 act_date epsactual forecast_age USERNAME flagged;
var eps;
output out=estimize4 MEAN=eps min=min_eps max=maX_eps n=OBS;
RUN;



*deleting forecasts that are issued after the announcement data and forecasts that are issued 90 days prior to the earnings announcement date;

data estimize5;
set estimize4;
if forecast_age >90 then delete;
if forecast_age <0 then delete;
act_year = year(act_date);
act_month = month(act_date);
if act_month <=3 then quarter =1;
if 4<=act_month <=6 then quarter =2;
if 7<=act_month <=9 then quarter =3;
if act_month >9 then quarter =4;
cum_quarter = (act_year - 2012) *4 + quarter;
year = act_year;
*create an observation variable that allows me to easily track lost observations in subsequent merges;

obs_num = _n_;
run;




*mylib.estimize2 is the "Final Estimize Sample" as reported in Table 1 of the summary statistics;
data mylib.estimize2;
set estimize5;
run;



*will next merge the IBES detail data with Estimize;


*limit sample to: quarterly forecasts, announcement and estimate date in 2012 and 2013, non-anonymous analysts, and forecast age between 0 and 90 days;

data ibes;
set mylib.detail_forecasts;
if fpi =6;
est_year = year(anndats);
if 2012<=est_year <=2013;
year = year(ANNDATS_ACT);
month = month(ANNDATS_ACT);
if month <=3 then quarter =1;
if 4<=month <=6 then quarter =2;
if 7<=month <=9 then quarter =3;
if month >9 then quarter =4;
cum_quarter = (year - 2012) *4 + quarter;
forecast_age = ANNDATS_ACT - anndats;
if 0<=forecast_age<=90;
if ANALYS =0  then delete;
run;

proc sort nodupkey data=ibes;
by oftic anndats_act ANALYS;
run;



proc univariate noprint data=ibes;
by oftic anndats_act;
var ACTUAL;
output out=ibes2 mean=actual std=std_actual N=ibes_coverage;
run;

data ibes2;
set ibes2;
if oftic = '' then delete;
run;





*will now require an IBES observation and be on same basis (i.e., eps is the same or can be properly adjusted);



proc sql;
create table ESTIMIZE6 as
select *
from ESTIMIZE5, ibes2
where ESTIMIZE5.ticker =ibes2.oftic and ESTIMIZE5.ACT_DATE = ibes2.ANNDATS_ACT;
quit;





**** the adj_factor looks at whether IBES and Estimize earnings are within one-percent of each other;

data estimize6;
set estimize6;
adj_factor = actual/epsactual;
if .99 <=adj_factor <=1.01 then adj_factor =1;
if adj_factor =1 then good =1;
else good =0;
run;
proc univariate data=estimize6;
var good;
run;



*want to compute the standard deviation of the adjustment factor,
*if constant then will simply adjust Estimize forecasts by this constant factor (likely driven by stock splits);

proc sort nodupkey data=estimize6 out=test;
by ticker good actual;
run;
proc univariate noprint data=test;
by ticker good ;
var adj_factor;
output out=test2 mean=adj_factor std=std_factor min=min_factor max=max_factor N=obs;
run;

*anecdotal test that the difference between two samples for Apple is the 7:1 stock split of Apple;
DATA AAPL;
SET TEST2;
IF TICKER = 'AAPL';
RUN;



proc sort data=test2;
by std_factor;
run;
data test3;
set test2;
if 0 <=std_factor<0.001;
adj_flag =1;
keep ticker GOOD adj_flag min_factor max_factor std_factor;
run;

proc sql;
create table estimize6b  as
select *
from estimize6, test3
where estimize6.ticker = test3.ticker AND ESTIMIZE6.GOOD = TEST3.GOOD;
quit;


proc sort data=estimize6;
by obs_num;
run;
proc sort data=estimize6b;
by obs_num;
run;

data estimize7;
merge estimize6 estimize6b;
by obs_num;
if good =0 and adj_flag = . then delete;
estimize_sample=1;
*making stock-split adjustment;
VALUE = eps * adj_factor;
abs_ferr = abs (value - actual);
signed_ferr =  actual - value;
*keep eps obs_num ticker forecast_age actual estimize_sample username est_date2 forecast_age value abs_ferr signed_ferr ANNDATS_ACT cum_quarter;
run;






proc sort nodupkey data=estimize7;
by obs_num;
run;

*this is the final dataset used in Panel B of Table 1; 

data mylib.estimize_filtered;
set estimize7;
run;

*quick summary statistics on the number of firms, firm-quarters and contributors;

proc sort nodupkey data=estimize7 out=firms;
by ticker;
run;
proc sort nodupkey data=estimize7 out=firm_quarters;
by ticker act_date;
run;
proc sort nodupkey data=estimize7 out=contributors;
by username;
run;



****

	Part 2: will provide some summary statistics on firm characteristics associated with Estimize and IBES samples (Table 2);


****



*NOW WANT TO LOOK AT FIRM CHARACERISTICS ASSOCIATED WITH IBES AND ESTIMIZE SAMPLES;


*1) merge return and volume data (both from CRSP);


data ret;
set mylib.ret11_13;
prc = abs(prc);
keep permno prc date ret;
run;
data vol;
set mylib.volume_shrout11_13;
keep permno vol shrout date;
run;
data ret;
merge ret vol;
by permno date;
turn = vol/shrout;
size = prc * shrout;
year = year(date);
run;


proc sort data=ret;
by permno year descending date;
run;
data size;
set ret;
if size = . then delete;
keep permno year size;
run;
proc sort nodupkey data=size out=size;
by permno year;
run;
proc univariate noprint data=ret;
by permno year;
var ret;
output out=vol std=vol;
run;
proc univariate noprint data=ret;
by permno year;
var turn;
output out=turn mean=turn;
run;

data char;
merge size turn vol;
by permno year;
run;


data year;
set mylib.ret11_13;
year = year(date);
keep permno year cusip ncusip ticker;
run;


proc sort nodupkey data=year out=year;
by permno year;
run;

data char;
merge char year;
by permno year;
run;


*will merge with compustat book data;

* Will use datadate which reflect the year in which the fiscal year ended;


data ceq;
length cusip2 $8;
set mylib.compustat10_14;
year = year(datadate);
cusip2 = cusip;
keep year cusip2 tic ceq;
run;
proc sql;
create table char2 as
select *
from char, ceq
where char.ncusip = ceq.cusip2 and char.year = ceq.year;
quit;

proc sort data=char;
by permno year;
run;
proc sort data=char2;
by permno year;
run;
data mylib.char;
merge char char2;
by permno year;
bm = ceq/size *1000;
*winsorize bm at 5 and 0;
if bm > 5 then bm =5;
if bm <0 then bm =0;
log_turn = log(turn);
log_vol = log(vol);

log_size = log(size);
log_vol = log(vol);
log_turn = log(turn);
log_bm = log(bm);
run;




*also want to compute analyst coverage;

data ibes;
set mylib.detail_forecasts;
if oftic = '' then delete;
if analys = 0 then delete;
year = year(anndats_act);
if year = . then delete;
ibes_cusip = cusip;
run;
proc sort nodupkey data=ibes out=ibes2;
by oftic  ibes_cusip year analys ;
run;
proc sort data=ibes2;
by oftic ibes_cusip year;
run;

*ibes coverage is an annual measure;
proc univariate noprint data=ibes2;
by oftic ibes_cusip year;
var analys;
output out=coverage N=ibes_coverage;
run;

data coverage;
set coverage;
ticker = oftic;
if year = 2014 then delete;
keep year ticker oftic ibes_coverage ibes_cusip;
run;
proc sort data=coverage;
by ticker year;
proc sort data=mylib.char;
by ticker year;
run;
data char;
merge mylib.char coverage;
by ticker year;
if ticker = '' then delete;
if size = . then delete;
if ibes_coverage = . then ibes_coverage =0;
run;
data mylib.char2;
set char;
run;
*/


 


*for each stock, let's look at ibes and estimize coverage during the given year;




**will start with estimize;
data estimize;
set  mylib.estimize2;
run;



*just want to look at number of unique analysts making a forecast each quarter);


proc sort nodupkey data=estimize out=estimize2;;
by ticker cum_quarter  act_date year username;  ;
run;
proc univariate noprint data=estimize2;
by ticker cum_quarter act_date year ;
var eps;
output out=forecasts N=forecasts;
run;



*will next report values for ibes;



data ibes;
set mylib.detail_forecasts;
if fpi =6;
est_year = year(anndats);
if 2012<=est_year <=2013;
year = year(ANNDATS_ACT);
month = month(ANNDATS_ACT);
if month <=3 then quarter =1;
if 4<=month <=6 then quarter =2;
if 7<=month <=9 then quarter =3;
if month >9 then quarter =4;
cum_quarter = (year - 2012) *4 + quarter;
forecast_age = ANNDATS_ACT - anndats;
if analys = 0 then delete;
if 0<=forecast_age<=90;
run;




*will look at total ibes coverage each quarter;


proc sort nodupkey data=ibes out=ibes2;
by oftic  cum_quarter year  anndats_act cusip analys;
run;


*ibes forecasts is a quarterly measure, will use ibes_forecasts and not ibes coverage, since Estimize is also at quarterly frequency;
proc univariate noprint data=ibes2;
by oftic cum_quarter year  anndats_act cusip;
var value;
output out=ibes_forecasts N=ibes_forecasts;
run;
data ibes_forecasts;
set ibes_forecasts;
ticker = oftic;
run;
data forecasts;
set forecasts;
estimize_ticker = ticker;
run;


proc sort data=ibes_forecasts;
by ticker anndats_act;
run;

data forecasts;
set forecasts;
anndats_act =act_date;
estimize =1;
run;

proc sort data=forecasts;
by ticker anndats_act;
run;

data analysts2;
merge ibes_forecasts forecasts;
by ticker anndats_act;
char_year = year -1;
obs_num = _N_;
run;

data test;
set analysts2;
if estimize =1;
if ibes_forecasts = .;
run;





data char;
set mylib.char2;
char_year = year;
if ncusip = . then ncusip =-99;
drop year;
run;

proc sql;
create table analysts3 as
select *
from analysts2, char
where analysts2.char_year = char.char_year and analysts2.ticker = char.ticker;
quit;
proc sql;
create table analysts3b as
select *
from analysts2, char
where analysts2.char_year = char.char_year and analysts2.cusip = char.ncusip;
quit;



proc sort data=analysts3;
by obs_num;
run;
proc sort data=analysts3b;
by obs_num;
run;
data analysts3;
merge analysts3 analysts3b;
by obs_num;
run;

proc sort nodupkey data=analysts3;
by obs_num;
run;







data analysts3;
set analysts3;
estimize_dummy =1;
if forecasts <1 then estimize_dummy =0;
ibes_dummy =1;
if ibes_forecasts <1 then ibes_dummy =0;
size2 = size/1000000;
if cum_quarter = 9 then delete;
run;






*** reporting summary statistics for Panel A of Table 2 ***;


proc univariate data=analysts3;
var forecasts ibes_forecasts  size2 bm vol turn ;
where estimize_dummy =1 and ibes_dummy =1;
run;



proc univariate data=analysts3;
var forecasts ibes_forecasts size2 bm vol turn ;
where ibes_dummy =1 and estimize_dummy ~=1 ;
run;
proc univariate data=analysts3;
var forecasts ibes_forecasts size2 bm vol turn ;
where ibes_dummy ~=1 and estimize_dummy =1 ;
run;



*will now sort firm-quarters in quartiles based on Estimize coverage rank or IBES coverage rank  (Panel B of Table 2);



data analysts4;
set analysts3;
forecast_rank = forecasts;
ibes_rank = ibes_forecasts;
if estimize_dummy =1 and ibes_dummy =1;
run;


proc sort data=analysts4;
by cum_quarter;
run;

proc rank data=analysts4 out=analysts4 groups =4;
var forecast_rank ibes_rank;
run;
proc sort data=analysts4;
by forecast_rank;
run;
proc univariate data=analysts4;
by forecast_rank;
var forecasts ibes_forecasts size2 bm vol turn ;
run;
proc sort data=analysts4;
by ibes_rank;
run;
proc univariate data=analysts4;
by ibes_rank;
var forecasts ibes_forecasts size2 bm vol turn ;
run;



****

					Part 3: Summary Statistics at the Individual Forecast Level (Tables 3 &4, and Figure 1)


****





*want to compare IBES & Estimize forecast characteristics;
*can start with the merged IBES and estimize sample (the 37,031 observations that meet the basic + ibes filter (see Panel B of Table 1);


data estimize3;
set mylib.estimize_filtered;
run;



*want some summary statistics for the estimize sample;


*now want to combine with IBES forecasts;



data ibes;
set mylib.detail_forecasts;
if fpi =6;
est_year = year(anndats);
if 2012<=est_year <=2013;
year = year(ANNDATS_ACT);
month = month(ANNDATS_ACT);
if month <=3 then quarter =1;
if 4<=month <=6 then quarter =2;
if 7<=month <=9 then quarter =3;
if month >9 then quarter =4;
cum_quarter = (year - 2012) *4 + quarter;
forecast_age = ANNDATS_ACT - anndats;
if 0<=forecast_age<=90;
if ANALYS =0  then delete;
run;

data permno;
set mylib.ibes_ticker_permno;
end_year = year(enddate);
if end_year <2011 then delete;
keep ticker permno startdate enddate;
run;

proc sql;
create table ibes as
select *
from ibes, permno
where ibes.ticker = permno.ticker;
quit;




*will now require at least on estimize forecast;

proc sort nodupkey data=estimize3 out=sample;
by ticker ANNDATS_ACT;
run;
proc sql;
create table ibes2 as
select *
from ibes, sample
where ibes.oftic = sample.ticker and ibes.anndats_act = sample.anndats_act;
quit;

data ibes3;
set ibes2;
estimize_sample=0;
if analys =0  then delete;
abs_ferr = abs (actual - value);
signed_ferr =   actual - value;
keep ticker forecast_age actual estimize_sample anndats  forecast_age value abs_ferr signed_ferr ANNDATS_ACT analys oftic cum_quarter permno;
run;


data sample;
set ibes3;
keep oftic  permno cum_quarter;
run;
proc sort nodupkey data=sample;
by oftic permno cum_quarter;
run;

*want to ensure that all estimize forecasts have a corresponding ibes forecast;
proc sql;
create table estimize3 as
select *
from estimize3, sample
where estimize3.ticker = sample.oftic and estimize3.cum_quarter = sample.cum_quarter;
quit;



data full_sample;
set estimize3 ibes3;
*will drop earning announcements that occur in 2014 (cum_quarter =9) since I do not have the complete set of estimize forecasts;
if cum_quarter = 9 then delete;
run;





*will start with summary statistics for the Estimize sample (Panel A of Table 3);

***forecasts per firm-quarter per forecaster -estimize;
proc sort data=full_sample;
by username oftic cum_quarter act_date;
run;
proc univariate noprint data=full_sample;
by username oftic cum_quarter act_date;
var eps;
output out=forecasts_per_user_per_quarter N = forecasts_per_user_per_quarter;
where estimize_sample =1;
run;
proc univariate data=forecasts_per_user_per_quarter;
var forecasts_per_user_per_quarter;
run;



**forecasts per firm-quarter -estimize;
proc sort data=full_sample;
by  oftic cum_quarter act_date;
run;
proc univariate noprint data=full_sample;
by  oftic cum_quarter act_date;
var eps;
output out=forecasts_per_firm_quarter N = forecasts_per_firm_quarter;
where estimize_sample =1;
run;
proc univariate data=forecasts_per_firm_quarter;
var forecasts_per_firm_quarter;
run;


*total number of unique forecasters per firm-quarter;
proc sort nodupkey data=full_sample out=full_sample2;
by  oftic cum_quarter act_date username;
run;
proc univariate noprint data=full_sample2;
by  oftic cum_quarter act_date;
var eps;
output out=forecasters_per_quarter N = forecasters_per_quarter;
where estimize_sample =1;
run;
proc univariate data=forecasters_per_quarter;
var forecasters_per_quarter;
run;



*firms covered per forecaster per quarter;
proc sort nodupkey data=full_sample out=full_sample2;
by  username  cum_quarter oftic;
run;
proc univariate noprint data=full_sample2;
by  username cum_quarter;
var eps;
output out=firms_covered N = firms_covered;
where estimize_sample =1;
run;
proc univariate data=firms_covered;
var firms_covered;
run;

proc sort data=full_sample;
by oftic cum_quarter anndats_act;
run;
proc univariate noprint data=full_sample;
by oftic cum_quarter anndats_act;
var forecast_age;
output out=est_age mean=est_age median=est_med_age;
where estimize_sample =1;
run;
proc univariate data=est_age;
var est_age;
run;


will now repeat above test for ibes (Panel B of Table 3)


***forecasts per firm-quarter per forecaster -IBES;
proc sort data=full_sample;
by analys oftic cum_quarter ANNDATS_ACT;
run;
proc univariate noprint data=full_sample;
by analys oftic cum_quarter ANNDATS_ACT;
var value;
output out=forecasts_per_user_per_quarter N = forecasts_per_user_per_quarter;
where estimize_sample =0;
run;
proc univariate data=forecasts_per_user_per_quarter;
var forecasts_per_user_per_quarter;
run;

**forecasts per firm-quarter  - IBES;
proc sort data=full_sample;
by  oftic cum_quarter ANNDATS_ACT;
run;
proc univariate noprint data=full_sample;
by  oftic cum_quarter ANNDATS_ACT;
var value;
output out=forecasts_per_firm_quarter N = forecasts_per_firm_quarter;
where estimize_sample =0;
run;
proc univariate data=forecasts_per_firm_quarter;
var forecasts_per_firm_quarter;
run;


*total number of unique forecasters per firm-quarter;
proc sort nodupkey data=full_sample out=full_sample2;
by  oftic cum_quarter ANNDATS_ACT analys;
run;
proc univariate noprint data=full_sample2;
by  oftic cum_quarter ANNDATS_ACT;
var value;
output out=forecasters_per_quarter N = forecasters_per_quarter;
where estimize_sample =0;
run;
proc univariate data=forecasters_per_quarter;
var forecasters_per_quarter;
run;



*firms covered per forecaster per quarter;
proc sort nodupkey data=full_sample out=full_sample2;
by  analys  cum_quarter  oftic;
run;
proc univariate noprint data=full_sample2;
by  analys cum_quarter ;
var value;
output out=firms_covered N = firms_covered;
where estimize_sample =0;
run;
proc univariate data=firms_covered;
var firms_covered;
run;



proc sort data=full_sample;
by oftic cum_quarter ANNDATS_ACT;
run;
proc univariate noprint data=full_sample;
by oftic cum_quarter ANNDATS_ACT;
var forecast_age;
output out=ibes_age mean=ibes_age median=ibes_med_age;
where estimize_sample ~=1;
run;
proc univariate data=ibes_age;
var ibes_age ibes_med_age;
run;



*also will plot   frequency of forecasts by forecast age (used to create Figure 1);
proc sort data=full_sample;
by descending forecast_age;
run;
proc univariate noprint data=full_sample;
by  descending forecast_age;
var actual;
output out=est_fig1 N=obs;
where estimize_sample =1;
run;
proc sort data=full_sample;
by descending forecast_age;
run;
proc univariate noprint data=full_sample;
by  descending forecast_age;
var actual;
output out=ibes_fig1 N=obs;
where estimize_sample ~=1;
run;





*now want to compute the following forecast statistics (will report in Table 4);

*accuracy, bias, and boldness:

1) forecast age = forecast_date - earnings_date

*Some notation: AFE = |actual - estimate|
SFE = actual - estimate


2) forecast accuracy (PMAFE) = (AVE - AVE_AFE)/AVE_AFE
3) forecast bias (PMSFE) = SFE/Price
4) Boldness (or Deviation from Consensus) = |estimate - consensus_estimate|/ave |estimate - consensus_estimate|;



data full_sample;
set estimize3 ibes3;
*will drop earning announcements that occur in 2009 since I do not have the complete set of estimize forecasts;
if cum_quarter = 9 then delete;
run;
DATA FULL_SAMPLE;
SET FULL_SAMPLE;
*TICKER2 = TICKER;
*IF ESTIMIZE_SAMPLE ~=1 THEN TICKER2 = OFTIC;
*THE UNIT OF OBSERVATION FOR ALL OF THIS ANALYSIS IS A FIRM-QUARTER - THUS WILL CREATE AN ID VARIABLE TO MATCH THIS UNIT;
time = cum_quarter ||anndats_act;
*id corresponds to unique firm-quarters;
id = oftic||time;
tracker = _N_;
RUN;


*this comes from CRSP data;
data prc;
set mylib.ret11_13;
prc = abs(prc);
year = year(date);
month = month(date);
if month <=3 then q =1;
if 4<=month<=6 then q =2;
if 7<=month<=9 then q =3;
if 10<=month<=12 then q=4;
cum_quarter = (year - 2012)*4 + q;
if ticker = '' then delete;
keep cum_quarter permno ticker date prc;
run;
proc sort nodupkey data=prc;
by permno cum_quarter;
run;


proc sql;
create table full_sample2 as
select *
from full_sample, prc
where full_sample.permno = prc.permno and full_sample.cum_quarter = prc.cum_quarter;
quit;
proc sort data=full_sample;
by tracker;
run;
proc sort data=full_sample2;
by tracker;
run;



*will winsorize signed_ferr_prc error at -0.02 and 0.02 (roughly the 0.5 and 99.5 percentile);
data full_sample;
merge full_sample full_sample2;
by tracker;
signed_ferr_prc = signed_ferr/prc;
if signed_ferr_prc  >.02 then signed_ferr_prc = 0.02;
if -100<signed_ferr_prc <-0.02 then signed_ferr_prc = -0.02;
run;
proc univariate data=full_sample;
var signed_ferr_prc;
run;




PROC SORT DATA=FULL_SAMPLE;
BY ID;
RUN;
*compute the AVE_ABS_FERR ave_signed_ferr for each firm-quarter;
PROC UNIVARIATE NOPRINT DATA=FULL_SAMPLE;
BY ID;
VAR ABS_FERR signed_ferr;
OUTPUT OUT=AVE_ABS_FERR MEAN = AVE_ABS_FERR ave_signed_ferr n=ABS_FERR_OBS;
RUN;

PROC SQL;
CREATE TABLE FULL_SAMPLE AS
SELECT *
FROM FULL_SAMPLE, AVE_ABS_FERR
WHERE FULL_SAMPLE.ID = AVE_ABS_FERR.ID ;
QUIT;





DATA FULL_SAMPLE2;
SET FULL_SAMPLE;
*pmafe is primary measure of accuracy;
PMAFE = (ABS_FERR-AVE_ABS_FERR)/AVE_ABS_FERR;
*pmsfe is one measure of bias;
PMSFE = (signed_FERR - AVE_SIGNED_FERR)/AVE_ABS_FERR;
*bias dummy is an alternative (non-continuous) measure of bias;
if signed_ferr >0 then bias_dummy =1;
if signed_ferr <=0 then bias_dummy =0;
run;


*compute average accuracy and bias for estimize forecasts;

proc sort data=FULL_SAMPLE2;
by ID;
run;
proc univariate noprint data=FULL_SAMPLE2;
by ID;
var pmafe pmsfe signed_ferr_prc bias_dummy;
output out=est_pmafe mean=est_pmafe est_pmsfe est_signed_ferr_prc est_bias median=est_med_pmafe est_med_pmsfe est_med_signed_ferr_prc est_med_bias;
where estimize_sample =1;
run;

*compute average accuracy and bias for IBES forecasts;
proc sort data=full_sample2;
by ID;
run;
proc univariate noprint data=full_sample2;
by ID;
var pmafe pmsfe signed_ferr_prc bias_dummy;
output out=ibes_pmafe mean=ibes_pmafe ibes_pmsfe ibes_signed_ferr_prc ibes_bias median=ibes_med_pmafe ibes_med_pmsfe  ibes_med_signed_ferr_prc ibes_med_bias;
where estimize_sample ~=1;
run;

PROC SORT DATA=EST_PMAFE;
BY ID;
RUN;
PROC SORT DATA=IBES_PMAFE;
BY ID;
RUN;



proc sql;
create table est_pmafe2 as
select *
from est_pmafe, ibes_pmafe
where est_pmafe.ID = ibes_pmafe.ID;
quit;

proc means data=est_pmafe2;
var est_pmafe est_med_pmafe ibes_pmafe ibes_med_pmafe 
est_pmsfe est_med_pmsfe ibes_pmsfe ibes_med_pmsfe
est_bias est_med_bias ibes_bias ibes_med_bias;
run;

data est_pmafe2;
set est_pmafe2;
dif_pmafe = est_pmafe - ibes_pmafe;
dif_med_pmafe = est_med_pmafe - ibes_med_pmafe;
dif_pmsfe = est_pmsfe - ibes_pmsfe;
dif_med_pmsfe = est_med_pmsfe - ibes_med_pmsfe;
dif_signed_ferr_prc = est_signed_ferr_prc - ibes_signed_ferr_prc;
dif_bias = est_bias - ibes_bias;
dif_med_bias = est_med_bias - ibes_med_bias;
run;



* WANT to compute the consensus for any given point in time prior to the forecast;


proc sort data=full_sample2;
by id descending forecast_age;
run;

*THIS RANKS FORECASTS IN TERMS OF THEIR AGE;
data full_sample3;
set full_sample2;
count +1;
by id;
if first.id then count =1;
run;
data full_sample4;
set full_sample3;
by id;
retain summed_estimate;
if first.id  then summed_estimate = 0;
summed_estimate = summed_estimate + value;
consensus = (summed_estimate - value)/(count -1);
if first.id then consensus = .;
boldness = abs(value - consensus);
run;
proc means noprint data=full_sample4;
by id;
var boldness;
output out=ave_boldness mean=ave_boldness;
run;
proc sql;
create table full_sample4 as
select *
from full_sample4, ave_boldness
where full_sample4.id = ave_boldness.id;
quit;
data full_sample4;
set full_sample4;
pct_bold = boldness/ave_boldness;
run;
proc univariate data=full_sample4;
var boldness pct_bold;
run;

*THIS NOW INCLUDES ACCURACY, BIAS, AND BOLDNESS FOR ALL FIRM-QUARTER ESTIMATES;
data mylib.ibes_matched_sample;
set full_sample4;
run;




*DUE TO SIGNIFICANT DIFFERENCES IN AGE, WILL ASSIGN ALL FORECASTS INTO SPECIFIC FORECAST HORIZON GROUPS;


data age;
set mylib.ibes_matched_sample;
if 30<=forecast_age<=90 then group1 = 1;
else group1 =0;
if 10<=forecast_age<=29 then group2 = 1;
else group2 =0;
if 5<=forecast_age<=9 then group3 = 1;
else group3 =0;
if 1<=forecast_age<=4 then group4 = 1;
else group4 =0;
if 0<=forecast_age<=0 then group5 = 1;
else group5 =0;
signed_ferr_prc2 = signed_ferr_prc *100;
run;

proc univariate data=age;
var signed_ferr_prc2;
where estimize_sample ~=1;
run;


data test;
set age;
where estimize_sample =1;
run;
proc sort nodupkey data=test out=test2;
by estimize_sample ticker anndats_act;
run;


proc means data=test2;
var group1 group2 group3 group4 group5;
run;



**Here I report the results for Group 1 (forecast age of 30 to 90);
*** can change group number to report results for other forecast age groups;

data age1;
set age;
*simply change the group number to look at other event windows;
if group1 =1;
run;
proc sort data=age1;
by id oftic anndats_act ;
run;

proc univariate noprint data=age1;
by id oftic anndats_act ;
var pmafe pmsfe signed_ferr_prc  bias_dummy pct_bold;
output out=est mean = est_pmafe est_pmsfe est_signed_ferr_prc est_bias_dummy est_pct_bold;
where estimize_sample =1;
run;





proc univariate noprint data=age1;
by id oftic anndats_act ;
var pmafe pmsfe signed_ferr_prc  bias_dummy pct_bold;
output out=ibes mean = ibes_pmafe ibes_pmsfe ibes_signed_ferr_prc ibes_bias_dummy ibes_pct_bold;
where estimize_sample ~=1;
run;

data est;
merge est ibes;
by id oftic anndats_act ;
dif_pmafe = est_pmafe - ibes_pmafe;
dif_pmsfe = est_pmsfe - ibes_pmsfe;
dif_singed_ferr_prc = est_signed_ferr_prc - ibes_signed_ferr_prc;
dif_bias = est_bias_dummy - ibes_bias_dummy;
dif_bold = est_pct_bold - ibes_pct_bold;
run;


data bias;
set est;
keep oftic anndats_act ibes_bias_dummy est_bias_dummy ibes_signed_ferr_prc;
run;

proc sort data=bias;
by oftic anndats_act;
run;
proc sort data=results;
by oftic anndats_act;
run;
data merged;
merge bias results;
by oftic anndats_act;
run;

proc corr data=merged;
var ibes_bias_dummy ibes_bias2 numest;
run;
proc means data=merged;
var ibes_bias_dummy ibes_bias2;
run;

proc means data=merged;
var ibes_bias_dummy ibes_bias2;
where est_bias_dummy =.;
run;











proc means data=est;
var  est_bias_dummy ibes_bias_dummy

est_pmsfe ibes_pmsfe


 est_signed_ferr_prc  ibes_signed_ferr_prc;
    

where dif_pmafe ~=.;
run;



proc means data=est;
var est_pct_bold ibes_pct_bold dif_bold;
where dif_bold ~=.;
run;


proc surveyreg data=est;
cluster oftic;
model dif_pmafe = ;
quit;

proc surveyreg data=est;
cluster oftic;
model dif_singed_ferr_prc = ;
quit;
proc surveyreg data=est;
cluster oftic;
model dif_bold = ;
quit;

****

								Part 4: Consensus Forecast Accuracy (Tables, 5, 6, 7, and 8) 

****

 we again start with the estimize-ibes matched sample (Panel B of Table 1);



****Below are results comparing to the un-adjusted IBES sample (Table 5);

data estimize3;
set mylib.estimize_filtered;
drop obs;
run;


data estimates;
set mylib.ibes_consensus;
year = year(anndats_act);
run;
proc sort data=estimates;
by year;
run;



data ibes;
set mylib.detail_forecasts;
if fpi =6;
est_year = year(anndats);
if 2012<=est_year <=2013;
year = year(ANNDATS_ACT);
month = month(ANNDATS_ACT);
if month <=3 then quarter =1;
if 4<=month <=6 then quarter =2;
if 7<=month <=9 then quarter =3;
if month >9 then quarter =4;
cum_quarter = (year - 2012) *4 + quarter;
forecast_age = ANNDATS_ACT - anndats;
if 0<=forecast_age<=90;
if ANALYS =0  then delete;
run;


*will now require at least on estimize forecast;

proc sort nodupkey data=estimize3 out=sample;
by ticker ANNDATS_ACT;
run;
proc sql;
create table ibes2 as
select *
from ibes, sample
where ibes.oftic = sample.ticker and ibes.anndats_act = sample.anndats_act;
quit;

data ibes3;
set ibes2;
estimize_sample=0;
abs_ferr = abs (value - actual);
signed_ferr =   actual - value;
ticker = oftic;
 if ANALYS = 0 then delete;
keep ticker forecast_age actual estimize_sample anndats  forecast_age value abs_ferr signed_ferr ANNDATS_ACT ANALYS;
run;

data pmafe;
set estimize3 ibes3;
run;

proc sort nodupkey data=estimize3 out=check;
by ticker anndats_act;
run;


proc sort data=pmafe;
by ticker anndats_act;
run;
proc univariate noprint data=pmafe;
by ticker anndats_act;
var abs_ferr   ;
output out=results mean= ave_abs_ferr N=obs;
run;



proc sql;
create table pmafe2 as
select *
from pmafe, results
where pmafe.ticker = results.ticker and pmafe.anndats_act = results.anndats_act;
quit;


proc sort nodupkey data=pmafe2 out=check;
by ticker anndats_act;
run;

data check;
set check;
if estimize_sample =1;
run;



data pmafe2;
set pmafe2;
year = year(anndats_act);
if obs >=5;
run;

proc reg data=pmafe2;
model actual = value;
where estimize_sample =0 and year =2013;
quit;



proc sort nodupkey data=pmafe2 out=check;
by ticker anndats_act;
run;

data pmafe30;
set pmafe2;
*** just change this filter to test different holding periods;
if forecast_age >=60;
run;

*I want to focus on the most recent forecast for a given analyst-firm-quarter;

proc sort data=pmafe30;
by ticker anndats_act ANALYS username forecast_age;
run;

PROC SORT NODUPKEY DATA=PMAFE30 OUT=PMAFE30B;
by ticker anndats_act ANALYS username ;
run;
PROC MEANS DATA=PMAFE30; VAR FORECAST_AGE;RUN;
PROC MEANS DATA=PMAFE30B; VAR FORECAST_AGE;RUN;




proc sort data=PMAFE30B;
by ticker anndats_act;
run;
proc univariate noprint data=PMAFE30B;
by ticker anndats_act;
var estimize_sample;
output out=results mean=ave_estimize_sample;
run;

proc sql;
create table PMAFE30C as
select *
from PMAFE30B, results
where PMAFE30B.ticker = results.ticker and PMAFE30B.anndats_act = results.anndats_act;
quit;

data PMAFE30C;
set PMAFE30C;
*REQUIRE AT LEAST ONE ESTIMIZE FORECAST;
IF ave_estimize_sample =0 THEN DELETE;
*REQUIRE AT LEAST ONE IBES FORECAST;
IF ave_estimize_sample =1 THEN DELETE;
*pmafe = (abs_ferr - ave_abs_ferr)/ave_abs_ferr;
*adj_forecast_age = forecast_age - ave_forecast_age;
*adj_estimize_sample = estimize_sample - ave_estimize_sample;
run;

proc sort data=PMAFE30C;
by ticker anndats_act ave_abs_ferr;
run;
proc univariate noprint data=PMAFE30C;
by ticker anndats_act ave_abs_ferr ACTUAL ;
var VALUE;
output out=IBES_results mean= IBES_VALUE N=IBES_obs;
WHERE ESTIMIZE_SAMPLE =0;
run;




proc univariate noprint data=PMAFE30C;
by ticker anndats_act ave_abs_ferr ACTUAL;
var VALUE;
output out=ESTIMIZE_results mean= ESTIMIZE_VALUE N=ESTIMIZE_obs;
WHERE ESTIMIZE_SAMPLE =1;
run;

DATA RESULTS;
MERGE IBES_RESULTS ESTIMIZE_RESULTS;
by ticker anndats_act ave_abs_ferr;

total_obs = estimize_obs + ibes_obs;
estimize_weight = estimize_obs/total_obs;
ibes_weight = ibes_obs/total_obs;
weighted_VALUE  = estimize_weight * estimize_value + ibes_weight * ibes_value;



IBES_AFE = ABS(IBES_VALUE - ACTUAL);
ESTIMIZE_AFE = ABS(ESTIMIZE_VALUE - ACTUAL);
WEIGHTED_AVE_AFE = ABS(WEIGHTED_VALUE - ACTUAL);

IBES_ACC = (IBES_AFE - AVE_ABS_FERR)/AVE_ABS_FERR;
ESTIMIZE_ACC = (ESTIMIZE_AFE - AVE_ABS_FERR)/AVE_ABS_FERR;
WEIGHTED_AVE_ACC = (WEIGHTED_AVE_AFE - AVE_ABS_FERR)/AVE_ABS_FERR;

weighted_ave_minus_ibes = WEIGHTED_AVE_ACC - ibes_acc;

if WEIGHTED_AVE_ACC < ibes_acc then weighted_average_wins =1;
if WEIGHTED_AVE_ACC > ibes_acc then weighted_average_wins =0;
weighted_average_wins2 = weighted_average_wins - .5;
RUN;



proc univariate data=RESULTS;
var   estimize_acc ibes_acc weighted_ave_acc    weighted_ave_minus_ibes 
 weighted_average_wins2;
run;


results below report the values for Table 5 (for horizon chosen);
proc surveyreg data=results;
cluster ticker;
model estimize_acc =;
quit;
proc surveyreg data=results;
cluster ticker;
model ibes_acc =;
quit;
proc surveyreg data=results;
cluster ticker;
model weighted_ave_acc =;
quit;
proc surveyreg data=results;
cluster ticker;
model weighted_ave_minus_ibes =;
quit;
proc surveyreg data=results;
cluster ticker;
model weighted_average_wins2 =;
quit;







*below are results comparing to the de-biased IBES sample ( Table 6);


data estimize3;
set mylib.estimize_filtered;
drop obs;
run;



data ibes;
set mylib.detail_forecasts;
if fpi =6;
est_year = year(anndats);
if 2012<=est_year <=2013;
year = year(ANNDATS_ACT);
month = month(ANNDATS_ACT);
if month <=3 then quarter =1;
if 4<=month <=6 then quarter =2;
if 7<=month <=9 then quarter =3;
if month >9 then quarter =4;
cum_quarter = (year - 2012) *4 + quarter;
forecast_age = ANNDATS_ACT - anndats;
if 0<=forecast_age<=90;

*the step below de-biases IBES forecasts by adding 0.02 to the intercept and multiplying the ibes forecast by 1.02;
value = value*1.02 + 0.02;
if ANALYS =0  then delete;
run;


*remaining steps are identical to the computation needed for Table 5;


*will now require at least on estimize forecast;

proc sort nodupkey data=estimize3 out=sample;
by ticker ANNDATS_ACT;
run;
proc sql;
create table ibes2 as
select *
from ibes, sample
where ibes.oftic = sample.ticker and ibes.anndats_act = sample.anndats_act;
quit;

data ibes3;
set ibes2;
estimize_sample=0;
abs_ferr = abs (value - actual);
signed_ferr =   actual - value;
ticker = oftic;
 if ANALYS = 0 then delete;
keep ticker forecast_age actual estimize_sample anndats  forecast_age value abs_ferr signed_ferr ANNDATS_ACT ANALYS;
run;

data pmafe;
set estimize3 ibes3;
run;

proc sort nodupkey data=estimize3 out=check;
by ticker anndats_act;
run;


proc sort data=pmafe;
by ticker anndats_act;
run;
proc univariate noprint data=pmafe;
by ticker anndats_act;
var abs_ferr   ;
output out=results mean= ave_abs_ferr N=obs;
run;



proc sql;
create table pmafe2 as
select *
from pmafe, results
where pmafe.ticker = results.ticker and pmafe.anndats_act = results.anndats_act;
quit;


proc sort nodupkey data=pmafe2 out=check;
by ticker anndats_act;
run;

data check;
set check;
if estimize_sample =1;
run;



data pmafe2;
set pmafe2;
year = year(anndats_act);
if obs >=5;
run;

proc reg data=pmafe2;
model actual = value;
where estimize_sample =0 and year =2013;
quit;



proc sort nodupkey data=pmafe2 out=check;
by ticker anndats_act;
run;

data pmafe30;
set pmafe2;
*** just change this filter to test different holding periods;
if forecast_age >=60;
run;

*I want to focus on the most recent forecast for a given analyst-firm-quarter;

proc sort data=pmafe30;
by ticker anndats_act ANALYS username forecast_age;
run;

PROC SORT NODUPKEY DATA=PMAFE30 OUT=PMAFE30B;
by ticker anndats_act ANALYS username ;
run;
PROC MEANS DATA=PMAFE30; VAR FORECAST_AGE;RUN;
PROC MEANS DATA=PMAFE30B; VAR FORECAST_AGE;RUN;




proc sort data=PMAFE30B;
by ticker anndats_act;
run;
proc univariate noprint data=PMAFE30B;
by ticker anndats_act;
var estimize_sample;
output out=results mean=ave_estimize_sample;
run;

proc sql;
create table PMAFE30C as
select *
from PMAFE30B, results
where PMAFE30B.ticker = results.ticker and PMAFE30B.anndats_act = results.anndats_act;
quit;

data PMAFE30C;
set PMAFE30C;
*REQUIRE AT LEAST ONE ESTIMIZE FORECAST;
IF ave_estimize_sample =0 THEN DELETE;
*REQUIRE AT LEAST ONE IBES FORECAST;
IF ave_estimize_sample =1 THEN DELETE;
*pmafe = (abs_ferr - ave_abs_ferr)/ave_abs_ferr;
*adj_forecast_age = forecast_age - ave_forecast_age;
*adj_estimize_sample = estimize_sample - ave_estimize_sample;
run;

proc sort data=PMAFE30C;
by ticker anndats_act ave_abs_ferr;
run;
proc univariate noprint data=PMAFE30C;
by ticker anndats_act ave_abs_ferr ACTUAL ;
var VALUE;
output out=IBES_results mean= IBES_VALUE N=IBES_obs;
WHERE ESTIMIZE_SAMPLE =0;
run;




proc univariate noprint data=PMAFE30C;
by ticker anndats_act ave_abs_ferr ACTUAL;
var VALUE;
output out=ESTIMIZE_results mean= ESTIMIZE_VALUE N=ESTIMIZE_obs;
WHERE ESTIMIZE_SAMPLE =1;
run;

DATA RESULTS;
MERGE IBES_RESULTS ESTIMIZE_RESULTS;
by ticker anndats_act ave_abs_ferr;

total_obs = estimize_obs + ibes_obs;
estimize_weight = estimize_obs/total_obs;
ibes_weight = ibes_obs/total_obs;
weighted_VALUE  = estimize_weight * estimize_value + ibes_weight * ibes_value;



IBES_AFE = ABS(IBES_VALUE - ACTUAL);
ESTIMIZE_AFE = ABS(ESTIMIZE_VALUE - ACTUAL);
WEIGHTED_AVE_AFE = ABS(WEIGHTED_VALUE - ACTUAL);

IBES_ACC = (IBES_AFE - AVE_ABS_FERR)/AVE_ABS_FERR;
ESTIMIZE_ACC = (ESTIMIZE_AFE - AVE_ABS_FERR)/AVE_ABS_FERR;
WEIGHTED_AVE_ACC = (WEIGHTED_AVE_AFE - AVE_ABS_FERR)/AVE_ABS_FERR;



weighted_ave_minus_ibes = WEIGHTED_AVE_ACC - ibes_acc;



if WEIGHTED_AVE_ACC < ibes_acc then weighted_average_wins =1;
if WEIGHTED_AVE_ACC > ibes_acc then weighted_average_wins =0;
weighted_average_wins2 = weighted_average_wins - .5;
RUN;



proc univariate data=RESULTS;
var   estimize_acc ibes_acc weighted_ave_acc    weighted_ave_minus_ibes 
 weighted_average_wins2;
run;


results below report the values for Table 6;;
proc surveyreg data=results;
cluster ticker;
model estimize_acc =;
quit;
proc surveyreg data=results;
cluster ticker;
model ibes_acc =;
quit;
proc surveyreg data=results;
cluster ticker;
model weighted_ave_acc =;
quit;
proc surveyreg data=results;
cluster ticker;
model weighted_ave_minus_ibes =;
quit;
proc surveyreg data=results;
cluster ticker;
model weighted_average_wins2 =;
quit;



 next we examine the cross-sectional determinants of the incremental usefulness of the Estimize Consensus (Table 7);



* start with IBES-Estimize match sample;

data estimize3;
set mylib.estimize_filtered;
drop obs;
run;


data ibes;
set mylib.detail_forecasts;
if fpi =6;
est_year = year(anndats);
if 2011<=est_year <=2013;
year = year(ANNDATS_ACT);
month = month(ANNDATS_ACT);
if month <=3 then quarter =1;
if 4<=month <=6 then quarter =2;
if 7<=month <=9 then quarter =3;
if month >9 then quarter =4;
cum_quarter = (year - 2012) *4 + quarter;
forecast_age = ANNDATS_ACT - anndats;
if 0<=forecast_age<=90;
if ANALYS =0  then delete;
run;


*will merge with Byoung-Hyoung Hwang's data available here: http://www.bhwang.com/research.html;

data permno;
set mylib.ibes_ticker_permno;
end_year = year(enddate);
if end_year >=2011;
keep permno ticker ;
run;

proc sql;
create table ibes as
select *
from ibes, permno
where ibes.ticker = permno.ticker;
quit;




proc sort nodupkey data=estimize3 out=sample;
by ticker ANNDATS_ACT;
run;
proc sql;
create table ibes2 as
select *
from ibes, sample
where ibes.oftic = sample.ticker and ibes.anndats_act = sample.anndats_act;
quit;

data ibes3;
set ibes2;
estimize_sample=0;
abs_ferr = abs (value - actual);
signed_ferr =   actual - value;

ticker = oftic;
 if ANALYS = 0 then delete;
keep ticker forecast_age actual estimize_sample anndats  forecast_age value abs_ferr signed_ferr ANNDATS_ACT ANALYS;
run;

data pmafe;
set estimize3 ibes3;
run;

proc sort nodupkey data=estimize3 out=check;
by ticker anndats_act;
run;


proc sort data=pmafe;
by ticker anndats_act;
run;
proc univariate noprint data=pmafe;
by ticker anndats_act;
var abs_ferr   ;
output out=results mean= ave_abs_ferr N=obs;
run;



proc sql;
create table pmafe2 as
select *
from pmafe, results
where pmafe.ticker = results.ticker and pmafe.anndats_act = results.anndats_act;
quit;


proc sort nodupkey data=pmafe2 out=check;
by ticker anndats_act;
run;

data check;
set check;
if estimize_sample =1;
run;



data pmafe2;
set pmafe2;
if obs >=5;
run;

proc sort nodupkey data=pmafe2 out=check;
by ticker anndats_act;
run;

data pmafe30;
set pmafe2;
*** just change this filter to test different holding periods;
if forecast_age >=5;
run;

*I want to focus on the most recent forecast for a given analyst-firm-quarter;

proc sort data=pmafe30;
by ticker anndats_act ANALYS username forecast_age;
run;

PROC SORT NODUPKEY DATA=PMAFE30 OUT=PMAFE30B;
by ticker anndats_act ANALYS username ;
run;
PROC MEANS DATA=PMAFE30; VAR FORECAST_AGE;RUN;
PROC MEANS DATA=PMAFE30B; VAR FORECAST_AGE;RUN;




proc sort data=PMAFE30B;
by ticker anndats_act;
run;
proc univariate noprint data=PMAFE30B;
by ticker anndats_act;
var estimize_sample;
output out=results mean=ave_estimize_sample;
run;

proc sql;
create table PMAFE30C as
select *
from PMAFE30B, results
where PMAFE30B.ticker = results.ticker and PMAFE30B.anndats_act = results.anndats_act;
quit;

data PMAFE30C;
set PMAFE30C;
*REQUIRE AT LEAST ONE ESTIMIZE FORECAST;
IF ave_estimize_sample =0 THEN DELETE;
*REQUIRE AT LEAST ONE IBES FORECAST;
IF ave_estimize_sample =1 THEN DELETE;
*pmafe = (abs_ferr - ave_abs_ferr)/ave_abs_ferr;
*adj_forecast_age = forecast_age - ave_forecast_age;
*adj_estimize_sample = estimize_sample - ave_estimize_sample;
run;

proc sort data=PMAFE30C;
by ticker anndats_act ave_abs_ferr;
run;
proc univariate noprint data=PMAFE30C;
by ticker anndats_act ave_abs_ferr ACTUAL ;
var VALUE forecast_age;
output out=IBES_results mean= IBES_VALUE ibes_age N=IBES_obs;
WHERE ESTIMIZE_SAMPLE =0;
run;




proc univariate noprint data=PMAFE30C;
by ticker anndats_act ave_abs_ferr ACTUAL;
var VALUE forecast_age;
output out=ESTIMIZE_results mean= ESTIMIZE_VALUE estimize_age N=ESTIMIZE_obs;
WHERE ESTIMIZE_SAMPLE =1;
run;


DATA RESULTS;
MERGE IBES_RESULTS ESTIMIZE_RESULTS;
by ticker anndats_act ave_abs_ferr;

total_obs = estimize_obs + ibes_obs;
estimize_weight = estimize_obs/total_obs;
ibes_weight = ibes_obs/total_obs;
weighted_VALUE  = estimize_weight * estimize_value + ibes_weight * ibes_value;

*weighted_VALUE  = .5 * estimize_value + .5 * ibes_value;



IBES_AFE = ABS(IBES_VALUE - ACTUAL);
ESTIMIZE_AFE = ABS(ESTIMIZE_VALUE - ACTUAL);
WEIGHTED_AVE_AFE = ABS(WEIGHTED_VALUE - ACTUAL);

IBES_ACC = (IBES_AFE - AVE_ABS_FERR)/AVE_ABS_FERR;
ESTIMIZE_ACC = (ESTIMIZE_AFE - AVE_ABS_FERR)/AVE_ABS_FERR;
WEIGHTED_AVE_ACC = (WEIGHTED_AVE_AFE - AVE_ABS_FERR)/AVE_ABS_FERR;



weighted_ave_minus_ibes = WEIGHTED_AVE_ACC - ibes_acc;



if WEIGHTED_AVE_ACC < ibes_acc then weighted_average_wins =1;
if WEIGHTED_AVE_ACC > ibes_acc then weighted_average_wins =0;
weighted_average_wins2 = weighted_average_wins - .5;
dif_age = estimize_age - ibes_age;
contributor_ratio = estimize_obs/ibes_obs;
log_est_obs = log(estimize_obs);
log_ibes_obs = log(ibes_obs);
dif_obs = log_est_obs - log_ibes_obs;
if estimize_obs >3 then est3 =1;
else est3 =0;
if estimize_obs >3 then est3 =1;
else est3 =0;
if ibes_obs >10 then ibes10 =1;
else ibes10 =0;
log_est_age = log(1+estimize_age);
log_ibes_age = log(1+ibes_age);
dif_acc = estimize_acc - ibes_acc;
year = year(ANNDATS_ACT);
month = month(anndats_act);
if month <=3 then q=1;
if 4<=month <=6 then q=2;
if 7<=month <=9 then q=3;
if 10<=month <=12 then q=4;
cum_quarter = (year - 2012)*4 + q;
tracker = _N_;
RUN;

/*
data mylib.table5_sample;
set results;
run;
*/


proc sort data=results;
by ticker cum_quarter;
run;




proc univariate data=RESULTS;
var   estimize_acc ibes_acc weighted_ave_acc    weighted_ave_minus_ibes 
 weighted_average_wins2 estimize_age ibes_age dif_acc;
run;


data results;
set results;
weighted_ave_minus_ibes2 = weighted_ave_minus_ibes;
*weighted_ave_minus_ibes2 winsorizes values at -1.3 and 0.7;
if weighted_ave_minus_ibes2 <-1.3 then weighted_ave_minus_ibes2 =-1.3;
if weighted_ave_minus_ibes2 >0.7 then weighted_ave_minus_ibes2 =0.7;
dif_acc2 = dif_acc;
*dif_acc2 winsorized at plus and minus 2;
if dif_acc2 <-2 then dif_acc2 = -2;
if dif_acc2 >2 then dif_acc2 =2;
run;
proc univariate data=results;
var weighted_ave_minus_ibes weighted_ave_minus_ibes2 dif_acc2;
run;






*now want to merge wtih stock characteristics;

data results;
set results;
char_year = year(anndats_act) -1;
obs_num = _N_;
run;
data char;
set mylib.char2;
char_year =year;
run;

proc sql;
create table results2 as
select *
from results, char
where results.ticker = char.ticker and results.char_year = char.char_year;
quit;
 proc sort nodupkey data=results2;
 by obs_num;
 run;

data results2;
set results2;
if log_bm = . then delete;
if log_turn = . then delete;
if log_size = . then delete;
if log_vol = . then delete;
run;

now want to standardize variables;

proc univariate noprint data=results2;
var estimize_age ibes_age log_est_obs log_ibes_obs log_size log_bm log_turn log_vol;
output out=std mean = ave_estimize_age ave_ibes_age ave_log_est_obs ave_log_ibes_obs ave_log_size ave_log_bm ave_log_turn ave_log_vol
std = std_estimize_age std_ibes_age std_log_est_obs std_log_ibes_obs std_log_size std_log_bm std_log_turn std_log_vol;
run;
proc sql;
create table results2 as
select *
from results2, std;
quit;

data results3;
set results2;
estimize_age  = (estimize_age - ave_estimize_age)/std_estimize_age;
ibes_age  = (ibes_age - ave_ibes_age)/std_ibes_age;

log_est_obs  = (log_est_obs - ave_log_est_obs)/std_log_est_obs;
log_ibes_obs  = (log_ibes_obs - ave_log_ibes_obs)/std_log_ibes_obs;

log_size  = (log_size - ave_log_size)/std_log_size;
log_bm  = (log_bm - ave_log_bm)/std_log_bm;
log_turn  = (log_turn - ave_log_turn)/std_log_turn;
log_vol  = (log_vol - ave_log_vol)/std_log_vol;
drop ave_estimize_age ave_ibes_age ave_log_est_obs ave_log_ibes_obs ave_log_size ave_log_bm ave_log_turn ave_log_vol
std_estimize_age std_ibes_age std_log_est_obs std_log_ibes_obs std_log_size std_log_bm std_log_turn std_log_vol;
run;



proc surveyreg data=results3;
cluster ticker;
model weighted_ave_minus_ibes2 = estimize_age ibes_age log_est_obs log_ibes_obs log_size log_bm log_turn log_vol;
ods output parameterestimates=spec2;
quit;



proc surveyreg data=results3;
cluster ticker;
model dif_acc = estimize_age ibes_age log_est_obs log_ibes_obs log_size log_bm log_turn log_vol;
ods output parameterestimates=spec2;
quit;



data results3;
set results3;
weighted_average_wins3 = weighted_average_wins2 *-1;
run;

proc surveylogistic data=results3;
cluster ticker;
model weighted_average_wins3 = estimize_age ibes_age log_est_obs log_ibes_obs log_size log_bm log_turn log_vol/rsq;
ods output parameterestimates=spec4;
ods output  OddsRatios =spec5;
quit;





data spec4;
set spec4;
if Variable = 'Intercept' then delete;
 t= WaldChiSq ** .5;
 if estimate >0 then sign =1;
 else sign = -1;
 OddsRatioEst = t * sign;
 obs = _N_ - .5;
 keep OddsRatioEst obs;
run;




data spec5;
set spec5;
obs = _n_;
*keep parameter OddsRatioEst;
run;
data spec4;
set spec4 spec5;
keep obs OddsRatioEst;
run;
proc sort data=spec4;
by obs;
run;



data results1;
set spec2;
est1 = Estimate;
tstat1 = tValue;
keep parameter est1 tstat1;
run;



data parm;
set results1;
obs = _n_;
keep parameter est1 obs;
run;
data tstat;
set results1;
obs = _n_ + .5;
est1 = tstat1;
keep parameter est1 obs;
run;


data spec1;
set parm tstat;
run;
proc sort data=spec1;
by obs;
run;
proc sort nodupkey data=spec2;
by obs;
run;

proc sort nodupkey data=spec4;
by obs;
run;


data table7;
merge spec1 spec4 ;
OddsRatioEst2 = OddsRatioEst *1;
by obs;
run;






** now will report estimates of regressions of actual eps on ibes and estimize forecasts issued on the same day
(if multiple forecasts are issued on the same day, then the forecasts are averaged into a consensus);





data estimize3;
set mylib.estimize_filtered;
if cum_quarter =9 then delete;
DROP OBS;
run;


data ibes;
set mylib.detail_forecasts;
if fpi =6;
est_year = year(anndats);
if 2012<=est_year <=2013;
year = year(ANNDATS_ACT);
month = month(ANNDATS_ACT);
if month <=3 then quarter =1;
if 4<=month <=6 then quarter =2;
if 7<=month <=9 then quarter =3;
if month >9 then quarter =4;
cum_quarter = (year - 2012) *4 + quarter;
if cum_quarter = 9 then delete;
forecast_age = ANNDATS_ACT - anndats;
if 0<=forecast_age<=90;
run;





*will now require at least on estimize forecast;

proc sort nodupkey data=estimize3 out=sample;
by ticker ANNDATS_ACT;
run;
proc sql;
create table ibes2 as
select *
from ibes, sample
where ibes.oftic = sample.ticker and ibes.anndats_act = sample.anndats_act;
quit;

data ibes3;
set ibes2;
estimize_sample=0;
abs_ferr = abs (value - actual);
signed_ferr =   actual - value;
ticker = oftic;
 if ANALYS = 0 then delete;
keep ticker forecast_age actual estimize_sample anndats  forecast_age value abs_ferr signed_ferr ANNDATS_ACT ANALYS;
run;

data pmafe;
set estimize3 ibes3;
run;




proc sort nodupkey data=estimize3 out=check;
by ticker anndats_act;
run;


proc sort data=pmafe;
by ticker anndats_act;
run;
proc univariate noprint data=pmafe;
by ticker anndats_act;
var abs_ferr   ;
output out=results mean= ave_abs_ferr N=obs;
run;



proc sql;
create table pmafe2 as
select *
from pmafe, results
where pmafe.ticker = results.ticker and pmafe.anndats_act = results.anndats_act;
quit;




proc sort nodupkey data=pmafe2 out=check;
by ticker anndats_act;
run;



data pmafe2;
set pmafe2;
if obs >=5;
run;



proc sort data=pmafe2;
by ticker  anndats_act forecast_age actual;
run;
proc means noprint data=pmafe2;
by ticker  anndats_act forecast_age actual;
var value;
output out=est_value mean=est_value N=est_obs;
where estimize_sample =1;
run;




proc means noprint data=pmafe2;
by ticker  anndats_act forecast_age actual;
var value;
output out=ibes_value mean=ibes_value N=ibes_obs;
where estimize_sample ~=1;
run;

data aa2;
set ibes_value;
if ticker = 'AA';
run;




data same_time;
merge est_value ibes_value;
by ticker  anndats_act forecast_age ;
if ibes_value = . then delete;
if est_value = . then delete;
year = year(ANNDATS_ACT);
month = month(ANNDATS_ACT);
if month <=3 then quarter =1;
if 4<=month <=6 then quarter =2;
if 7<=month <=9 then quarter =3;
if month >9 then quarter =4;
cum_quarter = (year - 2012) *4 + quarter;
obs_num = _N_;
TOTAL_OBS = EST_OBS + IBES_OBS;
COMBINED_VALUE = (EST_OBS/TOTAL_OBS)*EST_VALUE + IBES_OBS/TOTAL_OBS * IBES_Value;
log_est_obs = log(est_obs);
log_ibes_obs = log(ibes_obs);
est_value_est_obs = est_value * log_est_obs;
est_value_ibes_obs = est_value * log_ibes_obs;

ibes_value_est_obs = ibes_value * log_est_obs;
ibes_value_ibes_obs = ibes_value * log_ibes_obs;
run;




* specification 1 of table 8;

proc surveyreg data=same_time;
cluster TICKER;
model actual = est_value;
quit;
* specification 2 of table 8 (and also saves residuals);

proc surveyreg data=same_time;
cluster TICKER;
model actual = ibes_value;
OUTPUT OUT=IBES_RESID RESIDUAL=RESIDUALS;
quit;



DATA IBES_RESID;
SET IBES_RESID;
ABS_IBES_RESID = ABS(RESIDUALS);
KEEP OBS_NUM ABS_IBES_RESID;
RUN;

* specification 3 of table 8 (and also saves residuals);

proc surveyreg data=same_time;
cluster TICKER;
model actual = combined_value;
OUTPUT OUT=COMBINED_RESID RESIDUAL=COMBINED_RESIDUALS;
quit;

DATA COMBINED_RESID;
SET COMBINED_RESID;
ABS_COMBINED_RESID = ABS(COMBINED_RESIDUALS);
KEEP OBS_NUM ABS_COMBINED_RESID ticker;
RUN;

DATA COMBINED_RESID;
MERGE COMBINED_RESID IBES_RESID;
BY OBS_NUM;
DIF = ABS_COMBINED_RESID -ABS_IBES_RESID;
IF DIF <0 THEN ACCURATE =.5;
IF DIF >0 THEN ACCURATE = -.5;
RUN;

PROC UNIVARIATE DATA=COMBINED_RESID;
VAR ACCURATE;
RUN;
*specification 3 results in a better prediction than specification 2 in roughly 54% of contemporaneous forecasts (t = 2.83);
proc surveyreg data=COMBINED_RESID;
cluster ticker;
model accurate = ;
quit;


*reports specification 4;

proc surveyreg data=same_time;
cluster TICKER;
model actual = est_value ibes_value;
quit;

*reports specification 5;
proc surveyreg data=same_time;
cluster TICKER;
model actual = est_value ibes_value est_value_est_obs est_value_ibes_obs ibes_value_est_obs ibes_value_ibes_obs log_est_obs log_ibes_obs;
quit;




****

								Part 5: Earnings Response Coefficients (Table 9)

****


*will begin by copying the forecast accuracy code from Part 4 but now changing the forecast age requirement to 2 days;


 begin copying forecast accuracy code *;

data estimize3;
set mylib.estimize_filtered;
if cum_quarter =9 then delete;
DROP OBS;
run;




data ibes;
set mylib.detail_forecasts;
if fpi =6;
est_year = year(anndats);
if 2012<=est_year <=2013;
year = year(ANNDATS_ACT);
month = month(ANNDATS_ACT);
if month <=3 then quarter =1;
if 4<=month <=6 then quarter =2;
if 7<=month <=9 then quarter =3;
if month >9 then quarter =4;
cum_quarter = (year - 2012) *4 + quarter;
forecast_age = ANNDATS_ACT - anndats;
if 0<=forecast_age<=90;
if ANALYS =0  then delete;
run;
run;





*will now require at least on estimize forecast;

proc sort nodupkey data=estimize3 out=sample;
by ticker ANNDATS_ACT;
run;
proc sql;
create table ibes2 as
select *
from ibes, sample
where ibes.oftic = sample.ticker and ibes.anndats_act = sample.anndats_act;
quit;

data ibes3;
set ibes2;
estimize_sample=0;
abs_ferr = abs (value - actual);
signed_ferr =   actual - value;

ticker = oftic;
 if ANALYS = 0 then delete;
keep ticker forecast_age actual estimize_sample anndats  forecast_age value abs_ferr signed_ferr ANNDATS_ACT ANALYS;
run;

data pmafe;
set estimize3 ibes3;
run;

proc sort nodupkey data=estimize3 out=check;
by ticker anndats_act;
run;


proc sort data=pmafe;
by ticker anndats_act;
run;
proc univariate noprint data=pmafe;
by ticker anndats_act;
var abs_ferr   ;
output out=results mean= ave_abs_ferr N=obs;
run;



proc sql;
create table pmafe2 as
select *
from pmafe, results
where pmafe.ticker = results.ticker and pmafe.anndats_act = results.anndats_act;
quit;


proc sort nodupkey data=pmafe2 out=check;
by ticker anndats_act;
run;



data pmafe2;
set pmafe2;
if obs >=5;
run;

proc sort nodupkey data=pmafe2 out=check;
by ticker anndats_act;
run;



data pmafe30;
set pmafe2;
 focus on the consensus forecast as of two days before the earnings announcement;
if forecast_age >=2;
run;

*I want to focus on the most recent forecast for a given analyst-firm-quarter;

proc sort data=pmafe30;
by ticker anndats_act ANALYS username forecast_age;
run;

PROC SORT NODUPKEY DATA=PMAFE30 OUT=PMAFE30B;
by ticker anndats_act ANALYS username ;
run;
PROC MEANS DATA=PMAFE30; VAR FORECAST_AGE;RUN;
PROC MEANS DATA=PMAFE30B; VAR FORECAST_AGE;RUN;




proc sort data=PMAFE30B;
by ticker anndats_act;
run;
proc univariate noprint data=PMAFE30B;
by ticker anndats_act;
var estimize_sample;
output out=results mean=ave_estimize_sample;
run;

proc sql;
create table PMAFE30C as
select *
from PMAFE30B, results
where PMAFE30B.ticker = results.ticker and PMAFE30B.anndats_act = results.anndats_act;
quit;

data PMAFE30C;
set PMAFE30C;
*REQUIRE AT LEAST ONE ESTIMIZE FORECAST;
IF ave_estimize_sample =0 THEN DELETE;
*REQUIRE AT LEAST ONE IBES FORECAST;
IF ave_estimize_sample =1 THEN DELETE;
*pmafe = (abs_ferr - ave_abs_ferr)/ave_abs_ferr;
*adj_forecast_age = forecast_age - ave_forecast_age;
*adj_estimize_sample = estimize_sample - ave_estimize_sample;
run;

proc sort data=PMAFE30C;
by ticker anndats_act ave_abs_ferr;
run;
proc univariate noprint data=PMAFE30C;
by ticker anndats_act ave_abs_ferr ACTUAL ;
var VALUE;
output out=IBES_results mean= IBES_VALUE N=IBES_obs;
WHERE ESTIMIZE_SAMPLE =0;
run;




proc univariate noprint data=PMAFE30C;
by ticker anndats_act ave_abs_ferr ACTUAL;
var VALUE;
output out=ESTIMIZE_results mean= ESTIMIZE_VALUE N=ESTIMIZE_obs;
WHERE ESTIMIZE_SAMPLE =1;
run;

DATA RESULTS;
MERGE IBES_RESULTS ESTIMIZE_RESULTS;
by ticker anndats_act ave_abs_ferr;

total_obs = estimize_obs + ibes_obs;
estimize_weight = estimize_obs/total_obs;
ibes_weight = ibes_obs/total_obs;
weighted_VALUE  = estimize_weight * estimize_value + ibes_weight * ibes_value;



IBES_AFE = ABS(IBES_VALUE - ACTUAL);
ESTIMIZE_AFE = ABS(ESTIMIZE_VALUE - ACTUAL);
WEIGHTED_AVE_AFE = ABS(WEIGHTED_VALUE - ACTUAL);

IBES_ACC = (IBES_AFE - AVE_ABS_FERR)/AVE_ABS_FERR;
ESTIMIZE_ACC = (ESTIMIZE_AFE - AVE_ABS_FERR)/AVE_ABS_FERR;
WEIGHTED_AVE_ACC = (WEIGHTED_AVE_AFE - AVE_ABS_FERR)/AVE_ABS_FERR;



weighted_ave_minus_ibes = WEIGHTED_AVE_ACC - ibes_acc;



if WEIGHTED_AVE_ACC < ibes_acc then weighted_average_wins =1;
if WEIGHTED_AVE_ACC > ibes_acc then weighted_average_wins =0;
weighted_average_wins2 = weighted_average_wins - .5;
RUN;

 end copying forecast accuracy code *;

*cumday is simply a file that counts the number of trading days: Jan 3 2012 = day 1, Jan 4th 2012 = Day 2, Dec 30th 2011 =0, etc.;
*useful for computing returns in the three trading days around the announcement;



data cumday;
set mylib.cumday;
run;

proc sql;
create table RESULTS2 as
select *
from RESULTS, cumday
where RESULTS.ANNDATS_ACT = cumday.date;
quit;




*daily returns obtained from CRSP;
data ret;
set mylib.ret11_13;
prc = abs(prc);
abn_ret = ret - ewretd;
abn_ret2 = ret - vwretd;
year = year(date);
keep permno ticker ret abn_ret abn_ret2 date prc year;
run;

*THIS FILE PLACES STOCKS INTO DECILES BASED ON NYSE SIZE BREAKPOINTS AT TEHE END OF THE YEAR;


data size;
set mylib.size_adjustments;
year = year(date);
month = month(date);
if month =12;
size = abs (prc * shrout);
size_rank = size;
where 10<=shrcd<=11 and EXCHCD =1;
keep year size permno size_rank;
run;

proc sort data=size;
by year;
run;
proc rank data=size out=size groups =10;
by year;
var size_rank;
run;
proc sort data=size;
by year size_rank;
run;
proc univariate noprint data=size;
by year size_rank;
var size;
output out=size_breakpoints min=min_size max = max_size;
run;
data size;
set mylib.size_adjustments;
year = year(date);
month = month(date);
if month =12;
size = abs (prc * shrout);
keep year size permno ;
run;
proc sql;
create table size as
select *
from size, size_breakpoints
where size.year = size_breakpoints.year and size_breakpoints.min_size<=size.size<=size_breakpoints.max_size;
quit;
data mylib.size_breakpoints;
set size;
year = year +1;
keep permno year size_rank size;
run;



data size;
set mylib.size_breakpoints;
run;






proc sql;
create table ret2 as
select *
from ret, size
where ret.year =size.year and ret.permno = size.permno;
quit;

proc sort data=ret2;
by date size_rank;
run;
proc univariate noprint data=ret2;
by date size_rank;
var ret;
output out=size_return mean=size_return;
run;

proc sql;
create table ret2 as
select *
from ret2, size_return
where ret2.date = size_Return.date and ret2.size_rank = size_return.size_rank;
quit;
data ret2;
set ret2;
*abn_ret3 reflects size adjusted returns;
abn_ret3 = ret - size_return;
keep permno ticker abn_ret3 abn_ret abn_ret2 date  year;
run;



data cumday;
set mylib.cumday;
run;

proc sql;
create table ret as
select *
from ret2, cumday
where ret2.date = cumday.date;
quit;

data ret;
set ret;
ret_cumday = cumday;
drop date cumday;
run;


*focus on returns in the 3-day window around earnings announcements;
proc sql;
create table erc as
select *
from results2, ret
where results2.ticker = ret.ticker and results2.cumday -1 <=ret.ret_cumday <= results2.cumday +1;
quit;

proc sort nodupkey data=erc out=test;
by ticker anndats_act;
run;

*also need price information;

data erc;
set erc;
year = year(ANNDATS_ACT);
month = month(ANNDATS_ACT);
if month <=3 then quarter =1;
if 4<=month <=6 then quarter =2;
if 7<=month <=9 then quarter =3;
if month >9 then quarter =4;
cum_quarter = (year - 2012) *4 + quarter;
if cum_quarter = 9 then delete;
run;




data price;
set mylib.ret11_13;
prc = abs(prc);
year = year(date);
month = month(date);


if month <=3 then quarter =1;
if 4<=month <=6 then quarter =2;
if 7<=month <=9 then quarter =3;
if month >9 then quarter =4;
cum_quarter = (year - 2012) *4 + quarter;

keep permno ticker prc date cum_quarter;
run;

proc sort data=price;
by  ticker  cum_quarter date;
run;
proc sort nodupkey data=price out=price2;
by  ticker cum_quarter;
run;

proc sql;
create table erc2 as
select *
from erc, price2
where erc.cum_quarter = price2.cum_quarter and erc.ticker = price2.ticker;
quit;
proc sort nodupkey data=erc2 out=test;
by ticker anndats_act;
run;


proc sort data=erc2;
by ticker anndats_act actual ibes_obs ibes_value ESTIMIZE_obs ESTIMIZE_VALUE weighted_value  prc;
run;
*computing 3-day CARs;
 proc univariate noprint data=erc2;
by ticker anndats_act actual ibes_obs ibes_value ESTIMIZE_obs ESTIMIZE_VALUE weighted_value   prc;
var abn_ret3;
output out=erc3 sum=abn_ret3;
run;




data erc3;
set erc3;
obs_num = _N_;
*below I compute scaled earning surprise for IBES and Estimize;
ibes_ue = (actual-ibes_value)/prc;
estimize_ue = (actual-estimize_value)/prc;
weighted_ue = (actual-weighted_value)/prc;
ibes_minus_est_ue = ibes_ue - estimize_ue;
run;

proc univariate data=erc3;
var ibes_ue estimize_ue;
run;



proc univariate  noprint data=erc3;
var ibes_ue estimize_ue weighted_ue ibes_minus_est_ue;
output out=results p1= p1_ibes_ue p1_estimize_ue p1_weighted_ue p1_ibes_minus_est_ue p99= p99_ibes_ue p99_estimize_ue p99_weighted_ue p99_ibes_minus_est_ue;
run;

proc sql;
create table erc3 as
select *
from erc3, results;
quit;


*winsorize earnings surprises at the 1st and 99th percentile;
data erc3;
set erc3;

if ibes_ue < p1_ibes_ue then ibes_ue = p1_ibes_ue;
if ibes_ue > p99_ibes_ue then ibes_ue = p99_ibes_ue;

if estimize_ue < p1_estimize_ue then estimize_ue = p1_estimize_ue;
if estimize_ue > p99_estimize_ue then estimize_ue = p99_estimize_ue;

if weighted_ue < p1_weighted_ue then weighted_ue = p1_weighted_ue;
if weighted_ue > p99_weighted_ue then weighted_ue = p99_weighted_ue;

if ibes_minus_est_ue < p1_ibes_minus_est_ue then ibes_minus_est_ue = p1_ibes_minus_est_ue;
if ibes_minus_est_ue > p99_ibes_minus_est_ue then ibes_minus_est_ue = p99_ibes_minus_est_ue;
drop p1_ibes_ue p1_estimize_ue p1_weighted_ue p1_ibes_minus_est_ue p99_ibes_ue p99_estimize_ue p99_weighted_ue p99_ibes_minus_est_ue;
run;


proc univariate data=erc3;
var ibes_ue estimize_ue;
run;



proc univariate  noprint data=erc3;
var ibes_ue estimize_ue weighted_ue ibes_minus_est_ue;
output out=results std= std_ibes_ue std_estimize_ue std_weighted_ue std_ibes_minus_est_ue;
run;

proc sql;
create table erc3 as
select *
from erc3, results;
quit;


*standardizing variables;
data erc3;
set erc3;
ibes_ue2 = ibes_ue/std_ibes_ue;
estimize_ue2 = estimize_ue/std_estimize_ue;
weighted_ue2 = weighted_ue/std_weighted_ue;
ibes_minus_est_ue2 = ibes_minus_est_ue/std_ibes_minus_est_ue;
run;

*specification 1 of Table 9;
proc surveyreg data=erc3;
cluster ticker;
model abn_ret3 = estimize_ue2;
quit;

*specification 2 of Table 9;
proc surveyreg data=erc3;
cluster ticker;
model abn_ret3 = ibes_ue2;
quit;


*specification 3 of Table 9;
proc surveyreg data=erc3;
cluster ticker;
model abn_ret3 = weighted_ue2;
quit;

*specification 4 of Table 9;
proc surveyreg data=erc3;
cluster ticker;
model abn_ret3 = estimize_ue2 ibes_ue2;
quit;




data erc3;
set erc3;
log_est_obs = log(estimize_obs);
log_ibes_obs = log(ibes_obs);
dif_obs = log_est_obs - log_ibes_obs;
*if dif_obs > 1.8 then dif_obs = 1.8;
estimize_ue2_est_obs = estimize_ue2 * log_est_obs;
estimize_ue2_ibes_obs = estimize_ue2 * log_ibes_obs;
ibes_ue2_est_obs = ibes_ue2 * log_est_obs;
ibes_ue2_ibes_obs = ibes_ue2 * log_ibes_obs;
estimize_ue2_dif_obs = estimize_ue2 * dif_obs;
ibes_ue2_dif_obs = ibes_ue2 * dif_obs;

run;


*specification 5 of Table 9;


proc surveyreg data=erc3;
cluster ticker;
model abn_ret3 = estimize_ue2 ibes_ue2 estimize_ue2_est_obs ibes_ue2_est_obs log_est_obs estimize_ue2_ibes_obs ibes_ue2_ibes_obs LOG_IBES_OBS LOG_EST_OBS;
quit;

data erc3;
set erc3;
if 0<=estimize_obs <=2 then group =1;
if 3<=estimie_obs <=5 then group =2;
if estimize_obs >5 then group =3;
run;
proc sort data=erc3;
by group;
run;

*results for Figure 2;
proc surveyreg data=erc3;
cluster ticker;
model abn_ret3 = estimize_ue2 ibes_ue2;
by group;
quit;



****

								Part 5: Price Reaction to Estimize Consensus Revisions (Table 10 and Figure 3)

****


*for market reaction tests no longer need to match Estimize and IBES so will start with the complete Estimize data ;


*will start with original data since want some additional information;
data estimize;
set mylib.estimize;
keep ticker fiscal_year fiscal_quarter epsactual epswallstreet eps  estdate username actdate flagged bio1 bio2 bio3 ;
run; 


data estimize;
set estimize;
format est_date yymmdd10.;
format ACT_date yymmdd10.;

EST_date=input(trim(ESTdate),YYMMDD10.);
ACT_date=input(trim(ACTdate),YYMMDD10.);


est_year = year(est_date);
act_year = year (act_date);
weekday = weekday(est_date);

*making a new day variable that adds 1 if the date is on Sunday and 2 if the date is on Saturday;
est_date2 = est_date;
if weekday =1 then est_date2 = est_date2 +1;
if weekday =7 then est_date2 = est_date2 +2;
FORECAST_AGE = ACT_dATE - EST_DATE2;
*delete flagged forecasts and limit sample to 2012 and 2013 forecasts;
if flagged = 'TRUE' then delete;
if 2012<=est_year <=2013;
if 2012<=act_year<=2013;
run;



proc sort  nodupkey data=estimize;
BY fiscal_year fiscal_quarter ticker   EST_dATE2 act_date epsactual forecast_age USERNAME eps;
run;

proc univariate noprint data=estimize;
BY fiscal_year fiscal_quarter ticker   EST_dATE2 act_date epsactual forecast_age USERNAME flagged ;
var eps;
output out=estimize MEAN=eps min=min_eps max=maX_eps n=OBS;
RUN;



DATA estimize;
SET estimize;
SCALED_RATIO = MAX_EPS/MIN_EPS;
*limit sample to forecasts issued between 2 and 90 days before earnings announcement;
*dropping forecast issued on days 0 and 1 since returns may reflect earnings announcement rather than revision;
if forecast_age >90 then delete;
if forecast_age <=1 then delete;
drop min_eps max_eps obs SCALED_RATIO;
RUN;

*will create a new tracker for this dataset;
data estimize;
set estimize;
obs_num = _n_;
run;



PROC UNIVARIATE NOPRINT DATA=ESTIMIZE;
BY fiscal_year fiscal_quarter ticker    EST_dATE2 act_date epsactual forecast_age;
VAR EPS;
OUTPUT OUT=ESTIMIZE2 MEAN=previous_EPS n=revisions_per_day;
run;

data estimize2;
set estimize2;
previous_forecast_age = forecast_age;
drop forecast_age;
run;


proc sql;
create table estimize as
select *
from estimize, estimize2
where estimize.fiscal_year = estimize2.fiscal_year and estimize.fiscal_quarter = estimize2.fiscal_quarter and estimize.ticker = estimize2.ticker
and estimize.forecast_age < estimize2.previous_forecast_age;
quit;

data estimize;
set estimize;
forecast_age_dif = previous_forecast_age - forecast_age;
run;
proc sort data=estimize;
by obs_num forecast_age_dif;
run;
proc sort nodupkey data=estimize out=estimize1;
by obs_num;
run;


*Now I have the previous forecast of EPS;
*In addition, want to compute a consensus forecast;

*can compute consensus by looking at average eps estimates for all prior estimates;
*also only want to compute most recent forecast for a given estimator;

data sample;
set estimize1;
keep obs_num fiscal_year fiscal_quarter forecast_age ticker;
run;

*now want all possible forecasts;
data sample2;
set mylib.estimize;
format est_date yymmdd10.;
format ACT_date yymmdd10.;

EST_date=input(trim(ESTdate),YYMMDD10.);
ACT_date=input(trim(ACTdate),YYMMDD10.);

est_year = year(est_date);
act_year = year (act_date);
weekday = weekday(est_date);

*making a new day variable that adds 1 if the date is on Sunday and 2 if the date is on Saturday;
est_date2 = est_date;
if weekday =1 then est_date2 = est_date2 +1;
if weekday =7 then est_date2 = est_date2 +2;
FORECAST_AGE = ACT_dATE - EST_DATE2;

IF FORECAST_AGE>=1;
if flagged = 'TRUE' then delete;
if 2012<=est_year <=2013;
if 2012<=act_year <=2013;

keep ticker fiscal_year fiscal_quarter   eps  forecast_age username ;
run;

proc sql;
create table sample3 as
select *
from sample, sample2
where sample.ticker = sample2.ticker and sample.fiscal_year = sample2.fiscal_year and sample.fiscal_quarter = sample2.fiscal_quarter and 
sample.forecast_age < sample2.forecast_age;
run;

*want to keep only the most recent observation for a given user;

proc sort data=sample3;
by obs_num username foreast_age;
run;
proc sort nodupkey data=sample3;
by obs_num username ;
run;

proc univariate noprint data=sample3;
by obs_num ;
var eps;
output out = consensus_eps mean=consensus_eps N=consensus_obs;
run;
proc sort data=estimiz1e;
by obs_num;
run;
proc sort data=consensus_eps;
by obs_num;
run;
data estimize;
merge estimize1 consensus_eps;
by obs_num;
run;



*now will start deleting revisions that coincide with other sources of information like forecast revisions, recommendations, and guidance;


data forecasts;
set mylib.detail_forecasts;
if oftic = '' then delete;
drop measure ACTDATS ACTTIMS ANNTIMS_ACT ticker;
run;

proc sort data=forecasts;
by ANALYS oftic fpi ;
run;


data cumday;
set mylib.cumday;
run;

proc sql;
create table forecasts2 as
select *
from forecasts, cumday
where forecasts.ANNDATS = cumday.date;
quit;

data forecasts2;
set forecasts2;
ibes_forecast_cumday = cumday;
drop cumday;
run;



*will allow for the date to differ by up to 3 days and will then chose the date closes to the actual date - this helps with holidays;



proc sql;
create table estimize2 as
select *
from estimize, cumday
where  cumday.date -3 <=estimize.est_date2 <= cumday.date;
quit;

data estimize2;
set estimize2;
dif = date - est_date2;
run;
proc sort data=estimize2;
by obs_num dif;
run;
proc sort nodupkey data=estimize2;
by obs_num;
run;
data estimize2;
set estimize2;
drop dif;
run;


*look at estimize forecasts that fall in the two-day window (-1,0) around ibes forecast revisions;

proc sql;
create table estimize2b as
select *
from estimize2, forecasts2
where estimize2.ticker = forecasts2.oftic and forecasts2.ibes_forecast_cumday   <=estimize2.cumday <=forecasts2.ibes_forecast_cumday +1;
quit;



data estimize2b;
set estimize2b;
forecast_flag =1;
*keep obs_num forecast_flag ;
run;
proc sort nodupkey data=estimize2b;
by obs_Num;
run;
data forecast_info;
set estimize2b;
run;


proc sort data=estimize2;
by obs_num;
run;
proc sort data=estimize2b;
by obs_num;
run;
data estimize;
merge estimize2 estimize2b;
by obs_num;
run;



data recs;
set mylib.detail_recs;
keep IRECCD OFTIC ANNDATS;
run;




data cumday;
set mylib.cumday;
run;

proc sql;
create table recs as
select *
from recs, cumday
where recs.ANNDATS = cumday.date;
quit;




data recs2;
set recs;
ibes_recs_cumday = cumday;
drop cumday;
run;

*look at estimize forecasts that fall in the two-day window (-1,0) around ibes recommendation changes ;

proc sql;
create table estimize2c as
select *
from estimize2, recs2
where estimize2.ticker = recs2.oftic and recs2.ibes_recs_cumday   <=estimize2.cumday <=recs2.ibes_recs_cumday +1;
quit;
proc sort nodupkey data=estimize2c;
by obs_num;
run;


data estimize2c;
set estimize2c;
recs_flag =1;
run;



proc sort data=estimize;
by obs_num;
run;
proc sort data=estimize2c;
by obs_num;
run;
data estimize;
merge estimize estimize2c;
by obs_num;
run;




data guidance;
set mylib.DHGDET_US;
year = year(Announce_dt);
if 2011<=year<=2013;
keep IBES_TKR Announce_dt year;
run;

*NOW NEED TO MERGE WITH IBES_DETAIL DATA TO GET TICKER INFO;


data ibes;
set mylib.detail_forecasts;
year = year(ANNDATS);
keep oftic TICKER year;
run;
proc sort nodupkey data=ibes;
by oftic ticker year;
run;

proc sql;
create table guidance2 as
select *
from guidance, ibes
where guidance.ibes_tkr = ibes.ticker and guidance.year = ibes.year;
quit;

data guidance2;
set guidance2;
drop ticker year;
run;


data cumday;
set mylib.cumday;
guidance_cumday = cumday;
drop cumday;
run;
proc sql;
create table guidance2 as
select *
from guidance2, cumday
where guidance2.Announce_dt = cumday.date;
quit;

*look at estimize forecasts that fall in the two-day window (-1,0) around earnings guidance ;
proc sql;
create table estimize2 as
select *
from estimize, guidance2
where estimize.ticker = guidance2.oftic and guidance2.guidance_cumday   <=estimize.cumday <=guidance2.guidance_cumday +1;
quit;


proc sort nodupkey data=estimize2;
by obs_num;
run;

data estimize2;
set estimize2;
guidance_flag =1;
run;

proc sort data=estimize;
by obs_num;
run;
proc sort data=estimize2;
by obs_num;
run;
data estimize;
merge estimize estimize2;
by obs_num;
run;



data est1;
set estimize;
if guidance_flag =1 then delete;
if recs_flag =1 then delete;
if forecast_flag =1 then delete;
run;


*now I want information on the consensus forecast at the time of the earnings issue;
data ibes;
set mylib.ibes_consensus;
if fpi =6;
keep oftic  cusip numest meanest medest stdev actual ANNDATS_ACT statpers fpedats;
run;


proc sql;
create table est2 as
select *
from est1, ibes
where est1.ticker =ibes.oftic and est1.ACT_DATE = ibes.ANNDATS_ACT and est1.est_date2 > ibes.STATPERS ;
quit;




data est2;
set est2;
date_dif  = est_date2 - statpers;
run;
proc sort data=est1;
by obs_num dif;
run;
proc sort data=est2;
by obs_num date_dif;
run;
data est2;
merge est1 est2;
by obs_num;
run;

proc sort nodupkey data=est2 out=est3;
by obs_num;
run;


data mylib.no_news_sample2;
set est3;
run;






data ret;
set mylib.ret11_13;
prc = abs(prc);
abn_ret = ret - ewretd;
abn_ret2 = ret - vwretd;
year = year(date);
keep permno ticker ret abn_ret abn_ret2 date prc year;
run;

data size;
set mylib.size_breakpoints;
run;
proc sql;
create table ret2 as
select *
from ret, size
where ret.year =size.year and ret.permno = size.permno;
quit;

proc sort data=ret2;
by date size_rank;
run;
proc univariate noprint data=ret2;
by date size_rank;
var ret;
output out=size_return mean=size_return;
run;

proc sql;
create table ret2 as
select *
from ret2, size_return
where ret2.date = size_Return.date and ret2.size_rank = size_return.size_rank;
quit;
data ret2;
set ret2;
*will focus on abn_ret3 (size-adjusted returns);
abn_ret3 = ret - size_return;
keep permno ticker abn_ret3 abn_ret abn_ret2 date  year;
run;




data cumday;
set mylib.cumday;
run;

proc sql;
create table ret as
select *
from ret2, cumday
where ret2.date = cumday.date;
quit;

data ret;
length ticker2 $5;
set ret;
ret_cumday = cumday;
ticker2 = ticker;
drop date cumday;
run;
*going to look at the returns over the [-20, +20] window;
proc sql;
create table est4 as
select *
from est3, ret
where est3.ticker = ret.ticker2 and est3.cumday-20<=ret.ret_cumday<=est3.cumday+20;
quit;


*need to scale forecast revisions by price so want the price at day -20;
data prc;
set mylib.ret11_13;
prc = abs(prc);
keep ticker prc date;
run;
data cumday;
set mylib.cumday;
*will match with est_cumday - will lag price 21 days relative to estimate date;
prc_cumday = cumday +21;
keep date prc_cumday;
run;
proc sql;
create table prc as
select *
from prc, cumday
where prc.date =cumday.date;
quit;

proc sql;
create table est4 as
select *
from est4, prc
where est4.ticker = prc.ticker and est4.cumday = prc.prc_cumday;
quit;



data est4;
set est4;
prc = abs(prc);

estimize_update1 = (EPS - previous_eps)/PRC;
*estimize update 2 is key variable, measures consensus revisions;
estimize_update2 = (EPS - consensus_eps)/PRC;
event_dif = ret_cumday - cumday;


run;



*now limiting sample to event days 0 and 1;


data test;
set est4;
where 0<=event_dif<=1 ;
*if date_dif = . then delete;
*taking logged returns so that I can compute a compound return;
abn_ret1 = log(1+abn_ret);
abn_ret2 = log(1+abn_ret2);
abn_ret3 = log(1+abn_ret3);
prior_forecasts = consensus_obs;
drop consensus_obs;
run;

proc sort data=test;
by est_date2 permno ticker epsactual ACT_date ;
run;
proc univariate noprint data=test;
by est_date2 permno ticker epsactual ACT_date ;
var estimize_update2 estimize_update1;
output out=consensus mean=consensus_update2 consensus_update1 N=consensus_sample;
run;

data consensus;
set consensus;
consensus_obs2 = _N_;
run;


proc sql;
create table test as
select *
from test, consensus
where test.est_date2 = consensus.est_date2 and test.permno = consensus.permno;
quit;
proc sort nodupkey data=test;
by consensus_obs2 event_dif;
run;

proc sort data=test;
by consensus_obs2 PERMNO ticker epsactual ACT_date est_date2 consensus_update2 consensus_update1 forecast_age consensus_sample prior_forecasts;
run;
*for each consensus revision will look at the BHAR on days [0,1];
proc univariate noprint data=test;
by consensus_obs2 PERMNO ticker epsactual ACT_date est_date2 consensus_update2 consensus_update1 forecast_age consensus_sample prior_forecasts;
var abn_ret1 abn_ret2 abn_ret3;
output out=test sum=abn_ret1 abn_ret2 abn_ret3 N=daily_obs;
run;



data test2;
set test;
abn_ret1 = (exp(abn_ret1) -1)*100;
abn_ret2 = (exp(abn_ret2) -1)*100;
abn_ret3 = (exp(abn_ret3) -1)*100;
*where forecast_age <=20;
run;


proc univariate data=test2;
var consensus_update2;
run;


*  winsorize  consensus_update at 1st and 99th percentile;

proc univariate noprint data=test2;
var consensus_update2;
output out=results p1 = consensus1 p99=consensus99;
run;

proc sql;
create table test2 as
select *
from test2, results;
quit;

data test2;
set test2;
win_update = consensus_update2;
if win_update >consensus99 then win_update = consensus99;
if win_update <consensus1 then win_update = consensus1;
if consensus_update2 >0 then upward =1;
else upward =0;
run;


*will scale by its standard deviation;
proc univariate  data=test2;
var win_update;
output out=results std=std_win_update;
run;
proc sql;
create table test2 as
select *
from test2, results;
quit;
data test2;
set test2;
win_update2 = win_update/std_win_update;
run;


***specification 1 of Table 10;
proc surveyreg data=test2;
cluster permno;
model abn_ret3 = win_update2;
quit;


***specification 2 of Table 10;
proc surveyreg data=test2;
cluster permno;
model abn_ret3 = upward;
quit;

*also want to collect data on whether actual EPS is the same for IBES and Estimize;

data sample;
set mylib.estimize_unadjusted;
keep ticker  act_date epsactual bad_actuals;
run;
proc sort nodupkey data=sample;
by ticker act_date epsactual;
run;

proc sql;
create table test2b as
select *
from test2, sample
where test2.ticker = sample.ticker and test2.act_date = sample.act_date;
quit;
run;
proc means data=test2b;
var bad_actuals;
run;
proc sort data=test2;
by consensus_obs2;
run;
proc sort data=test2b;
by consensus_obs2;
run;
data test2;
merge test2 test2b;
by consensus_obs2;
bad_actuals2=  bad_actuals;
if bad_actuals = . then bad_actuals2 =0;
win_update2_actuals = win_update2 * bad_actuals2;
run;





proc sort data=test2;
by upward;
run;
proc rank data=test2 out=test3 groups=2;
by upward;
var  consensus_update2 consensus_update1;
run;

data test3;
set test3;
if upward =1 and consensus_update2 =1 then win =1;
else win =0;
if upward =0 and consensus_update2 =0 then lose =1;
else lose =0;
run;
*specification 3 of Table 10;
proc surveyreg data=test3;
cluster permno;
model abn_ret3 = win lose;
quit;



data win;
set test3;
if win + lose =1;
keep win lose permno est_date2;
run;

*can look at longer horizons [-20, 20];
data sample;
set est4;
abn_ret3 = abn_ret3 *100;
where -20<=event_dif<=20;
keep permno est_date2 event_dif abn_ret3;
run;

proc sql;
create table win as
select *
from win, sample
where win.permno = sample.permno and win.est_date2 = sample.est_date2;
quit;
proc sort nodupkey data=win;
by permno est_date2 event_dif;
run;
proc sort data=win;
by win event_dif ;
run;
*this file contains the data necessary for Figure 2;
proc univariate noprint data=win;
by win event_dif ;
var abn_ret3;
output out=results mean=abn_ret3 N=obs;
run;



*will also look at price impact for forecasts with different characteristics;


data char;
set mylib.char2;
log_coverage = log(1+ibes_coverage);
keep year permno ticker log_size log_bm log_vol log_turn log_coverage ibes_coverage;
run;

data test3;
set test3;
char_year = year(est_date2) -1;
run;

proc sql;
create table test4 as
select *
from test3, char
where test3.permno = char.permno and test3.char_year = char.year;
quit;
proc sort nodupkey data=test4;
by consensus_obs2;
run;
proc univariate data=test4;
var log_size log_coverage;
run;





data test5;
set test4;
if ibes_coverage <10 then low_coverage =1;
else low_coverage =0;
win_update2_coverage = win_update2 * low_coverage;
if forecast_age <=8 then short_forecast =1;
else short_forecast =0;
win_update2_sf = win_update2 * short_forecast;
run;


***specification 5 of Table 10;
proc surveyreg data=test5;
cluster permno;
model abn_ret3 = win_update2 win_update2_sf short_forecast low_coverage win_update2_coverage win_update2_actuals bad_actuals2;  ;
quit;