% Start from an empty workspace, then bring in the fitted models and the
% design description that the rest of the script reads.
clear;
load('fitting_model.mat');
load('Design_information.mat');

% Search configuration.
f_DVFS = 1;                      % non-zero selects the two-mode (DVFS) search
ratio_retention_dff = 0.05;      % weight applied to the retention leakage term
minimum_on_percent = 0.125;      % 1 core out of 8 cores on means 12.5%

% Number of rows in the supply-allocation table and the running index of
% design points that have been accepted so far.
n_vdd = numel(vdd_allocation(:,1));
i_vpp = 0;

% Target throughputs: fixed fractions of a 7.3e7 reference value.
Throuput = 7.3e7 * [0.0001, 0.001, 0.01, 0.1, 0.5, 0.9];

% Tabulate the switching overhead over the activity grid.
act = 0:0.001:1;
p_overhead = arrayfun(@P_SwtichVsAct, act);
% To inspect the overhead curve: plot(act,p_overhead);


% For every design point in Scaling and every target throughput, search the
% time split (fraction f1 at the nominal supply, fraction f2 at a scaled
% supply, the remainder in retention) with the lowest total power and record
% the outcome in DVDS(i_pp) and in the i_vpp-indexed summary arrays.
n_stage=length(optimizing_stage);
for i_pp=1:length(Scaling)
    neg_error=0;
    % Design points flagged through f_fit are only marked and skipped.
    if(Scaling(i_pp).f_fit)
        DVDS(i_pp).f_fit=1;
        continue;
    end
    Peak_TP=1/10^Scaling(i_pp).origin_delay;
    Peak_P=10^Scaling(i_pp).origin_power;
    % Target throughputs expressed as a fraction of this design's peak.
    Th_put = Throuput./Peak_TP;
    i_vpp=i_vpp+1;
    d_fit=Scaling(i_pp).delay_fit;
    p_fit=Scaling(i_pp).power_fit;
    l_fit=Scaling(i_pp).leakage_fit;

    % Quantities that depend only on the design point are computed once here
    % instead of once per (f1,f2) candidate.
    stage_min_vdd=zeros(1,n_stage);   % smallest positive VDD listed per stage
    stage_max_vdd=zeros(1,n_stage);   % largest VDD listed per stage
    for i_stage=1:n_stage
        stage_vdd=Scaling(i_pp).VDD(i_stage,:);
        stage_min_vdd(i_stage)=min(stage_vdd(stage_vdd>0));
        stage_max_vdd(i_stage)=max(stage_vdd);
    end
    % Retention supply: the highest of the per-stage minimum voltages.
    retention_vdd=max(0,max(stage_min_vdd));
    Retention_leakage=zeros(1,n_stage);
    for i_stage=1:n_stage
        Retention_leakage(i_stage)=Leakage_fun(l_fit(i_stage,:),retention_vdd);
    end
    Demond_vdd=zeros(1,n_stage);
    This_delay=zeros(1,n_stage);
    This_power=zeros(1,n_stage);
    This_leakage=zeros(1,n_stage);

    for i_tp=1:length(Th_put)
        min_power(i_vpp,i_tp)=inf;
        % Throughput too close to (or above) peak to leave room for the
        % switching overhead: record the nominal-supply-only entry with no
        % power solution (min_power stays inf).
        if( Th_put(i_tp) > 1 - P_SwtichVsAct(0) - P_SwtichVsAct(1))
            Related_Throughput(i_vpp,i_tp)=Peak_TP*Th_put(i_tp);
            fraction1(i_vpp,i_tp)=1;
            fraction2(i_vpp,i_tp)=0;
            DVDS(i_pp).VDD1(i_tp,:)=max(max(Scaling(i_pp).VDD));
            DVDS(i_pp).VDD2(i_tp,:)=max(max(Scaling(i_pp).VDD));
            DVDS(i_pp).min_power(i_tp)=min_power(i_vpp,i_tp);
            DVDS(i_pp).Throughput(i_tp)=Th_put(i_tp);
            DVDS(i_pp).fraction(i_tp,1)=1;
            DVDS(i_pp).fraction(i_tp,2)=0;
            DVDS(i_pp).second_delay(i_tp)=10^Scaling(i_pp).origin_delay/delay_weight;
            DVDS(i_pp).first_delay(i_tp)=10^Scaling(i_pp).origin_delay;
            DVDS(i_pp).first_power(i_tp)=10^Scaling(i_pp).origin_power;
            continue;
        end

        % Every throughput point starts without a solution, so a point for
        % which no candidate is accepted is recorded as NaN rather than with
        % values left over from a previous point.
        temp_VDD=NaN;
        temp_f1=NaN;
        temp_f2=NaN;
        temp_delay=NaN;

        if(neg_error)
            % A negative power was already found for this design point; it is
            % rejected below, so the remaining points are not searched.
        elseif(f_DVFS)
            for f1=0:0.01:Th_put(i_tp)
                for f2=max(Th_put(i_tp)-f1-P_SwtichVsAct(f1),minimum_on_percent):0.01:1-f1
                    % Delay the scaled mode must reach so that the fraction
                    % f2 delivers the throughput not covered by f1.
                    Demond_delay=f2/(Th_put(i_tp)*Peak_TP-f1*Peak_TP)/delay_weight;

                    if(Demond_delay > 0)
                        % Bisection on each stage's supply for the demanded
                        % delay (assumes Delay_fun falls as VDD rises - TODO
                        % confirm).
                        for i_stage=1:n_stage
                            min_vdd=stage_min_vdd(i_stage);
                            max_vdd=stage_max_vdd(i_stage);
                            % Defined even when the range is already within
                            % tolerance and the loop body never runs.
                            bisearch_vdd=max_vdd;
                            while(max_vdd-min_vdd>0.0001)
                                bisearch_vdd=(min_vdd+max_vdd)/2;
                                if(Delay_fun(d_fit(i_stage,:),bisearch_vdd)>Demond_delay)
                                    min_vdd=bisearch_vdd;
                                else
                                    max_vdd=bisearch_vdd;
                                end
                            end
                            Demond_vdd(i_stage)=bisearch_vdd;
                        end
                    else
                        % No positive delay demand: run every stage at its
                        % minimum listed supply.
                        Demond_vdd=stage_min_vdd;
                    end

                    % Stages listed in the same vdd_allocation row share one
                    % supply, so each group takes the highest voltage demanded
                    % by any of its members.
                    for i_vdd=1:n_vdd
                        shared_stage=vdd_allocation(i_vdd,vdd_allocation(i_vdd,:)>0);
                        Demond_vdd(shared_stage)=max(Demond_vdd(shared_stage));
                    end
                    for i_stage=1:n_stage
                        This_delay(i_stage)=Delay_fun(d_fit(i_stage,:),Demond_vdd(i_stage));
                        This_power(i_stage)=Power_fun(p_fit(i_stage,:),Demond_vdd(i_stage));
                        This_leakage(i_stage)=Leakage_fun(l_fit(i_stage,:),Demond_vdd(i_stage));
                    end
                    % Drop candidates whose slowest stage misses the demanded
                    % delay by more than 5% (bisection precision limit).
                    if(Demond_delay > 0 && max(This_delay)>1.05*Demond_delay)
                        continue;
                    end

                    real_power=activity * This_power/(max(This_delay)*delay_weight)+This_leakage;
                    total_power = optimizing_stage_weight*real_power'* (f2 + P_SwtichVsAct( f2)) + Peak_P* (f1 +  P_SwtichVsAct( f1)) ...
                        + ratio_retention_dff * optimizing_stage_weight*Retention_leakage'* max(0,( 1- f2 - P_SwtichVsAct( f2) - f1 -  P_SwtichVsAct( f1)));

                    % A negative power rejects the whole design point.
                    if(total_power<0)
                        neg_error=1;
                        min_power(i_vpp,i_tp) = inf;
                        temp_VDD=NaN;
                        temp_f1=NaN;
                        temp_f2=NaN;
                        temp_delay=NaN;
                        display('error: found negative power');
                        display(i_vpp);
                        break;
                    end
                    if(min_power(i_vpp,i_tp)>total_power)
                        min_power(i_vpp,i_tp)= total_power;
                        temp_VDD=Demond_vdd;
                        temp_f1=f1;
                        temp_f2=f2;
                        temp_delay=Demond_delay*delay_weight;
                    end
                end
                if(neg_error)
                    break;
                end
            end
        else
            % No scaled mode: the whole throughput is served at the nominal
            % supply, so the second mode is unused.
            f1 = Th_put(i_tp);
            temp_f1=f1;
            temp_f2=0;
            temp_VDD=max(max(Scaling(i_pp).VDD));
            total_power =  Peak_P*(f1 + P_SwtichVsAct( f1));
            min_power(i_vpp,i_tp)=total_power;
            temp_delay = inf;
        end

        Related_Throughput(i_vpp,i_tp)=Peak_TP*Th_put(i_tp);
        fraction1(i_vpp,i_tp)=temp_f1;
        fraction2(i_vpp,i_tp)=temp_f2;
        DVDS(i_pp).min_power(i_tp)=min_power(i_vpp,i_tp);
        DVDS(i_pp).Throughput(i_tp)=Th_put(i_tp);
        DVDS(i_pp).fraction(i_tp,1)=temp_f1;
        DVDS(i_pp).fraction(i_tp,2)=temp_f2;
        DVDS(i_pp).VDD1(i_tp,:)=max(max(Scaling(i_pp).VDD));
        DVDS(i_pp).VDD2(i_tp,:)=max(temp_VDD);
        DVDS(i_pp).second_delay(i_tp)=temp_delay;
        DVDS(i_pp).first_delay(i_tp)=10^Scaling(i_pp).origin_delay;
        DVDS(i_pp).first_power(i_tp)=10^Scaling(i_pp).origin_power;
    end
    if(neg_error==0)
        Peak_Throught(i_vpp)=Peak_TP;
        Peak_Power(i_vpp)=Peak_P;
    else
        % Rejected design point: make sure none of its entries can be picked
        % as a minimum later, then release its summary row for reuse.
        min_power(i_vpp,:)=inf;
        DVDS(i_pp).min_power(:)=inf;
        i_vpp=i_vpp-1;
    end
end
% Persist the per-design results, then, for every target throughput, pick the
% design point with the lowest recorded power and export/plot the envelope.
save('DVDS','Scaling','DVDS');

n_tp=length(Throuput);
% Column 1: target throughput, column 2: lowest power found.
final_result=zeros(n_tp,2);
% Columns: nominal delay, nominal power, nominal VDD, scaled VDD, f1, f2 of
% the selected design point. Rows stay NaN when no design point has results.
appendix_result=NaN(n_tp,6);
% DVDS only carries a min_power field once at least one design was searched.
has_results=isfield(DVDS,'min_power');
for i_tp=1:n_tp
    final_result(i_tp,1) = Throuput(i_tp);
    ind = 0;
    min_p = inf;
    if(has_results)
        for i_d = 1:length(DVDS)
            % Skipped design points have no min_power entries at all.
            if(isempty(DVDS(i_d).min_power))
                continue;
            end
            % The first design with results seeds the search; afterwards only
            % a strictly lower power replaces the current choice.
            if(ind==0 || DVDS(i_d).min_power(i_tp) < min_p)
                min_p = DVDS(i_d).min_power(i_tp);
                ind = i_d;
            end
        end
    end
    final_result(i_tp,2) = min_p;
    if(ind==0)
        continue;
    end
    % All appendix columns are read from the same selected design point.
    appendix_result(i_tp,1) = 10^Scaling(ind).origin_delay;
    appendix_result(i_tp,2) = 10^Scaling(ind).origin_power;
    appendix_result(i_tp,3) = max(max(Scaling(ind).VDD));
    appendix_result(i_tp,4) = DVDS(ind).VDD2(i_tp,1);
    appendix_result(i_tp,5) = DVDS(ind).fraction(i_tp,1);
    appendix_result(i_tp,6) = DVDS(ind).fraction(i_tp,2);
end
figure;
loglog(final_result(:,1),final_result(:,2),'*--','Linewidth',3);

dlmwrite('DVFS_constantThPut.txt',final_result,'\t');
dlmwrite('DVFS_appendix',appendix_result,'\t');
                
                