BATCH 4 | Project Stage 3, Part 1 - Tidy & Wrap

 Project Stage 3, Part 1 - Tidy & Wrap

Extending Project Stage 2 Code Functionality

Instructions

In this project, we will make use of the existing Clone-Prune Analysis Pass created for Project Stage 2. Down below are the requirements expected for us to complete in this stage. Instructions were taken from Chris Tyler's Wiki:

1. Extend the functionality of your code so that it can process multiple sets of cloned functions and make a PRUNE / NOPRUNE recommendation for each (your code may already do this). In other words, remove the assumption from Stage II that “There is only one cloned function in a program”.

2. Create one or more test cases that contain a minimum of two cloned functions per program. You can do this by extending the test cases that I provided for Stage II; or you can create your own test cases.

3. Verify that your code works correctly on both architectures (x86_64 and aarch64), with multiple functions, in both the PRUNE and NOPRUNE scenarios.

4. Ensure that you test a scenario where one (or more) functions have a PRUNE recommendation while one (or more) other functions have a NOPRUNE recommendation. This is probably most easily done by having one function that does a simple operation such as I/O or scalar arithmetic, and another function that processes an array or buffer of data in a way that is vectorizable.

5. Clean up any remaining issues you have identified with your Stage II work.

Let's Dive In!

STEP 1: I have to ensure my Project Stage 1 and Stage 2 have been completed and are working as intended. Since Stage 2 of this project had the assumption of only one cloned function, I had to modify my code and make sure it can can process multiple sets of cloned functions. Down below is my updated Clone-Prune Analysis Pass: 

(Code can be tested!)

#include "config.h"

#include "system.h"

#include "coretypes.h"

#include "backend.h"

#include "tree.h"

#include "basic-block.h"

#include "function.h"

#include "gimple.h"

#include "gimple-iterator.h"

#include "tree-pass.h"

#include "context.h"


#include <string.h>        

#include <unordered_map>   

#include <vector>          

#include <string>          

#include <cstdio>         


namespace {


const pass_data count_pass_data = {

    GIMPLE_PASS,  

    "count",     

    OPTGROUP_NONE,

    TV_NONE,   

    PROP_gimple_any, 

    0, 0, 0, 0    

};


struct clone_signature {

    int bb_count;

    int gimple_count;

};

static std::unordered_map<std::string, std::vector<clone_signature>> clone_map;


bool is_internal_function(const char *name) {

    return (strstr(name, "<anonymous>") != nullptr ||

            strstr(name, "tree_") != nullptr ||

            strstr(name, "hash_") != nullptr ||

            strstr(name, "va_gc::") != nullptr ||

            strstr(name, "pass_") != nullptr ||

            strstr(name, "optimize_") != nullptr ||

            strstr(name, "dump_") != nullptr);

}


bool is_user_function(const char *name) {

    return (strstr(name, "scale_samples") != nullptr ||

            strstr(name, "prune") != nullptr ||

            strstr(name, "noprune") != nullptr);

}


class count_pass : public gimple_opt_pass {

public:

    count_pass(gcc::context *ctxt)

      : gimple_opt_pass(count_pass_data, ctxt) {}


    bool gate(function *fun) override {

        (void)fun;  

        return true;

    }


    unsigned int execute(function *fun) override {

        const char *full_name = function_name(fun);

        if (!full_name)

            return 0;

        if (is_internal_function(full_name))

            return 0;

        if (!is_user_function(full_name))

            return 0;


        char base_name[256] = "";

        bool is_resolver = (strstr(full_name, "resolver") != nullptr);

        const char *dot = strchr(full_name, '.');

        if (dot != nullptr) {

            size_t len = dot - full_name;

            if (len >= sizeof(base_name))

                len = sizeof(base_name) - 1;

            strncpy(base_name, full_name, len);

            base_name[len] = '\0';

            fprintf(stderr, "Detected clone for function: %s (full name: %s)\n",

                    base_name, full_name);

        } else {

            strncpy(base_name, full_name, sizeof(base_name) - 1);

            base_name[sizeof(base_name) - 1] = '\0';

            fprintf(stderr, "Processing original function (for clone comparison): %s\n", full_name);

        }

        int bb_count = 0, gimple_count = 0;

        basic_block bb;

        FOR_EACH_BB_FN(bb, fun) {

 for (gimple_stmt_iterator gsi = gsi_start_bb(bb);

                 !gsi_end_p(gsi);

                 gsi_next(&gsi)) {

                gimple_count++;

            }

            bb_count++;

        }

        if (!is_resolver) {

            std::string base_str(base_name);

            clone_map[base_str].push_back({ bb_count, gimple_count });

            if (clone_map[base_str].size() >= 2) {

                bool all_same = true;

                const clone_signature &first = clone_map[base_str][0];

                for (size_t i = 1; i < clone_map[base_str].size(); ++i) {

                    if (clone_map[base_str][i].bb_count != first.bb_count ||

                        clone_map[base_str][i].gimple_count != first.gimple_count) {

                        all_same = false;

                        break;

                    }

                }

                if (all_same) {

                    if (dump_file)

                        fprintf(dump_file, "PRUNE: %s\n", base_name);

                    else

                        fprintf(stderr, "PRUNE: %s\n", base_name);

                } else {

                    if (dump_file)

                        fprintf(dump_file, "NOPRUNE: %s\n", base_name);

                    else

                        fprintf(stderr, "NOPRUNE: %s\n", base_name);

                }

                clone_map.erase(base_str);

            }

        } else {

            fprintf(stderr, "Skipping resolver function: %s\n", full_name);

        }


        if (dump_file)

            fprintf(dump_file, "Function %s: %d BBs, %d GIMPLE stmts\n",

                    full_name, bb_count, gimple_count);

        else

            fprintf(stderr, "Function %s: %d BBs, %d GIMPLE stmts\n",

                    full_name, bb_count, gimple_count);

        return 0;

    }

};



opt_pass *make_count_pass(gcc::context *ctxt) {

    return new count_pass(ctxt);

}


Changes Made From Previous Code:


1. Clone Tracking Approach 


The first approach was tracking with static variables, meaning the code uses more global static variables that were used for boolean flags and character arrays. This approach allowed us to identify clones and compare them based on character arrays for basic block counts and GIMPLE counts. This could be used for a simpler clone-prune analysis with the assumption of only one cloned function.


To ensure this code can process multiple cloned functions, using an unordered map approach would be better to map a function's base name to a vector clone signature. Additionally, this method allows the program to store multiple clones where every time a non-resolver function passes the filtering for resolver functions, it will then be added to its clone map.  


2. Multiple Clones Comparison


Since part of the requirements is to allow the code to process multiple cloned functions, the new approach supports this by iterating through all the signatures once vector size is at least 2. Following the same logic where if all stored signatures have the same number of BBs and GIMPLE statements, it will produce a PRUNE message as clones are substantially the same. Otherwise, if difference have been identified between the non-resolver functions, then it will print a NOPRUNE message.



STEP 2: After the updating my Clone-Prune Analysis Pass code, I wanted to make sure everything is fresh, I performed the steps for Lab 4 to rebuild gcc. Firstly, I deleted the entire gcc-build-001 by using rm -rf gcc-build-001, then created another directory for build by using mkdir gcc-build-001, and had to work in this file so I changed to directory by using cd gcc-build-001

To make sure I run the GCC configuration script at the right location I used this command ~/gcc/configure --prefix=$HOME/gcc-test-001, then ran make by using this time make -j $(( $(nproc)*3/2)) |& tee build.log.

STEP 3:  After a successful build, I have to test out the modified Clone-Prune Analysis Code pass to see if it works for multiple cloned functions. We were given the option to create our own test cases, but in my case, I decided to expand the clone test files that was provided by Chris Tyler. The code down below is the original code taken from Chris Tyler's clone-test-core.c file:

#include <stdlib.h>

#include <stdio.h>

#include <stdint.h>

#include <stdbool.h>

#include "vol.h"


int sum_sample (int16_t *buff, size_t samples) {

  int t;

  for (int x = 0; x < SAMPLES; x++) {

    t += buff[x];

  }

  return t;

}


CLONE_ATTRIBUTE

void scale_samples(int16_t *in, int16_t *out, int cnt, int volume) {

        for (int x = 0; x < cnt; x++) {

                out[x] = ((((int32_t) in[x]) * ((int32_t) (32767 * volume / 100) <<1) ) >> 16);

        }

}


int main() {

int             x;

        int             ttl=0;


// ---- Create in[] and out[] arrays

        int16_t*        in;

        int16_t*        out;

        in =  (int16_t*) calloc(SAMPLES, sizeof(int16_t));

        out = (int16_t*) calloc(SAMPLES, sizeof(int16_t));


// ---- Create dummy samples in in[]

        vol_createsample(in, SAMPLES);


// ---- This is the part we're interested in!

// ---- Scale the samples from in[], placing results in out[]

        scale_samples(in, out, SAMPLES, VOLUME);


// ---- This part sums the samples.

        ttl=sum_sample(out, SAMPLES);


// ---- Print the sum of the samples.

        printf("Result: %d\n", ttl);


        return 0;


}


Since it is required that we test it out with multiple cloned functions, I have expanded Chris Tyler's code by adding functions that produce PRUNE or NOPRUNE messages based on whether or not the cloned functions are substantially the same. Down below is the updated code for testing: 

#include <stdlib.h>

#include <stdio.h>

#include <stdint.h>

#include <string.h>


#ifndef SAMPLES

#define SAMPLES 100

#endif

#ifndef VOLUME

#define VOLUME 50

#endif

#include "vol.h"

#if defined(__aarch64__)

    #define CLONE_ATTRIBUTE_PRUNE __attribute__((target_clones("default", "rng")))

    #define CLONE_ATTRIBUTE_NOPRUNE __attribute__((target_clones("default", "sve2")))

#elif defined(__x86_64__)

    #define CLONE_ATTRIBUTE_PRUNE __attribute__((target_clones("default", "popcnt")))

    #define CLONE_ATTRIBUTE_NOPRUNE __attribute__((target_clones("default", "arch=x86-64-v3")))

#else

    #define CLONE_ATTRIBUTE_PRUNE

    #define CLONE_ATTRIBUTE_NOPRUNE

#endif


// --- A helper function ---

int sum_sample (int16_t *buff, size_t samples) {

    int t = 0;

    for (size_t x = 0; x < samples; x++) {

        t += buff[x];

    }

    return t;

}


// --- A function to scale samples ---

// This function is marked with the PRUNE attribute so that all its clones should be identical.

CLONE_ATTRIBUTE_PRUNE

void scale_samples(int16_t *in, int16_t *out, int cnt, int volume) {

    for (int x = 0; x < cnt; x++) {

        out[x] = ((((int32_t) in[x]) *

                  ((int32_t) (32767 * volume / 100) << 1)) >> 16);

    }

}


// ======================================================================

// ADDED FUNCTIONS ARE DOWN BELOW


CLONE_ATTRIBUTE_PRUNE

void noprune_func4(int *arr, int n) {

    for (int i = 0; i < n; i++) {

         arr[i] = arr[i] * 2;

    }

}


CLONE_ATTRIBUTE_PRUNE

void noprune_func5(int *arr, int n) {

    for (int i = 0; i < n; i++) {

         arr[i] = arr[i] * 2;

    }

}


CLONE_ATTRIBUTE_PRUNE

int prune_func3(int a) {

    return a + 100;

}



CLONE_ATTRIBUTE_NOPRUNE

void noprune_func1(int *arr, int n) {

    for (int i = 0; i < n; i++) {

        if (i % 2 == 0)

            arr[i] = arr[i] * 3;

        else

            arr[i] = arr[i] * 3 + 1;


 }

}


CLONE_ATTRIBUTE_NOPRUNE

void noprune_func2(int *arr, int n) {

    for (int i = 0; i < n; i++) {

        int temp = arr[i] + 5;

        if (temp % 2 == 0)

            arr[i] = temp * 2;

        else

            arr[i] = temp * 2 + 1;

    }

}


CLONE_ATTRIBUTE_NOPRUNE

int prune_func4(int a) {

    if (a < 50)

        return a + 105; 

    else

        return a + 95;

}


// ======================================================================

// main()

int main(void) {

    int arr[10];

    for (int i = 0; i < 10; i++) {

         arr[i] = i;

    }


    int16_t *in = (int16_t*) calloc(SAMPLES, sizeof(int16_t));

    int16_t *out = (int16_t*) calloc(SAMPLES, sizeof(int16_t));

    vol_createsample(in, SAMPLES);


    scale_samples(in, out, SAMPLES, VOLUME);

    int sum = sum_sample(out, SAMPLES);

    printf("scale_samples sum: %d\n", sum);


    noprune_func4(arr, 10);

    noprune_func5(arr, 10);

    int r1 = prune_func3(5);

    printf("prune_func3 returns: %d\n", r1);


    noprune_func1(arr, 10);

    noprune_func2(arr, 10);

    int r2 = prune_func4(5);

    printf("noprune_func3 returns: %d\n", r2);


    free(in);

    free(out);

    return 0;

}


This is the end of Project Stage 3, Part 1. 


Stay tuned for Project Stage 3, Part 2 where I will walk you through the process of testing while comparing the cloned functions to prove my pass works with multiple functions. Thank you for reading! 







Comments

Popular posts from this blog

BATCH 3 | Project Stage 2, Part 3 - Cloned Functions Comparison and Reflection

BATCH 4 | Clone-Prune Analysis Code Pass On Both Architectures

BATCH 3 | Project Stage 2, Part 1 - Clone-Pruning Analysis Pass