Using the eXtremeSQL Group By Clause

Standard Group By

eXtremeSQL allows standard group by clauses like the following:

     
    -- show items in P with average quantity > 320
    SELECT DISTINCT SP.pid FROM SP GROUP BY SP.pid, SP.jid HAVING avg(SP.qty) > 320;
     
    -- show items in S with links to P2
    SELECT DISTINCT S.sname FROM S,SP WHERE S.sid = SP.sid AND SP.pid = 'P2' GROUP BY S.sname;
     
    -- show Weight in Grams for red and blue items in P with quantity > 200 and sum > 350 with text in output
    SELECT P.pid, 'Grams = ' AS TEXT1, P.weight * 454 AS GMWET, P.color, 'Max Qty = ' AS TEXT2,
    max(qty) as MQY FROM P,SP WHERE P.pid = SP.pid AND (P.color = 'Red' OR P.color = 'Blue') AND SP.qty > 200
    GROUP BY P.pid, P.weight, P.color HAVING sum(SP.qty) > 350;
     

and more complex clauses using a having condition like the following:

     
    -- show items in S grouped by city with count > 1
    SELECT count(*) AS c,sid FROM S GROUP BY city HAVING c>1;
     
    -- show items in S grouped by city with count > 1
    SELECT count(*),sid FROM S GROUP BY city HAVING count(*)>1;
     

Aggregate functions can be also used with the group by clause:

     
    -- show count of rows having the same 'city'
    SELECT city, COUNT(sid) "Count" FROM S GROUP BY city;
     
    -- show count of rows having the same 'status'
    SELECT status, COUNT(sid) "Count" FROM S GROUP BY status;
     
    -- use HAVING clause to show count of rows having the 'status' = 30
    SELECT status, COUNT(sid) "Count" FROM S GROUP BY status HAVING status = 30;
     

 

Tips on Group By usage

Note that the group by clause requires an aggregation function for target values not included in the group by list to be able to show only one row for each grouped value. Or alternatively it is necessary to specify all of the target values in the group by list.

For instance suppose we define and fill a table Cities as follows:

     
    create table Cities(city string, jid string);
    insert into Cities values('Paris','J1');
    insert into Cities values('Rome','J2');
    insert into Cities values('Athens','J3');
    insert into Cities values('Athens','J4');
    insert into Cities values('London','J5');
    insert into Cities values('Oslo','J6');
    insert into Cities values('London','J7');
     

Now if we query the table with xSQL we see that ‘Athens’ and ‘London’ have multiple objects:

     
    XSQL>select * from Cities;
    city    jid
    ------------------------------------------------------------------------------
    Paris   J1
    Rome    J2
    Athens  J3
    Athens  J4
    London  J5
    Oslo    J6
    London  J7
     
    Selected records: 7
     

So if we attempt to group by the city column we get the following error:

     
    XSQL>select * from Cities group by city;
    ERROR: Compiler error at position 25: No aggregates are specified for GROUP BY
    select * from Cities group by city
     

Instead we need to use an aggregate function to reduce the number of results for a column that may have multiple values. For example:

     
    XSQL>select city, max(jid) from Cities group by city;
    city    #2
    ------------------------------------------------------------------------------
    Athens  J4
    London  J7
    Paris   J1
    Rome    J2
    Oslo    J6
     
    Selected records: 5
     

Alternatively, to select all target values it is necessary to list all columns that may have multiple duplicate values in the select list and in the group by list. For example:

     
    XSQL>select city,jid from Cities group by city,jid;
    city    jid
    ------------------------------------------------------------------------------
    Athens  J3
    Athens  J4
    London  J5
    London  J7
    Oslo    J6
    Paris   J1
    Rome    J2
     
    Selected records: 7
     

 

UDF Function-Based Group By

To use a UDF in a group by clause, the UDF must be defined and registered as described in the user-defined functions page. The following example code demonstrates how to use a UDF named “dateformat” to custom format date field output:

     
    const char * db_name = "groupbydb";
     
    // Define global SQL engine
    using namespace McoSql;
    McoSqlEngine engine;
     
    // Define the structure correponding to database table Object
    struct _Contribution
    {
        char const * name;
        uint4        date_val;
        uint4        amount;
    };
     
    _Contribution contributions[] = 
    {
        { "Michael", 91224, 250 },
        { "Sharon", 91224, 50 },
        { "George", 100101, 100 },
        { "Sharon", 100101, 150 },
        { "Michael", 100321, 300 },
        { "Liz", 100321, 20 },
        { "Bob", 100322, 25 }
    };
     
    // Declare and instantiate the user-defined DateFormat function
    static String* dateformat( McoSql::Value* date)
    {
        char str[64];
        int date_val = (int)date->intValue();
        int yy = date_val / 10000;
        int mm = date_val / 100 % 100;
        int dd = date_val % 100;
         
        // Format dd.mm.yyyy assuming all dates are after 2000
        sprintf( str, "%d.%d.20%02d", dd, mm, yy );
        return String::create( str );
    }
     
    static SqlFunctionDeclaration udf
    (
        tpString,          // tpInt is the return value of the UDF
        "dateformat",      // the name of the function as we’ll use it in a query
        (void*)dateformat, // the function pointer
        1                  // the number of arguments to the UDF
    );
     
    int main()
    {
        ...
        engine.open( db_name, groupbydb_get_dictionary(), DATABASE_SIZE, MEMORY_PAGE_SIZE);
     
        // Fill database
        for ( int i=0; i < (int)(sizeof( contributions ) / sizeof( contributions[0] )); i++ ) 
        {
            addContribution( &contributions[i] );
        }
         
        // Show all objects
        queryContributions( "Select * from Contributions" );
         
        // Show sub-totals by contibutor
        showGroup1();
         
        // Show sub-totals by date using user-defined DateFormat
        showGroup2();
        engine.close();
        sample_pause_end("\n\nPress any key to continue . . . ");
        return 0;
    }
     
    int addContribution( _Contribution* c)
    {
        int ret = engine.executeStatement("insert into Contributions %r", c);
        if ( 0 == ret )
        {
            printf("\t\tError inserting Contributions %s\n", c->name );
        }
        return ret;
    }
     
    int queryContributions( char * sql )
    {
        printf( "\n%s\n", sql);
        QueryResult result( engine.executeQuery( sql ) );
        int count = 0;
        Cursor* cursor = result->records();
        while ( cursor->hasNext() )
        {
            Record* rec = cursor->next();
            _Contribution c;
            result->extract( rec, &c, sizeof(c) );
            printContribution( count+1, c );
            count++;
        }
        return count;
    }
     
    void showGroup1()
    {
        sprintf( sql, "Select name, sum(amount) from Contributions group by name" );
        printf( "\n%s\n", sql );
        QueryResult result( engine.executeQuery( sql ) );
        Cursor* cursor = result->records();
        for (int i = 1; cursor->hasNext(); i++)
        {
            Record* rec = cursor->next();
            printf( "\n\t%d)\t%s\t%f", i, ((String*)rec->get(0))->body(), rec->get(1)->realValue() );
        }
    }
     
    void showGroup2()
    {
        sprintf( sql, "Select dateformat(date_val), sum(amount) from Contributions group by date_val" );
        printf( "\n\n%s\n", sql );
        QueryResult result( engine.executeQuery( sql ) );
        Cursor* cursor = result->records();
        for (int i = 1; cursor->hasNext(); i++)
        {
            Record* rec = cursor->next();
            printf( "\n\t%d)\t%s\t%f", i, ((String*)rec->get(0))->body(), rec->get(1)->realValue() );
        }
    }