Standard Group By
eXtremeSQL allows standard group by clauses like the following:
-- show items in P with average quantity > 320 SELECT DISTINCT SP.pid FROM SP GROUP BY SP.pid, SP.jid HAVING avg(SP.qty) > 320; -- show items in S with links to P2 SELECT DISTINCT S.sname FROM S,SP WHERE S.sid = SP.sid AND SP.pid = 'P2' GROUP BY S.sname; -- show Weight in Grams for red and blue items in P with quantity > 200 and sum > 350 with text in output SELECT P.pid, 'Grams = ' AS TEXT1, P.weight * 454 AS GMWET, P.color, 'Max Qty = ' AS TEXT2, max(qty) as MQY FROM P,SP WHERE P.pid = SP.pid AND (P.color = 'Red' OR P.color = 'Blue') AND SP.qty > 200 GROUP BY P.pid, P.weight, P.color HAVING sum(SP.qty) > 350;and more complex clauses using a
having
condition like the following:-- show items in S grouped by city with count > 1 SELECT count(*) AS c,sid FROM S GROUP BY city HAVING c>1; -- show items in S grouped by city with count > 1 SELECT count(*),sid FROM S GROUP BY city HAVING count(*)>1;Aggregate functions can be also used with the
group by
clause:-- show count of rows having the same 'city' SELECT city, COUNT(sid) "Count" FROM S GROUP BY city; -- show count of rows having the same 'status' SELECT status, COUNT(sid) "Count" FROM S GROUP BY status; -- use HAVING clause to show count of rows having the 'status' = 30 SELECT status, COUNT(sid) "Count" FROM S GROUP BY status HAVING status = 30;
Tips on Group By usage
Note that the
group by
clause requires an aggregation function for target values not included in the group by list to be able to show only one row for each grouped value. Or alternatively it is necessary to specify all of the target values in the group by list.For instance suppose we define and fill a table Cities as follows:
create table Cities(city string, jid string); insert into Cities values('Paris','J1'); insert into Cities values('Rome','J2'); insert into Cities values('Athens','J3'); insert into Cities values('Athens','J4'); insert into Cities values('London','J5'); insert into Cities values('Oslo','J6'); insert into Cities values('London','J7');Now if we query the table with xSQL we see that ‘
Athens
’ and ‘London
’ have multiple objects:XSQL>select * from Cities; city jid ------------------------------------------------------------------------------ Paris J1 Rome J2 Athens J3 Athens J4 London J5 Oslo J6 London J7 Selected records: 7So if we attempt to
group by
thecity
column we get the following error:XSQL>select * from Cities group by city; ERROR: Compiler error at position 25: No aggregates are specified for GROUP BY select * from Cities group by cityInstead we need to use an aggregate function to reduce the number of results for a column that may have multiple values. For example:
XSQL>select city, max(jid) from Cities group by city; city #2 ------------------------------------------------------------------------------ Athens J4 London J7 Paris J1 Rome J2 Oslo J6 Selected records: 5Alternatively, to select all target values it is necessary to list all columns that may have multiple duplicate values in the select list and in the
group by
list. For example:XSQL>select city,jid from Cities group by city,jid; city jid ------------------------------------------------------------------------------ Athens J3 Athens J4 London J5 London J7 Oslo J6 Paris J1 Rome J2 Selected records: 7
UDF Function-Based Group By
To use a UDF in a
group by
clause, the UDF must be defined and registered as described in the user-defined functions page. The following example code demonstrates how to use a UDF named “dateformat
” to custom format date field output:const char * db_name = "groupbydb"; // Define global SQL engine using namespace McoSql; McoSqlEngine engine; // Define the structure correponding to database table Object struct _Contribution { char const * name; uint4 date_val; uint4 amount; }; _Contribution contributions[] = { { "Michael", 91224, 250 }, { "Sharon", 91224, 50 }, { "George", 100101, 100 }, { "Sharon", 100101, 150 }, { "Michael", 100321, 300 }, { "Liz", 100321, 20 }, { "Bob", 100322, 25 } }; // Declare and instantiate the user-defined DateFormat function static String* dateformat( McoSql::Value* date) { char str[64]; int date_val = (int)date->intValue(); int yy = date_val / 10000; int mm = date_val / 100 % 100; int dd = date_val % 100; // Format dd.mm.yyyy assuming all dates are after 2000 sprintf( str, "%d.%d.20%02d", dd, mm, yy ); return String::create( str ); } static SqlFunctionDeclaration udf ( tpString, // tpInt is the return value of the UDF "dateformat", // the name of the function as we’ll use it in a query (void*)dateformat, // the function pointer 1 // the number of arguments to the UDF ); int main() { ... engine.open( db_name, groupbydb_get_dictionary(), DATABASE_SIZE, MEMORY_PAGE_SIZE); // Fill database for ( int i=0; i < (int)(sizeof( contributions ) / sizeof( contributions[0] )); i++ ) { addContribution( &contributions[i] ); } // Show all objects queryContributions( "Select * from Contributions" ); // Show sub-totals by contibutor showGroup1(); // Show sub-totals by date using user-defined DateFormat showGroup2(); engine.close(); sample_pause_end("\n\nPress any key to continue . . . "); return 0; } int addContribution( _Contribution* c) { int ret = engine.executeStatement("insert into Contributions %r", c); if ( 0 == ret ) { printf("\t\tError inserting Contributions %s\n", c->name ); } return ret; } int queryContributions( char * sql ) { printf( "\n%s\n", sql); QueryResult result( engine.executeQuery( sql ) ); int count = 0; Cursor* cursor = result->records(); while ( cursor->hasNext() ) { Record* rec = cursor->next(); _Contribution c; result->extract( rec, &c, sizeof(c) ); printContribution( count+1, c ); count++; } return count; } void showGroup1() { sprintf( sql, "Select name, sum(amount) from Contributions group by name" ); printf( "\n%s\n", sql ); QueryResult result( engine.executeQuery( sql ) ); Cursor* cursor = result->records(); for (int i = 1; cursor->hasNext(); i++) { Record* rec = cursor->next(); printf( "\n\t%d)\t%s\t%f", i, ((String*)rec->get(0))->body(), rec->get(1)->realValue() ); } } void showGroup2() { sprintf( sql, "Select dateformat(date_val), sum(amount) from Contributions group by date_val" ); printf( "\n\n%s\n", sql ); QueryResult result( engine.executeQuery( sql ) ); Cursor* cursor = result->records(); for (int i = 1; cursor->hasNext(); i++) { Record* rec = cursor->next(); printf( "\n\t%d)\t%s\t%f", i, ((String*)rec->get(0))->body(), rec->get(1)->realValue() ); } }