快生活 - 生活常识大全

使用工具进


  通过csv文件提供的数据库表内容:
  links.csv的格式:
  movies.csv格式,一个movie可以有多种风格(genres),通过|分隔:
  ratings.csv:
  用户给movie打的分:
  tags.csv:movie的标签
  练习一:
  列出四张表的总记录数:
  -- Exercise 1: row counts of the four MovieLens tables, one result row per table.
  -- The table labels are string literals and therefore single-quoted; in SAP HANA
  -- double quotes delimit identifiers, so "links" would be resolved as a column name.
  SELECT 'links' AS "table name", COUNT(*) AS "row count"
  FROM "MOVIELENS"."public.aa.movielens.hdb::data.LINKS"
  UNION ALL
  SELECT 'movies' AS "table name", COUNT(*) AS "row count"
  FROM "MOVIELENS"."public.aa.movielens.hdb::data.MOVIES"
  UNION ALL
  SELECT 'ratings' AS "table name", COUNT(*) AS "row count"
  FROM "MOVIELENS"."public.aa.movielens.hdb::data.RATINGS"
  UNION ALL
  SELECT 'tags' AS "table name", COUNT(*) AS "row count"
  FROM "MOVIELENS"."public.aa.movielens.hdb::data.TAGS";
  执行结果:
  练习2:计算总共9125部电影,一共包含多少艺术类别?
  -- Exercise 2: list the distinct genres that occur across all movies.
  -- "GENRES" stores a '|'-separated list, so every value is split into array
  -- elements; the array is then unnested into a table variable and de-duplicated.
  -- NOTE(review): the original did not advance idx after storing the last token of a
  -- row, so the next row overwrote it. With that fixed the number of rows returned
  -- may differ from the figure quoted in the surrounding text; re-run to confirm.
  DO BEGIN
    DECLARE genreArray NVARCHAR(255) ARRAY;
    DECLARE tmp NVARCHAR(255);
    DECLARE idx INTEGER := 1;              -- next free slot in genreArray
    DECLARE sep NVARCHAR(1) := '|';        -- string literal: single quotes
    DECLARE genres NVARCHAR(255) := '';
    DECLARE CURSOR cur FOR
      SELECT DISTINCT "GENRES"
      FROM "MOVIELENS"."public.aa.movielens.hdb::data.MOVIES";

    FOR cur_row AS cur() DO
      SELECT cur_row."GENRES" INTO genres FROM DUMMY;
      tmp := :genres;

      -- Peel off one genre per iteration while a separator is still present.
      WHILE LOCATE(:tmp, :sep) > 0 DO
        genreArray[:idx] := SUBSTR_BEFORE(:tmp, :sep);
        tmp := SUBSTR_AFTER(:tmp, :sep);
        idx := :idx + 1;
      END WHILE;

      -- Store the trailing (or only) genre and move on to the next free slot.
      genreArray[:idx] := :tmp;
      idx := :idx + 1;
    END FOR;

    genreList = UNNEST(:genreArray) AS ("GENRE");

    SELECT "GENRE"
    FROM :genreList
    GROUP BY "GENRE"
    ORDER BY "GENRE";
  END;
  执行结果,总共包含18种:
  练习3:计算每种艺术类别总共包含多少部电影:
  -- Exercise 3: number of movies per genre.
  -- "GENRES" stores a '|'-separated list, so every movie's value is split into array
  -- elements; the array is then unnested and the elements are counted per genre.
  -- The cursor deliberately reads every movie row (no DISTINCT): de-duplicating the
  -- "GENRES" strings would count genre combinations instead of movies.
  DO BEGIN
    DECLARE genreArray NVARCHAR(255) ARRAY;
    DECLARE tmp NVARCHAR(255);
    DECLARE idx INTEGER := 1;              -- next free slot in genreArray
    DECLARE sep NVARCHAR(1) := '|';        -- string literal: single quotes
    DECLARE genres NVARCHAR(255) := '';
    DECLARE CURSOR cur FOR
      SELECT "GENRES"
      FROM "MOVIELENS"."public.aa.movielens.hdb::data.MOVIES";

    FOR cur_row AS cur() DO
      SELECT cur_row."GENRES" INTO genres FROM DUMMY;
      tmp := :genres;

      -- Peel off one genre per iteration while a separator is still present.
      WHILE LOCATE(:tmp, :sep) > 0 DO
        genreArray[:idx] := SUBSTR_BEFORE(:tmp, :sep);
        tmp := SUBSTR_AFTER(:tmp, :sep);
        idx := :idx + 1;
      END WHILE;

      -- Store the trailing (or only) genre and advance, so the next movie does not
      -- overwrite this element.
      genreArray[:idx] := :tmp;
      idx := :idx + 1;
    END FOR;

    genreList = UNNEST(:genreArray) AS ("GENRE");

    SELECT
        "GENRE",
        COUNT(1) AS "MOVIE_COUNT"
    FROM :genreList
    GROUP BY "GENRE"
    ORDER BY "GENRE";
  END;
  练习4:列出每部电影包含的风格数目:
  -- Exercise 4: number of genres attached to each movie.
  -- "GENRES" is a '|'-separated list, so the genre count is (number of '|') + 1;
  -- a value without any separator therefore counts as one genre.
  -- The regex pattern is a single-quoted string literal; '[|]' matches a literal pipe.
  SELECT
      "MOVIEID",
      "TITLE",
      OCCURRENCES_REGEXPR('[|]' IN "GENRES") + 1 AS "GENRE_COUNT",
      "GENRES"
  FROM "MOVIELENS"."public.aa.movielens.hdb::data.MOVIES"
  ORDER BY "GENRE_COUNT" ASC;
  练习5:按风格数目统计电影的分布情况(每种风格数目各对应多少部电影)
  -- Exercise 5: distribution of movies by number of genres
  -- (how many movies have exactly 1 genre, exactly 2 genres, ...).
  -- Genre count = (number of '|' separators in "GENRES") + 1; the regex pattern is a
  -- single-quoted string literal, since double quotes would denote an identifier.
  SELECT
      "GENRE_COUNT",
      COUNT(1) AS "MOVIE_COUNT"
  FROM (
      SELECT OCCURRENCES_REGEXPR('[|]' IN "GENRES") + 1 AS "GENRE_COUNT"
      FROM "MOVIELENS"."public.aa.movielens.hdb::data.MOVIES"
  )
  GROUP BY "GENRE_COUNT"
  ORDER BY "GENRE_COUNT";
  比如恰好拥有1个风格的电影有2793部,恰好拥有2个风格的电影有3039部,等等。
  练习6:计算movie的rating分布情况
  -- Exercise 6: summary statistics of the per-movie rating counts.
  -- Step 1 counts ratings per movie; step 2 collapses those counts to their distinct
  -- values; the final SELECT computes window aggregates over that collapsed set and
  -- uses DISTINCT to return the single summary row.
  -- NOTE(review): because of the grouping step, MIN/MAX/AVG/SUM/MEDIAN/STDDEV are
  -- taken over the distinct rating-count values, not over movies, and
  -- "CATEGORY_COUNT" is the number of distinct rating counts. Confirm this is intended.
  WITH ratings_per_movie AS (
      SELECT
          "MOVIEID",
          COUNT(1) AS "RATING_COUNT"
      FROM "MOVIELENS"."public.aa.movielens.hdb::data.RATINGS"
      GROUP BY "MOVIEID"
  ),
  distinct_rating_counts AS (
      SELECT "RATING_COUNT"
      FROM ratings_per_movie
      GROUP BY "RATING_COUNT"
  )
  SELECT DISTINCT
      MIN("RATING_COUNT")    OVER () AS "MIN",
      MAX("RATING_COUNT")    OVER () AS "MAX",
      AVG("RATING_COUNT")    OVER () AS "AVG",
      SUM("RATING_COUNT")    OVER () AS "SUM",
      MEDIAN("RATING_COUNT") OVER () AS "MEDIAN",
      STDDEV("RATING_COUNT") OVER () AS "STDDEV",
      COUNT(*)               OVER () AS "CATEGORY_COUNT"
  FROM distinct_rating_counts;
  明细情况:
  -- Detail view for exercise 6: for each rating count, how many movies received
  -- exactly that many ratings, in ascending order of rating count.
  WITH ratings_per_movie AS (
      SELECT
          "MOVIEID",
          COUNT(1) AS "RATING_COUNT"
      FROM "MOVIELENS"."public.aa.movielens.hdb::data.RATINGS"
      GROUP BY "MOVIEID"
  )
  SELECT
      "RATING_COUNT",
      COUNT(1) AS "MOVIE_COUNT"
  FROM ratings_per_movie
  GROUP BY "RATING_COUNT"
  ORDER BY "RATING_COUNT" ASC;
  比如有397部电影的用户投票数为5票
  练习7:统计用户投票情况
  -- Exercise 7: for each number of ratings cast, how many users cast exactly that
  -- many ratings; the most active users are listed first.
  WITH ratings_per_user AS (
      SELECT
          "USERID",
          COUNT(1) AS "RATING_COUNT"
      FROM "MOVIELENS"."public.aa.movielens.hdb::data.RATINGS"
      GROUP BY "USERID"
  )
  SELECT
      "RATING_COUNT",
      COUNT(1) AS "USER_COUNT"
  FROM ratings_per_user
  GROUP BY "RATING_COUNT"
  ORDER BY "RATING_COUNT" DESC;
  有一位用户投了2391票,一位用户投了1868票:
  练习8:统计用户投票得分情况
  -- Exercise 8: number of ratings given for each rating value, highest value first.
  SELECT
      "RATING",
      COUNT(1) AS "RATING_COUNT"
  FROM "MOVIELENS"."public.aa.movielens.hdb::data.RATINGS"
  GROUP BY "RATING"
  ORDER BY "RATING" DESC;
  有15095份用户投票,打的分数是5分
  要获取更多Jerry的原创文章,请关注公众号"汪子熙":
网站目录投稿:晓露