Tuesday, August 25, 2015

Yet another CSV -> Table but with pipleline function

Here's just one more variation on how to get a CSV into a table format.  It could have been done before but my google-fu couldn't find it anywhere.

First to get some sample data using the /*csv*/ hint in sqldev.




Then the results of putting it back to a table. The inline plsql is just to convert the text into a CLOB.



Now the details.

The csv parsing is completely borrowed(stolen) from another Chris where he create a csv parser in plsql here.

The changes I made which is probably obvious to use a plsql object and table of said object to put into a form that can be use in sql.


I'm sure there's optimizations that could be added in here since I almost never use plsql object. But here's the code that renders the above results.



drop function parse_csv;
drop type t_csv_col;
drop type t_csv_table;

CREATE OR REPLACE TYPE t_csv_table IS OBJECT
(
    col1   varchar2(4000),
    col2   varchar2(4000),
    col3   varchar2(4000),
    col4   varchar2(4000),
    col5   varchar2(4000),
    col6   varchar2(4000),
    col7   varchar2(4000),
    col8   varchar2(4000),
    col9   varchar2(4000),
    col10   varchar2(4000)
)
/

show errors

CREATE or replace TYPE t_csv_COL IS TABLE OF t_csv_table
/

show errors



create or replace function parse_csv(
  p_clob clob,
  p_delim varchar2 default ',',
  p_optionally_enclosed varchar2 default '"' ) 
  return t_csv_COL
  
  is
  --
  CARRIAGE_RETURN constant char(1) := chr(13);
  LINE_FEED constant char(1) := chr(10);
  --
  l_char char(1);
  l_lookahead char(1);
  l_pos number := 0;
  l_token varchar2(32767) := null;
  l_token_complete boolean := false;
  l_line_complete boolean := false;
  l_new_token boolean := true;
  l_enclosed boolean := false;
  --
  l_lineno number := 1;
  l_columnno number := 1;
 
  -- additions from Kris
  l_ret t_csv_COL;
  l_ret_row t_csv_table;
  l_ret_index number;
begin
  -- initialize things
  l_ret := t_csv_COL();
  l_ret.extend;
  l_ret_row := t_csv_table(null,null,null,null,null,null,null,null,null,null);
  loop
    -- increment position index
    l_pos := l_pos + 1;
 
    -- get next character from clob
    l_char := dbms_lob.substr( p_clob, 1, l_pos);
 
    -- exit when no more characters to process
    exit when l_char is null or l_pos > dbms_lob.getLength( p_clob );
 
    -- if first character of new token is optionally enclosed character
    -- note that and skip it and get next character
    if l_new_token and l_char = p_optionally_enclosed then
      l_enclosed := true;
      l_pos := l_pos + 1;
      l_char := dbms_lob.substr( p_clob, 1, l_pos);
    end if;
    l_new_token := false;
 
    -- get look ahead character
    l_lookahead := dbms_lob.substr( p_clob, 1, l_pos+1 );
 
    -- inspect character (and lookahead) to determine what to do
    if l_char = p_optionally_enclosed and l_enclosed then
 
      if l_lookahead = p_optionally_enclosed then
        l_pos := l_pos + 1;
        l_token := l_token || l_lookahead;
      elsif l_lookahead = p_delim then
        l_pos := l_pos + 1;
        l_token_complete := true;
      else
        l_enclosed := false;
      end if;
 
    elsif l_char in ( CARRIAGE_RETURN, LINE_FEED ) and NOT l_enclosed then
      l_token_complete := true;
      l_line_complete := true;
 
      if l_lookahead in ( CARRIAGE_RETURN, LINE_FEED ) then
        l_pos := l_pos + 1;
      end if;
 
    elsif l_char = p_delim and not l_enclosed then
      l_token_complete := true;
 
    elsif l_pos = dbms_lob.getLength( p_clob ) then
      l_token := l_token || l_char;
      l_token_complete := true;
      l_line_complete := true;
 
    else
      l_token := l_token || l_char;
    end if;
           
    -- process a new token
    if l_token_complete then
      -- dbms_output.put_line( 'R' || l_lineno || 'C' || l_columnno || ': ' || nvl(l_token,'**null**') );
       -- assign the column value
      case  l_columnno 
          when 1 then  l_ret_row.col1 := l_token;
          when 2 then  l_ret_row.col2 := l_token;
          when 3 then  l_ret_row.col3 := l_token;
          when 4 then  l_ret_row.col4 := l_token;          
          when 5 then  l_ret_row.col5 := l_token;
          when 6 then  l_ret_row.col6 := l_token;
          when 7 then  l_ret_row.col7 := l_token;          
          when 8 then  l_ret_row.col8 := l_token;
          when 9 then  l_ret_row.col9 := l_token;
          when 10 then l_ret_row.col10 := l_token;
          else dbms_output.put_line('Only supports up to 10:'||l_columnno);
      end case;
                            
      l_columnno := l_columnno + 1;
      l_token := null;
      l_enclosed := false;
      l_new_token := true;
      l_token_complete := false;
    end if;
 
    -- process end-of-line here
    if l_line_complete then
      l_ret(l_lineno):= l_ret_row;
      l_lineno := l_lineno + 1;
      l_columnno := 1;
      l_line_complete := false;
      l_ret.extend;
      l_ret_row := t_csv_table(null,null,null,null,null,null,null,null,null,null);

    end if;
  end loop;
  return l_ret;
end parse_csv;
/
show errors