Wednesday, 15 December 2010

How to read CSV file with commas inside / within quotation marks

Reading a CSV file becomes more complicated when it includes quoation marks with combination of commas inside a column. To overcome that issues we can use string shunting method which realy works in certain cases.

Following excercise will help you to extract a CSV file by shunting every data item after a read from the string. You must know which columns are provided with quotation makrs.
 
Step 1: Create a CSV file

create a csv file using any text editor and it should be saved with extension like filename.csv
copy and paste following text in the text file and save it.


1233222, JACK, "123 UNKNOWN STREET, UNKNOWN TOWN", 12-DEC-2020, "THE CUSTOMER IS TAGGED AS VIP CUSTOMER", 200000000
1233222, TOM, "321 UNKNOWN STREET, UNKNOWN TOWN", 12-MAR-2010, "", 7000
1233222, JERRY, "551 UNKNOWN STREET, UNKNOWN TOWN", 12-OCT-2009, "THE ACCOUNT HOLDER MAY APPLY FOR LOAN, AS THE CUSTOMER IS AN INVESTOR", 150000


Step 2: Create a database directory

C:\ mkdir  csvfiles
C:\SQLPLUS / AS SYSDBA
SQL> DROP DIRECTORY CSV_FILES;
SQL> CREATE DIRECTORY CSV_FILES AS 'C:\csvfiles';
SQL> GRANT read, write ON DIRECTORY CSV_FILES TO HR ;

Step 3: Create and compile following procedure in HR user

SQL> CONNECT HR/HR@ORCL
  CREATE OR REPLACE PROCEDURE READ_WRITE_CSV (
                                              P_FILE_LOCATION IN VARCHAR2,
                                              P_FILENAME IN VARCHAR2,
                                              P_READ_WRITE_MODE IN VARCHAR2
                                             ) as
   V_EXTRACT_FILE      UTL_FILE.FILE_TYPE;
   V_EXTRACT_LOCATION  VARCHAR2(300);
   V_READ_WRITE_MODE   VARCHAR2(1);
   V_LENGTH            number;
   V_FILE_LINE         varchar2(4000);
   account_id          varchar2(500);
   customer_name       varchar2(500);
   customer_address    varchar2(500);
   account_open_date   varchar2(500);
   comments            varchar2(500);
   opening_balance     varchar2(500);

  BEGIN

   V_EXTRACT_FILE  := UTL_FILE.fopen(P_FILE_LOCATION,P_FILENAME,P_READ_WRITE_MODE);
   --=================  
   -- string format
   --=================  
   -- account_id, customer name, customer address, account open date, comments, opening balance
   LOOP
          utl_file.get_line (V_EXTRACT_FILE,V_FILE_LINE);
          -- account id
          V_LENGTH := (INSTR(V_FILE_LINE , ',', 1, 1)) ;
          account_id := substr(V_FILE_LINE , 1,  V_LENGTH -1) ;
          V_FILE_LINE := SUBSTR( V_FILE_LINE, V_LENGTH+1 );
          --customer name
          V_LENGTH := (INSTR(V_FILE_LINE , ',', 1, 1)) ;
          customer_name := substr(V_FILE_LINE , 1,  V_LENGTH -1) ;
          V_FILE_LINE := SUBSTR( V_FILE_LINE, V_LENGTH+1 );
          --customer address
          V_LENGTH := (INSTR(V_FILE_LINE , '"', 1, 2))+1 ;
          customer_address := substr(V_FILE_LINE , 1,  V_LENGTH -1) ;
          V_FILE_LINE := SUBSTR( V_FILE_LINE, V_LENGTH+1 );
          --account open date
          V_LENGTH := (INSTR(V_FILE_LINE , ',', 1, 1)) ;
          account_open_date := substr(V_FILE_LINE , 1,  V_LENGTH -1) ;
          V_FILE_LINE := SUBSTR( V_FILE_LINE, V_LENGTH+1 );
          --comments
          V_LENGTH := (INSTR(V_FILE_LINE , '"', 1, 2))+1 ;
          comments := substr(V_FILE_LINE , 1,  V_LENGTH -1) ;
          V_FILE_LINE := SUBSTR( V_FILE_LINE, V_LENGTH+1 );
          --openning balance
           opening_balance := substr(V_FILE_LINE , 1 ) ;
         
          dbms_output.put_line(customer_name     ||'-'||
                              customer_address  ||'-'||
                              account_open_date ||'-'||
                              comments          ||'-'||
                              opening_balance
                             );
     end loop;
   EXCEPTION
    WHEN NO_DATA_FOUND THEN
    COMMIT;
    UTL_FILE.fclose(V_EXTRACT_FILE);
    dbms_output.put_line('OK') ;
    WHEN  UTL_FILE.INVALID_PATH THEN
    dbms_output.put_line('NOT OK - INVALID PATH') ;
    WHEN OTHERS THEN
    UTL_FILE.fclose(V_EXTRACT_FILE);
    DBMS_OUTPUT.PUT_LINE(SQLERRM) ;
   END;
/
SQ>

Step 4: Execute procedure to read CSV file

SQL> SET SERVEROUTPUT ON
SQL> EXECUTE READ_WRITE_CSV ('CSV_FILES', 'customer.csv', 'R' ) ;