Showing posts with label Perl. Show all posts
Showing posts with label Perl. Show all posts

Tuesday, August 12, 2003

Importing and converting 600 word documents into a database as clean html

The following script is used to import 600 Word documents into a database as cleaned html.

#!c:\oracle\ora81/Apache/perl/5.00503/bin/MSWin32-x86/perl.exe

#################################################################################
# Requirements:
# ------------
#
# - Perl 5.00503 or later installed and working on your machine
# - change the first line in this script to point to the perl executable
#
#
#
# Modules:
# --------
#
# - you will need (HTML::Parser, HTML::Treebuilder )
# - install instrucitons
#      win32: (you need Visual Studio to install manually. If you are using ActiveState
#             Perl then use the Perl Package Manager they provide else see below)
#          1. go to www.cpan.org browse the modules and find the above mentioned
#             modules.
#          2. download
#          3. Unzip/Untar
#          4. Open a command prompt in the HTML::Parser unziped directory
#          5. type> perl makefile.pl
#          6. type> nmake
#          7. type> nmake test
#          8. type> nmake install
#          9. do the same for the HTML::Treebuilder directory
#
#
#
#
# Description:
# ------------
#
# This script was created to import 600 word documents as html into a Oracle database.
#
# This script is step 2 in an migration process including an vbs (Visual Basic Script)
#
# Step 1: Convert all documents from MS Word to HTML
#
#           Option Explicit
#
#           Dim wordObj, fsObj
#
#           Set wordObj = WScript.CreateObject("Word.Application")
#           Set fsObj = WScript.CreateObject("Scripting.FileSystemObject")
#
#           ' SubRoutine call convert all files in folder to HTML
#           WordToHTML fsObj.GetFolder("..")
#
#           wordObj.Quit
#
#           Set fsObj = Nothing
#           Set wordObj = Nothing
#
#
#
#           '''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
#           ' Subroutines
#           '''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
#           Sub WordToHTML(Folder)
#              On Error Resume Next
#              Dim File
#              For Each File In Folder.Files
#                 Set opgaveDoc = wordObj.Documents.Open (File.Path)
#                 wordobj.ActiveDocument.SaveAs replace(replace(replace(Replace(file.path, " ", "_"),"å","aa"),"æ","ae"),"ø","oe") & ".htm", 8
#                 wordobj.ActiveDocument.close
#              Next
#           End Sub
#
#
#
# Step 2: this script
#         - clean the MS HTML using HMTLTidy
#         - locate the data in the cleaned html document
#         - clean data of unneccesary formating
#         - insert the found data in the wanted format (HTML template)
#         - generate SQL statement to insert task into database
#         - execute SQL statement
#         - log the process
#
#################################################################################


#################################################################################
# INCLUDE MODULES
#################################################################################

#use Carp;
#use diagnostics;
use DBI;
use HTML::TreeBuilder;



#################################################################################
# VARABLE DECLARATION
#################################################################################

my $user    = "USERNAME";                    # database username
my $passwd  = "PASSWORD";                    # database password
my $driver  = "Oracle";                   # DBD (DataBase Driver)
my $db_name = "DATABASENAME;                    # database server instance name (tnsname)

my $pathHtmlBackup = ".\\temp\\";         # where to put the original backup file
my $pathLogFile = ".\\";                  # where to put the processing log file
my $pathDocuments = ".\\doc\\";           # path to where the word documents are located

my $doDatabaseUpdate = 1;                 # used during development 0=off, 1=on

my $DBITraceEnabled = 0;                  # If you want DBI tracing: 1=on, 0=off
my $DBITraceLevel = 2;                    # DBI tracing level (1-9). 1 or 2 should be sufficient
my $DBITraceFilePath = "c:\\trace.log";   # Path to where you want the trace file. On win32 remember to escape the \ with
                                          # another \ so "c:\trace.log" would become "c:\\trace.log"



#################################################################################
# SUB ROUTINE DECLARATION
#################################################################################

sub decodeHTMLTask;



#################################################################################
# MAIN SCRIPT FLOW
#################################################################################

print "Connecting to database. Please wait ...\n";

my $dataSource = "dbi:$driver:${db_name}";
my $dbh = DBI->connect($dataSource, $user, $passwd) || die "Error connecting to database: $DBI::errstr\n";
if ($DBITraceEnabled) {$dbh->trace($DBITraceLevel, $DBITraceFilePath);}

# Make sure buffer is big enough for CLOB, BLOB
$dbh->{LongReadLen}=500 * 1024;  # 500KB

# Getting all the html docs from $pathDocuments
@htmlFileList = `dir /b *.htm`;
chop @htmlFileList;

open (LOGFILE, ">migration.log.txt") || die ("Could not open log file");

# Process each an every file with a html extension found in the current directory
foreach $file (@htmlFileList) {

   # Backup the converted word document
   $cmd = `copy $file ${pathHtmlBackup}${file}.org.html`;
   print LOGFILE ("Backup file : copy $file ${pathHtmlBackup}${file}.org.html\n");

   # Run the html file through HTMLTidy which cleans out the html
   $cmd = `TidyDbg.exe -config clean.cfg $file`;
   print LOGFILE ("Clean file  : TidyDbg.exe -config clean.cfg $file\n");

   # Move the cleaned html file
   $cmd = `move $file ${pathHtmlBackup}${file}.clean.html`;
   print LOGFILE ("Move file   : move $file ${pathHtmlBackup}${file}.clean.html\n");

   open (CLEANHTML, "${pathHtmlBackup}${file}.clean.html") || die ("Cannot open file ${pathHtmlBackup}${file}.clean.html\n");
   open (CLEANNOBRHTML, ">${pathHtmlBackup}${file}.clean.nobr.html") || die ("Cannot open file ${pathHtmlBackup}${file}.clean.nobr.html\n");

   # Removing all newline characters (editlive does not like them)

   # Read the file assocsiated with filehandle CLEANHTML into array @lines
   @lines = <CLEANHTML>;

   # Chop of the last character in every item in array
   #chop (@lines);

   # Join the array to a single string
   $line = join ("",@lines);
   $line =~ s/\n/ /g;

   print CLEANNOBRHTML $line;

   close (CLEANNOBRHTML);
   close (CLEANHTML);

   # Build insert statement based on information found in the html file
   &decodeHTMLTask($file);
} # end foreach

close (LOGFILE);


#################################################################################
# SUB ROUTINES
#################################################################################



#################################################################################
# COMMENTS: "decodeHTMLTask"
#---------------------------
#
# INPUT  : FILENAME (TEXT STRING) EX. something.html
# FUNCTION: PARSE THE FILES DOCUMENT TREE STRUCTURE AND GRAB THE INFORMATION
#           WHICH IS NEEDED. BUILD THE SQL INSERT STATEMENT BASED ON THIS
#           INFORMATION.
# OUTPUT  : SQL INSERT STATEMENT
#################################################################################
sub decodeHTMLTask{
   my ($file) = @_;

   my $tree = HTML::TreeBuilder->new;
   $tree->parse_file("${pathHtmlBackup}${file}.clean.nobr.html");

   #$tree->dump;           # Uncomment this line for document tree dump
   print LOGFILE ("Processing  : ${pathHtmlBackup}${file}.clean.nobr.html...\n");
   print ("Processing  : ${pathHtmlBackup}${file}...\n");

   # Get title from html document.
   my $tref = $tree->content->[1]->content->[0]->content->[2]->content->[1]->content->[1];
   my $taskTitle = $tref->as_text;

   # Get stage (PÃ¥ scenen) from html document
   my $tref = $tree->content->[1]->content->[0]->content->[2]->content->[2]->content->[0];
   my $onStage = $tref->as_text;
   # Since we know that stage has to be either onStage or offStage we already know
   # which one it is.

   # Get priority (Need) from html document
   my $tref = $tree->content->[1]->content->[0]->content->[2]->content->[2]->content->[2];
   my $need = $tref->as_text;
   # Since we know that need has to be either need or nice we already know
   # which one it is.

   # Determinig what priority id we need
   if ($onStage eq "x") {
      if ($need eq "x") {
         $priorityId = 1;
      }
      else {
         $priorityId = 2;
      }
   }
   else {
      if ($need eq "x") {
         $priorityId = 3;
      }
      else {
         $priorityId = 4;
      }
   }

   # Get (Hvorfor) from html document
   my $tref = $tree->content->[1]->content->[0]->content->[2]->content->[4]->content->[1];
   my $hvorfor = $tref->as_text;

   # Get (Hvordan) from html document
   my $tref = $tree->content->[1]->content->[0]->content->[2]->content->[6]->content->[0];
   my $hvordan = $tref->as_HTML;

   # We need to remove a start/tags td (tabledata) fomr the hvordan data
   # to be able to use the data in to input template
   $hvordan =~ s/<td colspan=11 valign="top">//ig;
   $hvordan =~ s/<\/td>//ig;

   # Get (Links) from html document
   my $tref = $tree->content->[1]->content->[0]->content->[2]->content->[8]->content->[0]->content->[0];
   my $links = $tref->as_HTML;

   # Clear memory
   $tree->delete;

   $sql = "select task_id_seq.nextval from dual";
   $sth = $dbh->prepare($sql) || die "\nPrepare error: $dbhI::err .... $dbhI::errstr\n";;
   $sth->execute()  || die "\nExecute error: $dbhI::err .... $dbhI::errstr\n";
   $sth->bind_col( 1, \$task_id );
   $sth->fetch();

   # Building the task description text
   my $HTMLTask = "<br><TABLE cellSpacing=0 cellPadding=0 border=1><TR><TD vAlign=top width=571><H5><FONT face=Verdana>Hvorfor</FONT></H5><P><FONT face=Verdana>$hvorfor</FONT></P><P>&nbsp;</P></TD></TR><TR><TD vAlign=top width=571><H5><FONT face=Verdana>Hvordan</FONT></H5><P><FONT face=Verdana>$hvordan</FONT></P><P>&nbsp;</P></TD></TR><TR><TD vAlign=top width=571><H5><FONT face=Verdana>Links</FONT></H5><P><FONT face=Verdana>$links</FONT></P><P>&nbsp;</P></TD></TR></TABLE><br>";
   $HTMLTask =~ s/\n/ /g;

   # SQL statement needed to inser task into database
   my $sqlInsertTask = "
      INSERT INTO TASK (
         task_id,
         gro_id,
         name,
         task_flag,
         text,
         priority_id,
         user_created,
         date_created,
         user_changed,
         date_changed)
      VALUES (
         :task_id,
         task_admin_pack.getPrimgroid(),
         :taskTitle,
         'T',
         :HTMLTask,
         :priorityId,
         'Task massload script',
         sysdate,
         'Task massload script',
         sysdate)
   ";

   $sth = $dbh->prepare($sqlInsertTask) || die "\nPrepare error: $dbhI::err .... $dbhI::errstr\n";;
   $sth->bind_param(":task_id", $task_id);
   $sth->bind_param(":taskTitle", $taskTitle);
   $sth->bind_param(":HTMLTask", $HTMLTask);
   $sth->bind_param(":priorityId", $priorityId);
   $sth->execute()  || die "\nExecute error: $dbhI::err .... $dbhI::errstr\n";

   print LOGFILE ("Execute SQL : INSERT INTO TASK (task_id, gro_id, name, task_flag, text, priority_id, user_created, date_created, user_changed, date_changed) VALUES ($task_id, TASK_admin_pack.getPrimgroid(), '$taskTitle', 'T', '$HTMLTask', '$priorityId', 'Task massload script', sysdate, 'Task massload script', sysdate)\n;");

   # Generate insert statement needed to insert into all search
   my $sqlInsertAllSearch = "
      INSERT INTO all_search (
         as_id,
         table_id,
         all_searchfield,
         com_id,
         gro_id,
         pro_id,
         table_name,
         id_field_name,
         owner_field_name,
         access_table_name)
      values (
         as_seq.nextval,
         :task_id,
         :v_temptext,
         :v_componentID,
         NULL,
         NULL,
         NULL,
         NULL,
         NULL,
         NULL)
   ";

   $sth = $dbh->prepare($sqlInsertAllSearch) || die "\nPrepare error: $dbhI::err .... $dbhI::errstr\n";;
 $sth->bind_param(":task_id", $task_id);
 $sth->bind_param(":v_temptext", $taskTitle . " " . $HTMLTask);
 $sth->bind_param(":v_componentID", 42);
   $sth->execute()  || die "\nExecute error: $dbhI::err .... $dbhI::errstr\n";

   print LOGFILE ("Execute SQL : INSERT INTO all_search (as_id, table_id, all_searchfield, com_id, gro_id, pro_id, table_name, id_field_name, owner_field_name, access_table_name) VALUES (as_seq.nextval, $p_taskid,  '$taskTitle $HTMLTask', $v_componentID, $v_groid, NULL, NULL, NULL, NULL, NULL);\n");

}

This is the configuration settings use together with Html Tidy for windows

word-2000: yes
show-body-only: yes
clean: yes
char-encoding: raw
quiet: yes
show-errors: 0
show-warnings: no
drop-empty-paras: no
indent-spaces: 3
new-inline-tags: o:p
doctype: omit
tidy-mark: no
quote-ampersand: no
write-back: yes
force-output: yes

Thursday, March 20, 2003

PERL upload and download image to database (code nostalgia)

I can across an one of my old storage hard disks which I use as backup. I plugged it in and went exploring and found a few programs PERL scripts from 1996 :). I do vaguely remember the HTML mixed code.

These 2 scripts was responsible for the download and upload of images to a image database in an Oracle database. It actually ended up serving up to 60K page-hits a day where each page contained about 5-10 pictures for a few years and performed acceptably at the time.

Pure nostalgia :)

Here is the upload script:

#!c:\oracle\oraport/Apache/perl/5.00503/bin/MSWin32-x86/perl.exe
#
# FILE NAME:
# mediaUpload.cgi
#
# DESCRIPTION:
#   Script responsible for the binary upload of BLOBs over the http web protocol.
#   The file will be inserted into a database as well as uploaded to a specified
#   $SAVE_DIRECTORY on the webserver.

########################### User defined variables #############################

#$ENV{ORACLE_HOME} = '/u02/oracle/ora817';
#$ENV{NLS_LANG} = 'danish_denmark.we8iso8859p1';
#$ENV{ORA_NLS33} = '/u02/oracle/ora817/ocommon/nls/admin/data';

my $user    = "USERNAME";
my $passwd  = "PASSWORD";
my $driver  = "Oracle";
my $db_name = "DATABASENAME";

my $SAVE_DIRECTORY = "/images";
my $NT_SERVER = 0;


################################################################################


use CGI qw(:standard);
my $query = new CGI;
use CGI qw(:cgi-lib);
use DBI qw(:sql_types);



# Parse arguments
local *in;
&ReadParse(*in);

# Flush StdOut
$| = 1;



# Used for debugging the scripts. If a debugging variable is sent as a
# parameter in the query_string then set debugging mode on.
if ($ENV{'QUERY_STRING'} =~ /debug/g) {
 my $debugMode= $in{'debug'};
}
else {
 my $debugMode = 0;
}





#print &PrintHeader;

my $dataSource = "dbi:$driver:${db_name}";
my $dbh = DBI->connect($dataSource, $user, $passwd) || die "Error connecting to database: $DBI::errstr\n";

&insert_data;

exit;


sub insert_data() {

 # Upload binary data
 foreach $key (sort {$a <=> $b} $query->param()) {
  next if ($key =~ /^\s*$/);
  next if ($query->param($key) =~ /^\s*$/);
  next if ($key !~ /^media:/);
  ($garbage, $filenumber) = split(/:/,$key);

  $filenumber =~ s/file//;
  $desc = "desc" . $filenumber;
  $mime = "mime" . $filenumber;
        $cat = "cat" . $filenumber;

  $Number = $1;

  if ($query->param($key) =~ /([^\/\\]+)$/) {
   $Filename = $1;
   $Filename =~ s/^\.+//;
   $File_Handle = $query->param($key);
   $no_bytes = (stat ($File_Handle))[7];
  }

     undef $BytesRead;
  undef $Buffer;

  read ($File_Handle, $Buffer, $no_bytes);

  # Get the next id from the database
  my $sqlId = "
      SELECT icseq_media_elements.NEXTVAL
            FROM dual";
        my $id = $dbh->selectrow_array( $sqlId );

        $string = $File_Handle;
        $string =~ s/(.*)\.(.*)//g;
        $ext = $2;


  # Save image to database
  $sqlInsert = "
   INSERT INTO ic_media_elements
    (media_id,
     media,
     mime_type,
     org_file_name,
     file_size,
     description,
     category_id,
     user_created,
     date_created,
     user_changed,
     date_changed)
   VALUES
    (:media_id,
     :media,
     :mime_type,
     :org_file_name,
     :file_size,
     :description,
     :category_id,
     :user_created,
      SYSDATE,
     :user_changed,
      SYSDATE)";

  $sth = $dbh->prepare($sqlInsert);
  $sth->bind_param(":media_id", $id);
  $sth->bind_param(":media", $Buffer, SQL_BINARY);
  $sth->bind_param(":mime_type", $in{$mime});
  $sth->bind_param(":org_file_name", $Filename);
  $sth->bind_param(":file_size", $no_bytes);
  $sth->bind_param(":description", $in{$desc});
  $sth->bind_param(":category_id", $in{$cat});
  $sth->bind_param(":user_created", "administrator");
  $sth->bind_param(":user_changed", "administrator");
  $sth->execute();


        # Save image to file
        if (!open(OUTFILE, ">$SAVE_DIRECTORY\/$id\.$ext")) {
            print "Content-type: text/plain\n\n";
            print "-------------------------\n";
            print "Error:\n";
            print "-------------------------\n";
            print "File: $SAVE_DIRECTORY\/$Filename\n";
            print "-------------------------\n";
         print "There was an error opening the Output File\n";
         print "for Writing.\n\n";
         print "Make sure that the directory:\n";
         print "$SAVE_DIRECTORY\n";
         print "has been chmodded with the permissions '777'.\n\n";
         print "Also, make sure that if your attempting\n";
         print "to overwrite an existing file, that the\n";
         print "existing file is chmodded '666' or better.\n\n";
         print "The Error message below should help you diagnose\n";
         print "the problem.\n\n";
         print "Error: $!\n";
            exit;
        }
  if ($NT_SERVER) {
   binmode(OUTFILE);
  }
  {next;}
        print OUTFILE $Buffer;
     close(OUTFILE);


  push(@Files_Written, "$SAVE_DIRECTORY\/$Filename");

  $TOTAL_BYTES += $no_bytes;
  $Confirmation{$File_Handle} = $no_bytes;

     close($File_Handle);
  $FILES_UPLOADED = scalar(keys(%Confirmation));
 }
 print header;
 print <<__END_OF_HTML_CODE__;
  <html>
  <head>
  <title>Resultat af data upload</title>
        <link rel="stylesheet" href="/cm.css/CMfont.css" type="text/css">
  </head>

  <body bgcolor="#FFFFFF" text="#000000">

  <table width="560" border="1" cellspacing="0"  align="center">
   <tr bgcolor="#FFFF00">
    <td colspan="3" class="divText">
         Du har uploadet de f&oslash;lgende <b>$FILES_UPLOADED</b> fil(er) til $driver databasen "$db_name".<BR>
         Den totale datam&aelig;ngde var p&aring; <B>$TOTAL_BYTES</B> bytes.<BR>
    </td>
   </tr>
   <tr bgcolor="#CCCCCC">
    <td width="400" class="divTextH2">Fil</td>
    <td width="160" class="divTextH2">St&oslash;rrelse </td>
   </tr>
__END_OF_HTML_CODE__

 foreach $key (keys (%Confirmation)) {
  print <<__END_OF_HTML_CODE__;
   <tr bgcolor="#FFFFFF">
    <td width="400" class="divText">$key</td>
    <td align=right width="160" >$Confirmation{$key} bytes</td>
    <tr>
__END_OF_HTML_CODE__
 }

 print <<__END_OF_HTML_CODE__;
  </TABLE>
  </body>
  </html>
__END_OF_HTML_CODE__

 exit;
} 


and the download script



#!c:\oracle\oraport/Apache/perl/5.00503/bin/MSWin32-x86/perl.exe
#
# FILE NAME:
# media.cgi
#
#
# DESCRIPTION:
# Retrieves an media elements (jpeg, gif, mov ..) from the database and shows
# it in the same way as if it had been on the filesystem.
#
# ABSTRACT:
# Based on a 'page_id' and the 'insert_area_name' sent as parameters issue an
# SQL to the database retrieve media and the mime_type columns from the specific
# row found.
#
#
# HISTORY:
# Date  Developer   Version Description Of Changes Made
# ----  ---------   ------- ---------------------------
#   14/7-2001   Kenneth Thorman  1.0   Original

########################### User defined variables #############################

$ENV{ORACLE_HOME} = '/u02/oracle/ora817';
$ENV{NLS_LANG} = 'danish_denmark.we8iso8859p1';
$ENV{ORA_NLS33} = '/u02/oracle/ora817/ocommon/nls/admin/data';

my $user    = "USERNAME";
my $passwd  = "PASSWORD";
my $driver  = "Oracle";
my $db_name = "DATABASENAME";



################################################################################

use CGI qw(:standard, :cgi-lib);
use DBI qw(:sql_types);

$query = new CGI;

local *in;
&ReadParse(*in);

$dataSource = "dbi:$driver:${db_name}";
$dbh = DBI->connect($dataSource, $user, $passwd);

# Make sure buffer is big enough for CLOB, BLOB (200KB)
$dbh->{LongReadLen}= 200 * 1024;

# SQL statement to select an media based on page_id, and insert_area_name
$sql = "
 SELECT media, mime_type
 FROM ic_media_elements
 WHERE media_id = :media_id";
$sth = $dbh->prepare($sql);

# Bind the input values to named place holders to allow for caching
$sth->bind_param(":media_id", $in{'media_id'});
$sth->execute();

# Binding output columns to minimize variables used. 
$sth->bind_col( 1, \$media );
$sth->bind_col( 2, \$mime_type );
$sth->fetch();

print $query->header($mime_type), $media;

$sth->finish;
$dbh->disconnect;

Saturday, September 30, 2000

Seismic P1/90 Change/offset shotpoint number

Again one of those old seismic scripts pulled out of of my old failing harddisk. This one allows you to increment/decrement all the shotpoint numbers in a P1/90 file by a certain amount.

Once in a while the navigation department inputs the wrong SP start number
referenced to the preplots ex. start SP 3500 when it should be 3490. This script
takes as input the filename of the file you want to change, and the amount of SP
offset the recorded (wrong) SP number by.

IMPORTANT:
----------
You must add a "+" or a "-" in front of the SP change to be applied
 

SYNTAX: change_sp <FILENAME> <+/- SP TO CHANGE>

#!/opt/perl5/bin/perl

# Sub routine declaration
sub CheckCommandLineSyntax;
sub DisplayOnlineHelp;


# Main script
CheckCommandLineSyntax;
$outfile = "$ARGV[0]" . ".SPcor";

open (P190, "$ARGV[0]") or die ("Could not open $file.\n");
open (OUT, ">$outfile") or die ("Could not open $file.\n");
  $x = 0;
  $line = <P190>;
  while ($line ne "") {
    
    if (($line =~ /^V/g) or ($line =~ /^E/g) or ($line =~ /^S/g) or ($line =~ /^C/g) or ($line =~ /^T/g)) {
      $SPInFile = substr ($line,20,5);
      $CorrectedSP = eval($SPInFile . $ARGV[1]);
      
      $CorrectedSP = "      " . $CorrectedSP;
      $CorrectedSP = substr($CorrectedSP, length($CorrectedSP) - 5, length($CorrectedSP));
      
      $line =~ s/$SPInFile/$CorrectedSP/g;
      print OUT ($line);      
      $x++;
      if ($x == 500) {
        print ("Now processing: \'$SPInFile\'         ->         \'$CorrectedSP\'\n");
        $x = 0;
      }
    }
    else {
      print OUT ($line);      
    }    
    $line = <P190>;
  }
close(P190);
close(OUT);




# Sub routine implementation

sub CheckCommandLineSyntax{
  $arg_cnt = @ARGV;            
  if ($arg_cnt != 2){
    DisplayOnlineHelp;
    die ("SYNTAX: change_sp <FILENAME> <+/- SP TO CHANGE> ...\n\n");
  }
}

sub DisplayOnlineHelp{
  $~ = "HELP";
  write;
}
format HELP =

                change_sp  v1.0
                  by
                Kenneth Thorman
                      
DESCRIPTION:
------------
Once in a while the navigation department inputs the wrong SP start number
referenced to the preplots ex. start SP 3500 when it should be 3490. This script 
takes as input the filename of the file you want to change, and the amount of SP 
offset the recorded (wrong) SP number by. 

IMPORTANT:
----------
You must add a "+" or a "-" in front of the SP change to be applied

Script   : change_sp
Version  : 1.0
Author   : Kenneth Thorman
Date     : 21/6 - 2000

.

Thursday, June 22, 2000

Perl P2/91 & P2/94 fathometer data correction script (code nostalgia)

This is the last posting of a series of postings initiated by me finding an old backup hard disk containing some scripts written when I was working in the Marine Seismic industry.

I remember hacking these scripts in the ed editor on HP-UX, with no version control apart from manually maintaining the versions in different directories. Code wise the script is not something to look at for code structure etc... but it worked well at the time over a period of almost 3 years.

Perl 5 did have full support for classes but for some reason I did not implement this using OO.

The help information in the script states:

DESCRIPTION:
------------
This script comes in handy when your echosounder data in the P291/P294
file either is extremly noisy or non existing. The script can:
- correct noisy data, by deleting corrupt data and inserting new data
- insert new data where data is missing
The script needs the P291/P294 file with the bad data, also needed is a
"WD" file where each line contains a data pair of (SP/Water depths).


WHERE TO GET THE WD FILE:
-------------------------
SEISMIC data processors. To see more help type: "correct_P2_fatho HELPSEISMIC"
at the command prompt.

This version the script allows multiple SP ranges in the WD file.
What happens is that when the SEISMIC data operators pick the water bootom for the
SP range that are bad, the normally pick all SP ranges at one time so you
get at WD file consisting of several SP range-datasets like this (SP/depth):

 992      1285    (start first SP range)
  .         .
 999      1022    (end first SP range)
1245      1000    (start second SP range)
  .         .
1255       995    (end second SP range)   
The script will insert or remove the values for the SP ranges in the WD file
(in this example: 992-999, 1245-1255). The SP range in between the 2 ranges will
not be changed and will retain the original values from the echosounder.


IMPORTANT:
----------
The water depth file cannot have any text/empty lines in the beginning
of the file. The last line must be the last data set (no empty lines).
The script does not care about spaces or TABs on a line. Leading spaces
in a line is no problem. Some times the file from SEISMIC data processing can have double
values for the same shotpoint, this is not a problem.

 992      1285    (double values, not a problem)
 992      1285
      993              1285    (different spacing, not a problem)

and for seismic data processors the help states the following

             correct_p2_fatho v1.3
                  by
                Kenneth Thorman
             SEISMIC data - HELP
                     
1. Migrate the Near Trace Gather
2. Pick the water depth of migrated near trace gather in QCTOOL
3. Output formatted listing of DBU (this will be shotrec/time pairs)
4. In DBU examine file created in QCTOOL

EXAMPLE DBU FORMATTED OUTPUT LISTING

  Enter project name [exnor] :
  Enter access mode (r[eadonly], u[pdate]) [readonly] : u
  Enter selection (h|...|q) [l] : e 17awbpick
  Enter examine option (h|l|o|d|f|g|...|q) [l] : o
  Enter output listing file name (or "stdout") [stdout] : 17wblist.txt
  Enter examine option (h|l|o|d|f|g|...|q) [l] : f
  Enter filename of an existing format [] :
  Enter maximum line length for the list (max 255) [79] :
  Enter number of chars for instance number (0=none) [0] :
  Enter repeat factor for header line (0=no header) [50] : 0
  Select fields to tabulate :
  Enter field (q to quit, \ to list) [KEY1] :
  Enter number of characters to display field data in [4] : 8
  Enter field (q to quit, \ to list) [key1] : time
  Enter number of characters to display field data in [4] : 8
  Enter number of decimal places to display [2] : 4
  Enter field (q to quit, \ to list) [time] : q
  Enter filename to save format (leave blank for none) [] :
  Enter first instance to list (0 to quit) [1] :
  Enter last instance to list [21] :
  Enter first instance to list (0 to quit) [1] : 0
  Enter examine option (h|l|o|d|f|g|...|q) [l] : q
  Enter selection (h|...|q) [e] : q


5. run the following command from the command line:
SYNTAX:

awk '{printf "%s \t %.2f\n", $1+SPRec->SP, $2*Water_Vel}' inputfile > outputfile       

Variables:
SPRec-SP    -     difference from shot record to SP (1000)
Water_Vel   -     water velocity used / 2       ( 750)
inputfile   -     the DBU list
outputfile  -     file to give to the QC

Example:
awk '{printf "%s \t %.2f\n", $1+1000, $2*750}' EX00-22.WD.dbu > EX00-22.waterdepth

.
And finally here is the script


#!/opt/perl5/bin/perl

#################################################################################
#   GENERAL COMMENTS FOR THIS SCRIPT   #
#################################################################################
#           #
# THE TOP LINE YOU SEE IN THIS SCRIPT IS THE LINE THAT TELLS THE COMPUTER WHERE #
# TO LOOK FOR THE PERL - EXECUTOR. (NO IT HAS NOTHING TO DO WITH HANGING :-) #
# IF YOU DO NOT KNOW WHERE PERL IS LOCATED/INSTALLED ON YOUR COMPUTER THEN TRY  #
# TYPING THE FOLLOWING AT THE COMMAND PROMPT.     #
#           #
# whereis perl         #
# which perl         #
#           #
# IMPORTANT:  THIS SCRIPT WAS WRITTEN FOR PERL VERSION 5. SINCE THERE WAS A  #
#   BIG RE-WRITE/CHANGE FROM V4 TO V5 I DOUBT IT WILL RUN ON PERL 4 #
#           #
# 17 Dec 1999,          #
# Kenneth Thorman        #
#################################################################################




 

#################################################################################
#    SUB ROUTINE DECLARATION     #
#################################################################################
sub GetFileBaseName;
sub CheckCommandLineSyntax;
sub OpenFiles;
sub DecideDepthOutput;
sub PutWaterDepthFileIntoArray;
sub ExtractSP;
sub ExtractWD;
sub FormatEchosounderTimeStamp;
sub CreateEchosounderDataCard;
sub Presentation1;
sub Presentation2;
sub Presentation3;
sub Presentation4;
sub DisplayOnlineHelp;
sub DisplayHELPSEISMIC;

#################################################################################
#   VARABLE DECLARATION     #
#################################################################################
$wd_file = $ARGV[1];
$outfile = "$ARGV[0]" . ".wd";
$log_dir = "./";



#################################################################################
#   MAIN SCRIPT FLOW     #
#################################################################################
CheckCommandLineSyntax;
OpenFiles;
$filebasename = GetFileBaseName($ARGV[0]);
$wd_line = <WDFILE>;
$firstsp = int (ExtractSP ($wd_line));
PutWaterDepthFileIntoArray;
$fields_in_array = @wd_file;
$wdc = 0;

$min_sp = 100000;
$x = $fields_in_array;
while ($x >= 0) {    # Loop through the array backwards
  if (($min_sp > $x) and (@wd_file[$x] ne "")) {
    $min_sp = $x;
  }
  $x--;
}

$x = $min_sp;
while ($x <= $fields_in_array) {
  if ((@wd_file[$x] eq "") and (@wd_file[$x-1] ne "")){
    @wd_sp_string[$wdc] = $x-1;
    $wdc++;
  }
  elsif ((@wd_file[$x] ne "") and (@wd_file[$x-1] eq "")){
    @wd_sp_string[$wdc] = $x;
    $wdc++;
  }  
  $x++;
}
print (@wd_sp_string);

$x = 1;
while (@wd_sp_string[$x] ne ""){
  if (@wd_sp_string[$x+1] eq ""){
    $wd_sp_ranges = $wd_sp_ranges . $wd_sp_string[$x-1] . "-" . $wd_sp_string[$x] . ".";
  }
  else{
   $wd_sp_ranges = $wd_sp_ranges . $wd_sp_string[$x-1] . "-" . $wd_sp_string[$x] . ", ";
  } 
  $x = $x + 2;
}
 
$fields_in_array = @wd_file;
$lastsp = $fields_in_array - 1;
$T14101_Cards_Encountered = 1;
Presentation1;


$sp_count = 0;
$line = <P291FILE>;
while ($line ne ""){
  if (($line =~ /E1000/) and ($line =~ /$filebasename/)){
    $p291sp = int(substr($line,24,8));
    if ($sp_count == 0){
      Presentation2;
      $sp_count = 1;
    }
    elsif (($p291sp % 500) == 0){
      Presentation3;
    }
    if (($T14101_Cards_Encountered == 0) and ($wd_file[$p291sp] ne "")){
      $ETimeStamp = FormatEchosounderTimeStamp($line);
      $wd = @wd_file[$p291sp];
      $string = CreateEchosounderDataCard($wd, $ETimeStamp, "                                                                     ");
      print OUTFILE ($string);
      print OUTFILE ($line);
      $T14101_Cards_Encountered = 0;
    }  
    else{
      print OUTFILE ($line);
      $T14101_Cards_Encountered = 0;
    }
  }
  elsif (($line =~ /T14101/) and ($wd_file[$p291sp] ne "")){
    $wd = @wd_file[$p291sp];
    $rest_str = substr($line,12,68);
    $string = CreateEchosounderDataCard($wd, $rest_str);
    print OUTFILE ($string);
    $T14101_Cards_Encountered = 1;
  }
  else{
    print OUTFILE ($line);
  }
  $line = <P291FILE>;
}
$date = `date -u +%x`;
chop ($date);
print LOGFILE ("$date     $outfile     $wd_sp_ranges\n");  
Presentation4;





#################################################################################
#   SUB ROUTINES      #
#################################################################################

#################################################################################
#COMMENTS: "CheckCommandLineSyntax"      #
#-------------------------------------------------------------------------------#
#          #
# ASSIGN THE NUMBER OF CMD. LINE ARG. TO $arg_cnt    #
# IF NUMBER OF CMD. LINE .ARG. NOT EQUALS 2     #
# EXIT PROGRAM AND DISPLAY THE FOLLOWING LINE     #
# "SYNTAX: correct_p291_wb <P291 File> <Waterdepth file> ..."   #
#################################################################################

sub CheckCommandLineSyntax{
  $arg_cnt = @ARGV;   
  if ($ARGV[0] eq "HELPSEISMIC"){
    DisplayHELPSEISMIC;
    die ("SYNTAX: correct_p2_fatho <P2 File> <Waterdepth file> ...\n\n");
  }
  elsif ($arg_cnt != 2){
    DisplayOnlineHelp;
    die ("SYNTAX: correct_p2_fatho <P2 File> <Waterdepth file> ...\n\n");
  }
}








sub DisplayOnlineHelp{
  $~ = "HELP";
  write;
}
format HELP =

     correct_p2_fatho v1.3
      by
       Kenneth Thorman
          
DESCRIPTION:
------------
This script comes in handy when your echosounder data in the P291/P294
file either is extremly noisy or non existing. The script can:
- correct noisy data, by deleting corrupt data and inserting new data
- insert new data where data is missing
The script needs the P291/P294 file with the bad data, also needed is a 
"WD" file where each line contains a data pair of (SP/Water depths).


WHERE TO GET THE WD FILE: 
-------------------------
SEISMIC data processors. To see more help type: "correct_P2_fatho HELPSEISMIC"
at the command prompt.

This version the script allows multiple SP ranges in the WD file.
What happens is that when the SEISMIC data operators pick the water bootom for the 
SP range that are bad, the normally pick all SP ranges at one time so you 
get at WD file consisting of several SP range-datasets like this (SP/depth):

 992   1285 (start first SP range)
  .         .
 999    1022 (end first SP range)
1245   1000 (start second SP range)
  .         .
1255   995 (end second SP range) 
The script will insert or remove the values for the SP ranges in the WD file
(in this example: 992-999, 1245-1255). The SP range in between the 2 ranges will 
not be changed and will retain the original values from the echosounder.


IMPORTANT: 
----------
The water depth file cannot have any text/empty lines in the beginning 
of the file. The last line must be the last data set (no empty lines). 
The script does not care about spaces or TABs on a line. Leading spaces 
in a line is no problem. Some times the file from SEISMIC data processing can have double
values for the same shotpoint, this is not a problem.

 992   1285 (double values, not a problem)
 992   1285
   993     1285 (different spacing, not a problem)
   
.






sub DisplayHELPSEISMIC{
  $~ = "HELPSEISMIC";
  write;
}
format HELPSEISMIC =

     correct_p2_fatho v1.3
      by
       Kenneth Thorman
    SEISMIC data - HELP
          
1. Migrate the Near Trace Gather
2. Pick the water depth of migrated near trace gather in QCTOOL
3. Output formatted listing of DBU (this will be shotrec/time pairs)
4. In DBU examine file created in QCTOOL

EXAMPLE DBU FORMATTED OUTPUT LISTING

  Enter project name [exnor] : 
  Enter access mode (r[eadonly], u[pdate]) [readonly] : u
  Enter selection (h|...|q) [l] : e 17awbpick
  Enter examine option (h|l|o|d|f|g|...|q) [l] : o
  Enter output listing file name (or "stdout") [stdout] : 17wblist.txt
  Enter examine option (h|l|o|d|f|g|...|q) [l] : f
  Enter filename of an existing format [] : 
  Enter maximum line length for the list (max 255) [79] : 
  Enter number of chars for instance number (0=none) [0] : 
  Enter repeat factor for header line (0=no header) [50] : 0
  Select fields to tabulate :
  Enter field (q to quit, \ to list) [KEY1] : 
  Enter number of characters to display field data in [4] : 8
  Enter field (q to quit, \ to list) [key1] : time
  Enter number of characters to display field data in [4] : 8
  Enter number of decimal places to display [2] : 4
  Enter field (q to quit, \ to list) [time] : q
  Enter filename to save format (leave blank for none) [] : 
  Enter first instance to list (0 to quit) [1] : 
  Enter last instance to list [21] : 
  Enter first instance to list (0 to quit) [1] : 0
  Enter examine option (h|l|o|d|f|g|...|q) [l] : q
  Enter selection (h|...|q) [e] : q


5. run the following command fron the command line:
SYNTAX:

awk '{printf "%s \t %.2f\n", $1+SPRec->SP, $2*Water_Vel}' inputfile > outputfile  

Variables:
SPRec-SP    -  difference from shot record to SP (1000)
Water_Vel   -  water velocity used / 2    ( 750)
inputfile   -  the DBU list 
outputfile  -   file to give to the QC

Example:
awk '{printf "%s \t %.2f\n", $1+1000, $2*750}' EX00-22.WD.dbu > EX00-22.waterdepth

.




#################################################################################
#COMMENTS: "GetFileBaseName"       #
#-------------------------------------------------------------------------------#
#          #
# INPUT  : FILENAME (TEXT STRING) EX. BR98-092.0.P291    #
# FUNCTION: READ THE STRING FROM THE START WHEN ENCOUNTING A "." THROW THE REST #
#    AWAY         #
# OUTPUT  : FILE BASE NAME (LINENAME) EX. BR98-092    #
#################################################################################

sub GetFileBaseName{
  my ($input_str) = @_;
  my ($end_pos, $temp);
      
  if ($input_str =~ /\./g)
  {
    $end_pos = pos ($input_str);
  }
  
  $temp = substr ($input_str, 0, $end_pos-1);
  return ($temp);
}








#################################################################################
#COMMENTS: "OpenFiles"        #
#-------------------------------------------------------------------------------#
#          #
# OPENS ALL NECCESARY FILES (P291 ORIGINAL, WATER DEPTH FILE, OUTFILE)  #
# IF A PROBLEM IS ENCOUNTERED OPENING ANY OF THE FILE AN ERROR MESSAGE IS  #
# DISPLAYED.         #
#################################################################################

sub OpenFiles{
  open (WDFILE, "$wd_file") or die ("Could not open infile $ARGV[1].\n");
  open (P291FILE, "$ARGV[0]") or die ("Could not open infile $ARGV[0].\n");
  open (OUTFILE, ">$outfile") or die ("Could not open outfile $outfile.\n");
  $log = $log_dir . "correct_P2_fatho.LOG";
  if (-e $log){
    open (LOGFILE, ">>$log") or die ("Could not open logfile.\n");
  }
  else{
    open (LOGFILE, ">$log") or die ("Could not open logfile.\n");
    print LOGFILE ("\n\n\n     Echosounder data\n");
    print LOGFILE ("Date:      Filename:   corrected for SP range:\n");
    print LOGFILE ("-----------------------------------------------------------\n");
  }
}







#################################################################################
#COMMENT: "Presentation 1-3"       #
#-------------------------------------------------------------------------------#
#          #
# THIS SUB ROUTINE PRESENTS IMPORTANT INFORMATION TO THE USER IN A EASIER #
# READABLE WAY THAN IF IT HAD BEEN TRHOWN OUT ON THE SCREEN.   #
#################################################################################

sub Presentation1{
$~ = "MYFORMAT1";
write;
}

sub Presentation2{
$~ = "MYFORMAT2";
write;
}

sub Presentation3{
$~ = "MYFORMAT3";
write;
}

sub Presentation4{
$~ = "MYFORMAT4";
write;
}


format MYFORMAT1 =




***************************************************************************
*      FILE       |  FIRST SP  |  LAST SP  |  NOW PROCESSING  *
***************************************************************************
* Water depth correction file: @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*
           $wd_sp_ranges
***************************************************************************
.

format MYFORMAT2 =
* @<<<<<<<<<<<<<<<<<<<<<<<<<< |  @|||||||  |           |      *
  @ARGV[0]     $p291sp
.

format MYFORMAT3 =
* @<<<<<<<<<<<<<<<<<<<<<<<<<< |       |           |@|||||||||||||||||*
  @ARGV[0]          $p291sp
.

format MYFORMAT4 =
* @<<<<<<<<<<<<<<<<<<<<<<<<<< |       |  @||||||  |    *
  @ARGV[0]        $p291sp
***************************************************************************
* STATUS: FINISHED PROCESSING            *
* CORRECTED P291 FILE: @<<<<<<<<<<<<<<<<<<<<<<      *
   $outfile
***************************************************************************

Author  : Kenneth Thorman        
Script  : correct_p291_fatho v1.3      
Date    : 19/6 - 1999        


*********************************************************************
* This script is shareware, if you find this script usefull a small *
* contribution would be appreciated.                                *
* It encourages to write more usefull scripts and programs.         *
*********************************************************************
       
.








#################################################################################
#COMMENT: "ExtractSP"        #
#-------------------------------------------------------------------------------#
#           #
# THIS SUB ROUTINE EXTRACTS THE SHOTPOINT NUMBER IN A TEXT FILE   #
# THE FILE CAN HAVE LEADEING SPACES, IT CAN BE UNEVENLY SEPARATED BY SPACES #
# BUT IT MUST CONTAIN 2 NUMBERS PER LINE, SHOTPOINT NUMBER / WATERDEPTH  #
# EXAMPLES: 993   1285       #
#    994       1285   #
#          #
# BASICALLY IT DOES NOT CARE ABOUT SPACES, AT THE TOP OF THE FILE THE CAN BE #
# NO TEXT, AND THERE CAN BE NO BLANK LINES INBETWEEN LINES   #
#          #
# INPUT:  ONE LINE OF TEXT      #
# RETURN VALUE: SHOTPOINT NUMBER      #
#################################################################################

sub ExtractSP{

  my ($temp_line) = @_;
  my (@temp_array, $SP);
  
  @temp_array = split (/[\t ]+/, $temp_line);
  if (@temp_array[0] eq ""){
    $SP = @temp_array[1];
  }
  else{
    $SP = @temp_array[0];
  }
  return ($SP);
}








#################################################################################
#COMMENT: "ExtractWD"        #
#-------------------------------------------------------------------------------#
#           #
# THIS SUB ROUTINE EXTRACTS THE WATER DEPTH NUMBER IN A TEXT FILE  #
# THE FILE CAN HAVE LEADEING SPACES, IT CAN BE UNEVENLY SEPARATED BY SPACES #
# BUT IT MUST CONTAIN 2 NUMBERS PER LINE, SHOTPOINT NUMBER / WATERDEPTH  #
# EXAMPLES: 993   1285       #
#    994       1285   #
#          #
# BASICALLY IT DOES NOT CARE ABOUT SPACES, AT THE TOP OF THE FILE THE CAN BE #
# NO TEXT, AND THERE CAN BE NO BLANK LINES INBETWEEN LINES   #
#          #
# INPUT:  ONE LINE OF TEXT      #
# RETURN VALUE: WATER DEPTH       #
#################################################################################

sub ExtractWD{

  my ($temp_line) = @_;
  my (@temp_array, $WD);

  @temp_array = split (/[\t ]+/, $temp_line);
  if (@temp_array[0] eq ""){
    $WD = @temp_array[2];
  }
  else{
    $WD = @temp_array[1];
  }
  return ($WD);
}








#################################################################################
#COMMENT: "PutWaterDepthFileIntoArray"      #
#-------------------------------------------------------------------------------#
#          #
# THIS SUB ROUTINE DOES EXACTLY WHAT THE NAME IMPLIES. IT USES THE 2 ABOVE  #
# MENTIONED SUB ROUTINES TO EXTRACT THE SP NUMBER AND THE WATER DEPTH FROM A  #
# FILE AND THE IT PUTS THESE VALUE INTO AN ARRAY BUILT UP AS FOLLOWS  #
# SP NUMBER (INDEX VALUE) / WATER DEPTH (FIELD VALUE)    #
#          #
# SO NOW YOU CAN REFERENCE THE ARRAY BY SHOTPOINT NUMBER AND THEN GET THE  #
# WATER DEPTH AT THE SPECIFIC SHOTPOINT      #
#################################################################################

sub PutWaterDepthFileIntoArray{
  while ($wd_line ne ""){
    $sp = int (ExtractSP ($wd_line));
    $wd = int (ExtractWD ($wd_line));
    @wd_file[$sp] = $wd;
    $wd_line = <WDFILE>;
  }
}







#################################################################################
#COMMENT: "FormatEchosounderTimeStamp"      #
#-------------------------------------------------------------------------------#
#          #
# THIS SUB ROUTINE EXTRACTS AND FORMATS THE TIMESTAMP FROM THE E1000 GENERAL    #
# CARD  IN THE P291 FILE AND SUBTRACTS 0.1 SECOND.     #
# THIS IS DONE SO THE ECHOSOUNDER CARD CREATED LATER        #
# WILL HAVE A SEQUENTIAL INCREASING TIMESTAMP COMPARED TO THE CARDS LOCATED     #
# BEFORE AND AFTER IT IN THE P291 FILE. THE TIME STAMP IN THE GENERAL CARD IS   #
# NOT IN THE SAME FORMAT AS THE TIMESTAMP NEEDED FOR THE ECHOSOUNDER CARD #
#          #
# GENERAL CARD: HHMMSS.S       #
# ECHO CARD   : HHMMSSS        #
#          #
# INPUT  : E1000 - GENERAL LINE FROM P291 FILE     #
# OUTPUT : TIMESTAMP IN CORRECT FORMAT FOR A T14101 CARD IN THE P291 FILE #
#################################################################################

sub FormatEchosounderTimeStamp{
  
  my ($temp_line) = @_;
  my ($string);
  
  $time = substr ($temp_line,58,9);
  $string = sprintf("%7.1f", ($time - 0.1));
  $string =~ s/\.//;
  return ($string);
}







#################################################################################
#COMMENT: "CreateEchosounderDataCard"      #
#-------------------------------------------------------------------------------#
#          #
# THIS SUB ROUTINE CREATES/OR MODIFIES THE ECHOSOUNDER CARD DEPENDING ON WHICH  #
# ROUTINE IT IS CALLED FROM, AND WHAT VARIABLE THAT IS PASSED TO IT.  #
#################################################################################

sub CreateEchosounderDataCard{

  my ($wd, $TimeStamp, $end_string) = @_;
  my ($temp_string);

  if ($wd < 100){      
    $water_depth = sprintf("  %5.1f", $wd);
  }
  elsif ($wd < 1000){
    $water_depth = sprintf(" %5.1f", $wd);
  }
  else{
    $water_depth = sprintf("%5.1f", $wd);
  }

  $temp_string = "T14101" . "$water_depth" . "$TimeStamp" . "$end_string";
  $string = substr($temp_string,0,80) . "\n";
  return ($string);
}








# Version History:

# 1.3:  The online help and the comments in the script clarified
# plus check for compatibility with the new P294 format
# 1.2:  Seperate SP-ranges in the WD file allowed
# 1.1:  The output to a log file was created.
# 1.0:  Script created

Wednesday, June 21, 2000

Perl P190 Navigation Merge script (code nostalgia)

Continuing my explorations of the newly found old backup hard disk I found this small gem :). The code and structure could arguably be improved on a lot but considering this stared out as a small utility script that was immensely useful and it just worked back in 1997-1999 it never got rewritten.

When you are doing marine seismic exploration you are  gathering navigation data (where have the boat been and where have the recording equipment been located) when recording the seismic data. Quite a lot of the time a "seismic line" cannot be completed due to equipment failure, noise on the hydrophones ... If this happens you circle the boat and try again with an Alpha, Beta ... part of the line.
 
The navigation data is recorded in P2/91 & P2/94 (P2/94 EXCHANGE FORMAT FOR RAW MARINE POSITIONING DATA) format and the processed navigational data can be in P1/90 format (U.K.O.O.A. P1/90 POST PLOT DATA EXCHANGE TAPE).

Anyway this perl script processes and combines any Alpha, Beta ... lines into one file.

#!/opt/perl5/bin/perl

# Script : Navigation Merging v1.52

# Description 

# This script takes any number of P190 files and merges them 
# file1, file2,file3. It can deal with incrementing SP as well
# as decrementing SP. It also searches Alpha, Bravo, Charlie 
# linename prefixes in the final .cmb file. and changes this to
# the basic linename. Corrects LSP in the header if Streamer Model
# comment is exported to file.

# Linename P98-030
# Alphaname P98-030A

# The script looks for
# VP98-030A       1    3141145154.97S12115 5.23E...

# and changes this to
# VP98-030        1    3141145154.97S12115 5.23E...

# Author :     Kenneth Thorman
# Date      :     27/12/1998




# MAJOR SUBROUTINES
sub CheckCommandLineSyntax;
sub CreateOutputFile;
sub WriteFirstFile;
sub WriteMiddleFiles;
sub GetSPRangeForAllFiles;
sub CreateSPRangeHeaderRemark;



# MINOR SUBROUTINES
sub Author;
sub GetFileBaseName;
sub MakeReadmeFile;
sub Presentation1;
sub HeadFileInfo;
sub BodyFileInfo;
sub Finish;
sub HeaderStatus;



################################### MAIN SCRIPT ################################

    # Check that the user has entered the right amount of command line arguments
    CheckCommandLineSyntax;
    
    # Open the "cmb" file for writing
    CreateOutputFile;

    # In order to put a remark in the header about the streamer shaping we need
    # to know the SP range for the files we are combining
    GetSPRangeForAllFiles;

    # Create the header remark
    CreateSPRangeHeaderRemark;

    # Since we have to process the first file different than the remaining files
    # we do it here in a seperate procedure
    WriteFirstFile;

    # Since we have processed the first file above lets set the pointer to the 
    # second supplied file
    $counter = 1;

    # Run through the files one by one as long as there files left to process
    while ($counter < ($arg_cnt))
    {
         WriteMiddleFiles ($ARGV[$counter]);

         # Move to the next file
         $counter++;
    }

    # Make a README file in the current directory explaining the "cmb" file
    MakeReadmeFile;

    # Before finishing of time for a little bit of narcisim from my side
    # Print the author information
    Finish;



################################### SUB ROUTINES ###############################

sub CheckCommandLineSyntax
{
  $arg_cnt = @ARGV;   # GET NUMBER OF CMD LINE ARGUMENTS
  if ($arg_cnt < 2)   # IF 0 CMD ARGUMENTS THEN ERROR!
  {
    print ($info);
    print ("\nYou need at least 2 command line arguments!\n");
    die ("SYNTAX: nav_merge file1 file2 file3 file4 ...\n\n");
    Author;
  }
}



sub GetSPRangeForAllFiles{
  
  $arg_cnt = @ARGV;
  $file_cnt = 0;
  
  foreach $file (@ARGV){
    open (P190FILE, "$file") or die ("Could not open $file.\n");
    #print ("Checking SP range for file: $file...\n");
    $filebasename = GetFileBaseName($file);
    $vessel_record = "V" . $filebasename;
    $count = 1;
     
    $line = <P190FILE>;
    while ($line !~ /EOF/g){
      if ($line =~ /^V/) {
         $last_sp = substr($line,20,5);    
      }
      $line = <P190FILE>;
      if (($line =~ /$vessel_record/) and ($count != 0)){
        $first_sp = substr($line,20,5); 
        $count--;
      }
    }
    
    if ($file_cnt == 0){
      $final_FSP = $first_sp;
      #print ("Final FSP is: $final_FSP\n");
      $FSP = 1;
    }
    if ($file_cnt == $arg_cnt-1){
      $final_LSP = $last_sp;
      #print ("Final LSP is: $final_LSP\n");
    }
    $file_cnt++;
    close (P190FILE);
  }
  #print ("FSP: $final_FSP   LSP: $final_LSP\n");
}



sub CreateOutputFile
{
  $output_filename = $ARGV[0] . ".cmb";
  open (OUTFILE,">$output_filename") or die ("Cannot create file $output_filename");
}



sub CreateSPRangeHeaderRemark
{
  $cmb_line_name = GetFileBaseName ($ARGV[0]);
  $temp_str1 = sprintf("H2600 Streamer_001 Model Line %s shots %d to %d: Arc                                          ",
                        $cmb_line_name, $final_FSP, $final_LSP);  
  $temp_str2 = substr($temp_str1,0,80);
  $str_to_add = $temp_str2 . "\n";
}



sub WriteFirstFile
{
  open (HEADFILE, "$ARGV[0]") or die ("Could not open file $ARGV[0].\n");
  $filebasename = GetFileBaseName($ARGV[0]);
  $count = 1;
     
  $line = <HEADFILE>;
  $file_card = substr($line,0,1);
  $SPrangeheader = "no";
  $vessel_record = "V" . $filebasename;

  while ($line !~ /EOF/g){
    if ($file_card eq "H"){
      if (($line =~ /H2600/) and ($line =~ /$final_FSP/) and ($line !~ /$final_LSP/)){ 
        print OUTFILE ($str_to_add);
 $error =  "LSP in the header was incorrect.\n";
        $status = "Corrected. Now reflecting actual SP range in .cmb file";
        HeaderStatus;
        $SPrangeheader = "yes";
      }
      else{
        print OUTFILE ($line);
      }
    }
    elsif ($SPrangeheader eq "no"){
      print OUTFILE ($str_to_add);
      print OUTFILE ($line);
      $error =  "No SP range comment found in header";
      $status = "Corrected. Comment inserted: $str_to_add";
      HeaderStatus;
      $SPrangeheader = "yes";
    }
    else{ 
      print OUTFILE ($line);
    }   
    if ($line =~ /^V/) {
       $headfile_lsp = substr($line,20,5);
    }
    if (($line =~ /$vessel_record/) and ($count != 0)){
      $headfile_fsp = substr($line,20,5); 
      $count--;
    }
    $line = <HEADFILE>;
    $file_card = substr($line,0,1);
    
  }
  Presentation1;
  HeadFileInfo;
  close (HEADFILE);
}   



sub WriteMiddleFiles
{
  my ($file_name) = @_;    #  ASSIGN PASSED PARA. TO $file_name
  
  open (BODYFILE, $file_name) or die ("Could not open file $file_name.\n");
   
  $line = <BODYFILE>;
  $count = 1;
  
  if ($headfile_fsp < $headfile_lsp)             # INCREMENTING SP NUMBER
  { 
    while ($line ne "")
    {
      
      if  ($line =~ /^V/) {
        $temp = substr($line,20,5);
      }
             
      if (((~ /$filebasename/) and ($temp > $headfile_lsp) and ($line !~ /H2600/)) or ($line =~ /EOF/)) {
        if ($line =~ /$filebasename[A-Z]/)
        {
          $line =~ s/$filebasename[A-Z]/$filebasename /;
          print OUTFILE ($line);
        }
        elsif ($counter == ($arg_cnt-1))
        { 
          print OUTFILE ($line);
        }
        if (($line =~ /$vessel_record/) and ($count != 0)) #  GET FIRST EXPORTED SP
        {
          $tailfile_fsp = substr($line,20,5); 
          $count = 0;
        }
      }

      if (($line =~ /^R/) and ($temp < $headfile_lsp)){
       print OUTFILE ($line);
      }

      if ($line =~ /^V/) {
        $tailfile_lsp = substr($line,20,5);
      } 

      $line = <BODYFILE>;
    }
  }
  
  
  elsif ($headfile_fsp > $headfile_ls)
  {
    while ($line ne "")    # DECREMENTING SP NUMBER
    {
      
      
      if  ($line =~ /^V/) {
        $temp = substr($line,20,5);
      }
      
      
      if ((($line =~ /$filebasename/) and ($temp < $headfile_lsp) and ($line !~ /H2600/)) or ($line =~ /EOF/))      {      
        if ($line =~ /$filebasename[A-Z]/) {
          $line =~ s/$filebasename[A-Z]/$filebasename /;
          print OUTFILE ($line);
        }
        elsif ($counter == ($arg_cnt-1)) { 
          print OUTFILE ($line);
        }
        if (($line =~ /$vessel_record/) and ($count != 0)) {
          $tailfile_fsp = substr($line,20,5); 
          $count = 0;
        }
      }

      if (($line =~ /^R/) and ($temp < $headfile_lsp)){
       print OUTFILE ($line);
      }

      
      if ($line =~ /^V/) {
        $tailfile_lsp = substr($line,20,5);
      } 

      $line = <BODYFILE>;
    }
  }
  BodyFileInfo;
  $headfile_fsp = $tailfile_fsp;
  $headfile_lsp = $tailfile_lsp;  
  close (BODYFILE);
}



sub MakeReadmeFile
{ 
  open (READMEFILE,"> README");
  print READMEFILE ("Note: All \"cmb\" files are combined from multiple line segments\n");
  print READMEFILE ("      as per the seismic merge.\n");
  close (READMEFILE);
}



sub Author
{
  print ("\n\nNavigation Merge v1.5\n\n");
  print ("Author: Kenneth Thorman\n");
  print ("Date  : 27 dec 1998\n");
}



sub HeaderStatus{
$~ = "HEADERSTATUS";
write;
}



sub Presentation1{
$~ = "MYFORMAT1";
write;
}



sub HeadFileInfo{
$~ = "HEADFILEINFO";
write;
}



sub BodyFileInfo{
$~ = "BODYFILEINFO";
write;
}



sub Finish{
$~ = "FINISH";
write;
}



format HEADERSTATUS =



Header Check                                  
------------
Output File : @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 
$output_filename
Error       : @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 
$error
Status      : @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< 
$status
.



format MYFORMAT1 =


Combining Files
---------------
.

format HEADFILEINFO =
@<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<       @<<<<<              @<<<<<      
 $ARGV[0]                    $headfile_fsp        $headfile_lsp   
.

format BODYFILEINFO =
@<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<        @<<<<<              @<<<<<      
 $ARGV[$counter]                   $tailfile_fsp        $tailfile_lsp   
.  



format FINISH =





                                    
27-Dec-1998, Kenneth Thorman 

.



sub GetFileBaseName
{
  my ($input_str) = @_;
  my ($end_pos, $temp);
      
 if ($input_str =~/_/g){
    $start_pos = pos ($input_str);
  }    
  if ($input_str =~ /\./g){
    $end_pos = pos ($input_str) - $start_pos ;
  }
  
  $temp = substr ($input_str, $start_pos, $end_pos-1);
  return ($temp);
}


#Script   : navigation_merge
#Version  : 1.5
#Author   : Kenneth Thorman

Hydrogeologic : Extract data from seismic files for use in surfer

I recently found a backup storage hard disk. This hard disk holds scripts and programs going back to 1998.

This specific perl script I am posting here is/was used to extract water depth data from p190 seismic files and convert it into a format Surfer could read and in turn generate images as the one below (image source: Schlumberger Water Services)





If I were to write this today I would probably write it rather differently, but alas it worked.

Pure nostalgia.

#!/opt/perl5/bin/perl

# SUB ROUTINE DECLARATION     
sub GetFileBaseName;
sub Grab_XYZ;
sub Presentation1;
sub Presentation2;
sub Presentation3;


# VARABLE DECLARATION  
@file_list = `ls -1 *.{p190,WGS84,tc,AGD84,AGD66,WGS-84,AGD-84,AGD-66,Kertau}`;
chop @file_list;
open (OUTFILE, ">surfer_file") or die ("Could not open surfer_file.\n");


# MAIN SCRIPT FLOW
Presentation1;
foreach $file (@file_list){
  open (P190FILE, "$file") or die ("Could not open $file.\n");
  $filebasename = GetFileBaseName($file);
  $shot_record = "S" . $filebasename;
  $counter = 0;
  print ("Filebasename: $filebasename\n");  
  $line = <P190FILE>;
  while ($line ne ""){
    Grab_XYZ;
  }
  close (P190FILE);
   
  if ($counter == 0){
    $status = "FAILED";
  }
  else{
    $status = "OK";
  }
  Presentation2;
}
close (OUTFILE);
Presentation3;



# SUB ROUTINE IMPLEMENTATIONS

sub Grab_XYZ{
 if ($line =~ /$shot_record/){
   $x = substr($line,46,9);
   $y = substr($line,55,9);
   $z = -substr($line,64,6);
   print OUTFILE ("$x $y $z\n");
   $line = <P190FILE>;
   $counter++;
   #print ("$counter\n");
 }
 else{
   $line = <P190FILE>;
 }
}



sub GetFileBaseName
{
  my ($input_str) = @_;
  my ($end_pos, $temp);
  
  if ($input_str =~/_/g){
    $start_pos = pos ($input_str);
  }    
  if ($input_str =~ /\./g){
    $end_pos = pos ($input_str) - $start_pos ;
  }
  
  $temp = substr ($input_str, $start_pos, $end_pos-1);
  return ($temp);
}






sub Presentation1{
$~ = "MYFORMAT1";
write;
}

sub Presentation2{
$~ = "MYFORMAT2";
write;
}

sub Presentation3{
$~ = "MYFORMAT3";
write;
}

format MYFORMAT1 =




********************************************************************************
*           |     NUMBER OF VALUES GRABBED      |          *
*              FILE     |-----------------------------------|  STATUS  *
*    |     X     |     Y     |     Z     |          *
********************************************************************************
.


format MYFORMAT2 =
* @<<<<<<<<<<<<<<<<<<<<<<<<<<   |  @||||||  |  @||||||  |  @||||||  | @||||||| *
  $file        $counter    $counter    $counter   $status
.

format MYFORMAT3 =
********************************************************************************
* STATUS: FINISHED PROCESSING             *
* OUTPUT FILE: surfer_file                  *
********************************************************************************

Author  : Kenneth Thorman        
Script  : make_surfer_file      
Date    : 26/4 - 1999        
.