2009-01-09 19 views

Respuesta

26

La mejor manera es utilizar Spreadsheet::ParseExcel.

Aquí se muestra un ejemplo:

#!/usr/bin/perl -w 

use strict; 
use warnings; 

use Spreadsheet::ParseExcel; 

my $parser = Spreadsheet::ParseExcel->new(); 
my $workbook = $parser->parse('Book1.xls'); 

for my $worksheet ($workbook->worksheets()) { 

    my ($row_min, $row_max) = $worksheet->row_range(); 
    my ($col_min, $col_max) = $worksheet->col_range(); 

    for my $row ($row_min .. $row_max) { 
     for my $col ($col_min .. $col_max) { 

      my $cell = $worksheet->get_cell($row, $col); 
      next unless $cell; 

      print "Row, Col = ($row, $col)\n"; 
      print "Value  = ", $cell->value(),  "\n"; 
      print "Unformatted = ", $cell->unformatted(), "\n"; 
      print "\n"; 
     } 
    } 
} 

Para convertir un archivo de Excel a texto con Perl Me gustaría recomendar excel2txt hoja de cálculo que utiliza :: ParseExcel.

4

La 'mejor' manera sería, idealmente, utilizar un módulo de nuestra querida CPAN.

Cuando usted tiene un problema Pregunte al instante: ¿Por qué no he comprobado CPAN todavía?

Cuál módulo sin embargo no puedo estar seguro, aquí hay una lista para comenzar, pruébelos, vea qué funciona.

1
@echo off 
ECHO CHECK THE VERSION FROM THE PROJECT 
    echo see also this [link][1] 
for /f "tokens=*" %%i in ('Type Version.txt') do set _Version=%%i 
ECHO The _Version is %_Version% 

ECHO remove the output html files 
del *.html /q 
ECHO remove the output log files 
del *.log /q 


::pause 
ECHO %0 > %0.log 
ECHO %0.error.log >%0.error.log 

set BaseDir=D:\perl\sfw\ExcelToHtml.%_Version% 
echo BaseDir is %BaseDir% 1>>%0.log 2>>%0.error.log 
ECHO. 


set LogLevel=3 
echo LogLevel is %LogLevel% 1>>%0.log 2>>%0.error.log 
ECHO. 


::set ExcelFileToParse="%BaseDir%\CoDA_ETL_Integration.xls" 
SET ExcelFileToParse="%BaseDir%\TODO.xls" 
echo ExcelFileToParse is %ExcelFileToParse% 1>>%0.log 2>>%0.error.log 
echo. 

set OutputDir=%BaseDir% 
echo OutputDir is %Outputdir% 1>>%0.log 2>>%0.error.log 
echo. 

ECHO SET THE UNICODE FOR PERL FOR UTF-8 
SET PERL_UNICODE=S 
ECHO %%PERL_UNICODE%% IS %PERL_UNICODE% 

::set PerlScript=parseExcelToCsv.pl 
set PerlScript=ExcelToHtml.pl 
echo PerlScript is %PerlScript% 1>>%0.log 2>>%0.error.log 
ECHO. 


echo Action !!! 1>>%0.log 2>>%0.error.log 
echo perl %BaseDir%\%PerlScript% %LogLevel% %ExcelFileToParse% %OutputDir% 1>>%0.log 2>>%0.error.log 
perl %BaseDir%\%PerlScript% %LogLevel% %ExcelFileToParse% %OutputDir% 1>>%0.log 2>>%0.error.log 

%0.error.log 
%0.log 

::debug pause 

use strict; 
use Spreadsheet::ParseExcel; 
use utf8 ; 
use Encode ; 


package ExcelToHtml ; 



my $DebugLevel = 3 ; 
print defined($ARGV[0]) ? $DebugLevel = $ARGV[0] : "Using max DebugLevel = $DebugLevel", " \n"; 
my $minimumAmountOfCommandLineArgs = 3 ; 
my ($ExcelFileToParse , $OutputDir , $BaseFileName) =(); 


sub main { 
    logMsg("$0 SCRIPT START " , 1); 
    #Action !!! call here methods 
    checkCommandLineArgs();  
    buildNames(); 

    ParseExcelAndlogMsgFiles ($ExcelFileToParse , $BaseFileName , $OutputDir) ; 

    logMsg("$0 SCRIPT STOP " , 1); 

} #eof MAIN 

sub buildNames 
{ 

    $ExcelFileToParse = "$ARGV[1]" ; 
    $OutputDir="$ARGV[2]" ; 
    $BaseFileName = $ExcelFileToParse ; 
    logMsg("\$ExcelFileToParse is $ExcelFileToParse") ; 
    logMsg ("\$OutputDir is $OutputDir") ; 
    logMsg ("\$BaseFileName is $BaseFileName \n"); 
    $BaseFileName =~ s/^(.*)(\\|\/)(.*)/$3/; #strip the directory part 
    logMsg ("\$BaseFileName is $BaseFileName "); 
    $BaseFileName =~ s/^(.*)(\.)(.*)/$1/ ; #strip the file extension 
    logMsg ("\$BaseFileName is $BaseFileName "); 


} #eof sub buildNames 


sub ParseExcelAndlogMsgFiles { 

my $ExcelFileToParse = shift ; 
my $BaseFileName = shift ; 
my $OutputDir = shift ; 
my $strToReturn = ""; 

my $parser = Spreadsheet::ParseExcel->new(); 
my $workbook = $parser->Parse("$ExcelFileToParse"); 

for my $worksheet ($workbook->worksheets()) { 

     my ($row_min, $row_max) = $worksheet->row_range(); 
     my ($col_min, $col_max) = $worksheet->col_range(); 

     for my $row ($row_min .. $row_max) { 
       my $rowStr = "" ; 
       for my $col ($col_min .. $col_max) { 

         my $cell = $worksheet->get_cell($row, $col); 
         next unless $cell; 

         logMsg (" Row, Col = ($row, $col)") ; 
         logMsg ("\$cell->encoding() is " . $cell->encoding()) ; 

         logMsg (" Value = ", $cell->value()); 
         $rowStr .= makeCell($cell->value())  ;    #The Value 

         logMsg ("Unformatted = ", $cell->unformatted()) ; 

       } #eof col 
      $rowStr = makeRow($rowStr); 
      $strToReturn .= $rowStr ; 
    } #eof for my row 

     my $worksheetName = $worksheet->{'Name'} ; 
     my $FileSheet = "$OutputDir/$BaseFileName" . '.' . $worksheetName . '.' . 'html' ; 
     $strToReturn = makeTable ($strToReturn); 
     $strToReturn = makeFile ($strToReturn); 
     logMsg ("\$strToReturn is $strToReturn ") ; 
     PrintToFile ($FileSheet, $strToReturn) ; 
     $strToReturn = "" ; 
     $FileSheet = "" ; 
     } #eof for my worksheet 

} #eof sub 


sub trim  
{ 
     $_[0]=~s/^\s+//; 
     $_[0]=~s/\s+$//; 
     return $_[0]; 
} 

# =========================================== eof sub trim 
# Action !!! 



sub makeRow 
{ 
    my $row = shift ; 
    return "<tr>" . $row . "</tr> \n" ; 
} 

sub makeCell 
{ 
    my $cell = shift ; 
    return "<td>" . "$cell" . " </td>" ; 
} 

sub makeTable 
{ 
    my $table = shift ; 
    return "\n <table> " . $table . "</table> \n" ; 

} 

sub makeFile 
{ 
    my $file = shift ; 

    $file = "<html> <head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\"/></head><body> $file </body></html>\n" ; 
    return $file ; 
} 


sub checkCommandLineArgs 
{ 

     if (@ARGV != $minimumAmountOfCommandLineArgs) 
     { 
      logErrorMsg ("Not enougn command line args supplied exit 1" , 1); 
      die "usage: $0 1 args \n" ; 
      exit 1 ; 
     } #eof if 
     else 
     { 
       foreach (@ARGV) { logMsg ("$_ ") ; } 
     } #eof else   if (@ARGV != 4) 


} #eof sub checkCommandLineArgs 

#log a message according to the DebugLevel 
sub logMsg 
{ 
    my $msg = shift ; 
    my $importance = shift ; 

    my $CurrentPerlScriptName = "$0" ; 
    my $niceMonth = GetANiceMonth(); 
    my $logFile = "$CurrentPerlScriptName" . '_' . "$niceMonth" . '.log' ; 
    $msg = GetANiceTime() .' --- ' . $msg . " \n" ; 

    if ($importance == 1) 
    { 
     $msg = "\n============================================================ \n" . $msg ; 
     $msg = $msg . "============================================================ \n" ; 
    } 


    #debug nothing 
    if ($DebugLevel == 0) { return ; } 

    #just logMsg the message 
    if ($DebugLevel == 1) 
    { 
     logMsg ($msg); 
    } #eof if ($DebugLevel == 1) 

    #logMsg the message in 
    if ($DebugLevel == 2) 
    { 
     #READ ALL ROWS OF A FILE TO ALIST 
     open (LOG, ">> $logFile") || print "could not open the \$logFile $logFile !!!\n"; 
     print LOG $msg ; 
     close LOG; 
    } 

    #logMsg the message in 
    if ($DebugLevel == 3) 
    { 
     #READ ALL ROWS OF A FILE TO ALIST 
     open (LOG, ">> $logFile") || print "could not open the \$logFile $logFile !!!\n"; 
     print LOG $msg ; 
     close LOG; 
     print $msg ; 
    } #eof if ($DebugLevel == 3) 


} #eof sub logMsg 

#log a message according to the DebugLevel 
sub logErrorMsg 
{ 
    my $errorMsg = shift ; 
    my $importance = shift ; 

    my $CurrentPerlScriptName = "$0" ; 
    my $niceMonth = GetANiceMonth(); 
    my $errorLogFile = "$CurrentPerlScriptName" . "_" . "$niceMonth" . '.error.log' ; 
    $errorMsg = GetANiceTime() .' --- ' . $errorMsg . " \n" ; 

     if ($importance == 1) 
     { 
      $errorMsg = "\n============================================================ \n" . $errorMsg ; 
      $errorMsg= $errorMsg. "============================================================ \n" ; 
     } 



    #debug nothing 
    if ($DebugLevel == 0) { return ; } 

    #just logMsg the message 
    if ($DebugLevel == 1) {  print $errorMsg ;  } 

    #logMsg the message in a error log file 
    if ($DebugLevel == 2) 
    { 
     #READ ALL ROWS OF A FILE TO ALIST 
     open (ERRLOG, ">> $errorLogFile") || print "could not open the \$errorLogFile $errorLogFile !!!\n"; 
     print ERRLOG $errorMsg ; 
     close ERRLOG; 
    } 

    #logMsg the message in 
    if ($DebugLevel == 3) 
    { 
     #READ ALL ROWS OF A FILE TO ALIST 
     open (ERRLOG, ">> $errorLogFile") || print "could not open the \$errorLogFile $errorLogFile !!!\n"; 
     print ERRLOG $errorMsg ; 
     close ERRLOG; 
     print $errorMsg ; 
    } #eof if ($DebugLevel == 3) 


} #eof sub logErrorMsg 


#GET A NICE TIME 
sub GetANiceTime { 
# Purpose: returns the time in yyyymmdd-format 
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time); 
#---- change 'month'- and 'year'-values to correct format ---- 
$min = "0$min" if ($min < 10); 
$hour = "0$hour" if ($hour < 10); 
$mon = $mon + 1; 
$mon = "0$mon" if ($mon < 10); 
$year = $year + 1900; 
$mday = "0$mday" if ($mday < 10); 
return "$year\.$mon\.$mday" . "-" . "$hour\:$min\:$sec"; 
} #eof sub GetANiceTime 


sub GetANiceMonth { 
# Purpose: returns the time in yyyymmdd-format 
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time); 
#---- change 'month'- and 'year'-values to correct format ---- 
$mon = $mon + 1; 
$mon = "0$mon" if ($mon < 10); 
$year = $year + 1900; 
return "$year\.$mon" ; 
} #eof sub GetANiceTime 



sub PrintToFile { 
my $FileOutput = shift ; 
my $StringToPrint = shift ; 
#READ ALL ROWS OF A FILE TO ALIST 
open (FILEOUTPUT, ">$FileOutput") || 
      print "could not open the \$FileOutput $FileOutput!\n"; 
print FILEOUTPUT $StringToPrint ; 
close FILEOUTPUT ; 

#debug $strToReturn .= $StringToPrint; 

} 
# =========================================== eof sub PrintToFile 



#Action !!! 
main(); 




1 ; 

__END__ 

#VersionHistory: 
#1.0. --- Yordan Georgiev --- Initial creation 
#1.1 --- Yordan Georgiev --- Added conditional logging 
+0

Esto es un gran lío de archivos por lotes y perl. podrías limpiarlo un poco y explicar qué es cada parte –

+1

este código es tan antiguo que realmente tuve que googlear mi gmail para encontrarlo: https://code.google.com/archive/p/exceltohtml/downloads –

1

http://search.cpan.org/dist/Spreadsheet-Read/Read.pm

Hoja de cálculo :: Lee intenta leer de forma transparente cualquier hoja de cálculo y devolver su contenido de una manera universal, independiente del módulo de análisis que hace el escaneo de hoja de cálculo real.

+1

@ DJ-Andrey-sXe: Bienvenido a SO. Incluya más información sobre por qué vale la pena considerar este módulo (en comparación con otros que figuran en otras respuestas) – Jayan

Cuestiones relacionadas