Here’s an example of how to parse an HTML table into an array using the PEAR module XML_HTMLSax3. It supports the <tr>, <td> and <th> elements and the rowspan and colspan attributes.

It’s worth noting that this code will raise a bunch of notices if you run it displaying all errors. This is pretty difficult to avoid, so if you don’t like that, disable the display of notices.

<?php

/**
 * Example of how to parse an HTML table using PEAR XML_HTMLSax3.
 *
 * Copyright (C) 2007 Toby Inkster
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 3 of the 
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program.
 * If not, see <http://www.gnu.org/licenses/>.
 *
 * @author Toby Inkster
 * @copyright Copyright (C) 2007 Toby Inkster
 * @license http://www.gnu.org/licenses/lgpl-3.0.html GNU Lesser General Public License
 */

/**
 * SAX handler that turns the markup of an HTML table into a two-dimensional
 * array of cell text, indexed as $result[row][column].
 *
 * Only <tr>, <td> and <th> influence the cell co-ordinates; rowspan and
 * colspan are honoured by reserving the grid slots a spanning cell covers, so
 * later cells skip over them. Every other element is ignored structurally,
 * but its text still ends up in whichever cell is current.
 *
 * You probably only need to call the static "Go" method directly.
 *
 * Known limitation: closing tags are not tracked, so any text delivered after
 * a cell has closed (typically whitespace between cells, if the parser
 * reports it) is appended to the most recently opened cell. Trim the values
 * if that matters to you.
 */
class TableParser
{
    /** @var int Row currently being filled; -1 until the first <tr>. */
    private $currow = -1;

    /** @var int Column currently being filled; -1 until the first cell of a row. */
    private $curcol = -1;

    /** @var array Occupancy grid: $shape[row][col] is set once a cell covers that slot. */
    private $shape = array();

    /** @var array Collected text: $data[row][col] => string. */
    private $data  = array();

    /**
     * Opening-tag callback: moves the cursor to the grid slot of the new cell.
     *
     * @param mixed  $parser The SAX parser invoking the callback (unused).
     * @param string $tag    Element name, in any letter case.
     * @param array  $attrs  Attribute name => value pairs, names in any case.
     * @return void
     */
    public function openHandler ($parser, $tag, $attrs)
    {
        $tag = strtolower($tag);

        if ($tag === 'tr')
        {
            // New row: the first cell will advance the column to 0.
            $this->currow++;
            $this->curcol = -1;
            return;
        }

        // Elements nested inside a cell (<b>, <a>, ...) must not move the
        // cursor, otherwise they would see the cell's own slot as occupied
        // and push the following text into the wrong column.
        if ($tag !== 'td' && $tag !== 'th')
            return;

        // Step to the next slot, skipping any that are already covered by a
        // rowspan/colspan of an earlier cell.
        $this->curcol++;
        while (isset($this->shape[$this->currow][$this->curcol]))
            $this->curcol++;

        $rowspan = 1;
        $colspan = 1;
        foreach ($attrs as $k=>$v)
        {
            $k = strtolower($k);
            if ($k === 'rowspan')
                $rowspan = (int)$v;
            elseif ($k === 'colspan')
                $colspan = (int)$v;
        }

        // A span of zero, a negative or a non-numeric value would reserve no
        // slot at all, not even the cell's own; treat it as a plain cell.
        $rowspan = max(1, $rowspan);
        $colspan = max(1, $colspan);

        // Reserve every slot the cell covers.
        for ($i=0; $i<$rowspan; $i++)
            for ($j=0; $j<$colspan; $j++)
            {
                $x = $this->currow + $i;
                $y = $this->curcol + $j;
                if (isset($this->shape[$x][$y]))
                    error_log('Overlap!');
                $this->shape[$x][$y] = TRUE;
            }
    }

    /**
     * Closing-tag callback. Intentionally does nothing; it exists because the
     * parser is given an open/close handler pair.
     *
     * @param mixed  $parser The SAX parser invoking the callback (unused).
     * @param string $tag    Element name.
     * @return void
     */
    public function closeHandler ($parser, $tag)
    {
    }

    /**
     * Character-data callback: appends text to the current cell.
     *
     * Text seen before the first row or before the first cell of a row is
     * stored under index -1 and discarded later by getData().
     *
     * @param mixed  $parser The SAX parser invoking the callback (unused).
     * @param string $data   Chunk of character data.
     * @return void
     */
    public function dataHandler ($parser, $data)
    {
        // Initialise the slot first so the append does not read an unset key.
        if (!isset($this->data[$this->currow][$this->curcol]))
            $this->data[$this->currow][$this->curcol] = '';
        $this->data[$this->currow][$this->curcol] .= $data;
    }

    /**
     * Returns the collected cell text.
     *
     * Drops the -1 row and the -1 column of every row, which only hold text
     * found outside of any cell. Slots covered by a span have no entry of
     * their own, so column keys may have gaps.
     *
     * @return array Cell text indexed as [row][column].
     */
    public function getData ()
    {
        unset($this->data[-1]);
        foreach ($this->data as $k=>$v)
            unset($this->data[$k][-1]);
        return $this->data;
    }

    /**
     * Parses a piece of table markup in one call.
     *
     * Needs the PEAR package XML_HTMLSax3 to be available on the include path.
     *
     * @param string $table_html Markup containing the table.
     * @return array Cell text indexed as [row][column], see getData().
     */
    public static function Go ($table_html)
    {
        require_once 'XML/HTMLSax3.php';
        $sax  = new XML_HTMLSax3;
        $hdlr = new TableParser;
        $sax->set_object($hdlr);
        $sax->set_element_handler('openHandler', 'closeHandler');
        $sax->set_data_handler('dataHandler');
        $sax->parse($table_html);
        return $hdlr->getData();
    }

}

// Demonstration: a four-row table that mixes rowspan and colspan cells, parsed
// and dumped as a nested array.
$table = '
<table>
  <tr>
    <td rowspan="2">Test table lalala</td>
    <td>123</td>
    <td>456</td>
  </tr>
  <tr>
    <td>789</td>
    <td>ABC</td>
  </tr>
  <tr>
    <td colspan="2" rowspan="2">123</td>
    <td>456</td>
  </tr>
  <tr>
    <td>789</td>
  </tr>
</table>
';
print_r(TableParser::Go($table));

?>