PHP获取Word文档 docx文件内容

发布时间: 2023-09-21 13:23:18 作者: 79524795

1. 安装 PHPWord 库

composer require phpoffice/phpword

2.写代码

2.1引入

点击查看代码
use PhpOffice\PhpWord\IOFactory; 
use PhpOffice\PhpWord\Element\TextRun;
use PhpOffice\PhpWord\Element\Text;
use PhpOffice\PhpWord\Element\Paragraph;

2.2 调用示例

点击查看代码
            //795新增代码
         
        // Parse the file at $filePath into a PhpWord document object.
        $document = IOFactory::load($filePath);
        // Flatten the document into a list of strings and dump it for inspection.
        $content = $this->getNodeContent($document);
        var_dump($content);
        // Debug only: terminate the script right after dumping.
        exit;
  //795新增代码  END

2.3 实现方法

点击查看代码
 //++++-----795新增代码-------------+++++++
        
         
        /**
         * Collect the content of every section of a loaded document as a flat list of strings.
         *
         * Each text-bearing top-level element (TextRun, Text or Image) contributes one entry
         * built by getTextNode(); each table row contributes one entry built by
         * getTableNode(). Any other element type is skipped.
         *
         * Text and Image are accepted directly under the section, not only inside a TextRun,
         * because some PhpWord readers/versions attach single-run paragraphs to the section
         * as plain Text elements; ignoring them would silently lose that content.
         *
         * @param \PhpOffice\PhpWord\PhpWord $word Document object, e.g. the result of IOFactory::load().
         * @return array List of strings in document order.
         */
        public function getNodeContent($word)
        {
            $return = [];
            // Walk the sections of the document.
            foreach ($word->getSections() as $section) {
                if (!($section instanceof \PhpOffice\PhpWord\Element\Section)) {
                    continue;
                }
                // Walk the top-level elements of the section.
                foreach ($section->getElements() as $element) {
                    if ($element instanceof \PhpOffice\PhpWord\Element\Table) {
                        // Tables: one entry per row.
                        foreach ($element->getRows() as $row) {
                            $return[] = $this->getTableNode($row);
                        }
                    } elseif (
                        $element instanceof \PhpOffice\PhpWord\Element\TextRun
                        || $element instanceof \PhpOffice\PhpWord\Element\Text
                        || $element instanceof \PhpOffice\PhpWord\Element\Image
                    ) {
                        // Text-bearing elements: getTextNode() flattens a TextRun's children
                        // and also understands a bare Text or Image element.
                        $return[] = $this->getTextNode($element);
                    }
                }
            }
            return $return;
        }
         
        /**
         * Convert a single inline node into its string representation.
         *
         * - Text: the value of getText().
         * - Image: the <img> markup produced by pic2text().
         * - TextRun: the concatenation of its children, resolved recursively.
         * - Anything else: an empty string.
         *
         * @param mixed $node Element to convert.
         * @return string
         */
        public function getTextNode($node)
        {
            // Plain text.
            if ($node instanceof \PhpOffice\PhpWord\Element\Text) {
                return (string) $node->getText();
            }

            // Embedded picture.
            if ($node instanceof \PhpOffice\PhpWord\Element\Image) {
                return (string) $this->pic2text($node);
            }

            // Container of inline elements: resolve each child and join the pieces.
            if ($node instanceof \PhpOffice\PhpWord\Element\TextRun) {
                $pieces = [];
                foreach ($node->getElements() as $child) {
                    $pieces[] = $this->getTextNode($child);
                }
                return implode('', $pieces);
            }

            return '';
        }
         
        /**
         * Convert a table row or a table cell into a string.
         *
         * A Row is the concatenation of its cells (resolved recursively); a Cell is the
         * concatenation of getTextNode() applied to each of its elements. No separator is
         * inserted between cells or elements. Any other node yields an empty string.
         *
         * @param mixed $node Row or Cell element.
         * @return string
         */
        public function getTableNode($node)
        {
            // Row: join the content of every cell.
            if ($node instanceof \PhpOffice\PhpWord\Element\Row) {
                $cells = [];
                foreach ($node->getCells() as $cell) {
                    $cells[] = $this->getTableNode($cell);
                }
                return implode('', $cells);
            }

            // Cell: join the content of every element inside it.
            if ($node instanceof \PhpOffice\PhpWord\Element\Cell) {
                $parts = [];
                foreach ($node->getElements() as $child) {
                    $parts[] = $this->getTextNode($child);
                }
                return implode('', $parts);
            }

            return '';
        }
         
        /**
         * Render an image element as an HTML <img> tag with an inline data URI.
         *
         * The URI is built from the node's getImageType() value and the string returned
         * by getImageStringData(true), which is used as the base64 payload.
         *
         * @param mixed $node Image element exposing getImageStringData() and getImageType().
         * @return string The <img> markup.
         */
        public function pic2text($node)
        {
            // Fetch the encoded payload first, then the type, and assemble the tag.
            $payload = $node->getImageStringData(true);
            $type = $node->getImageType();

            return sprintf('<img src="data:%s;base64,%s">', $type, $payload);
        }
        
        
        //++++-----795新增代码END-------------+++++++