<?php
#BEGIN_LICENSE
#-------------------------------------------------------------------------
# Module: DocumentSearch (c) 2013 by Oliver Seddon 
#         (oliver@threefold.co.uk)
#  An addon module for CMS Made Simple to provide PDF and Word document
#  content searchability.
# 
#-------------------------------------------------------------------------
# CMS - CMS Made Simple is (c) 2005 by Ted Kulp (wishy@cmsmadesimple.org)
# This project's homepage is: http://www.cmsmadesimple.org
#
#-------------------------------------------------------------------------
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# However, as a special exception to the GPL, this software is distributed
# as an addon module to CMS Made Simple.  You may not use this software
# in any Non GPL version of CMS Made simple, or in any version of CMS
# Made simple that does not indicate clearly and obviously in its admin 
# section that the site was built with CMS Made simple.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
# Or read it online: http://www.gnu.org/licenses/licenses.html#GPL
#
#-------------------------------------------------------------------------
#END_LICENSE
// Refuse to run when this action file is requested directly instead of
// being included by the CMS (the CMS provides $gCms).
if (!isset($gCms)) exit;

// pdf2text is used further down to extract the text of uploaded PDF files.
// include_once avoids a fatal "cannot redeclare class" error when the
// library has already been loaded earlier in the same request.
include_once(dirname(__FILE__).'/lib/class.pdf2text.php');

// database handle used by the UPDATE / INSERT statements below
$db = $gCms->GetDb();

// The form was cancelled: go back to the module admin panel without saving.
if (isset($params['cancel'])) {
	$this->Redirect($id, 'defaultadmin', $returnid);
	// Redirect() is expected to end the request; returning here as well
	// guarantees that a cancelled form can never reach the save logic.
	return;
}

// ---------------------------------------------------------------------------
// Save a document record.
//
// Reads from $params: documentsearch_id, name, category, author, status,
// postdate_* and (insert without upload only) content.
// Reads from $_FILES: the optional upload field "<$id>locationfile".
//
// * documentsearch_id set and not -1  -> UPDATE of that record
// * otherwise                         -> INSERT with an id from the sequence
// * when a file is uploaded it is validated against the
//   'allowed_upload_types' preference, stored below
//   <uploads_path>/documentsearch/id<record id>/, its text is extracted
//   (pdf, docx, doc) and handed to the Search module.
//
// Leaves the outcome of the database call in $result for the code that
// follows this block. On an upload error the error page is displayed and
// the script returns FALSE.
// ---------------------------------------------------------------------------

// The three helpers below are global functions; the function_exists guards
// keep a second inclusion of this file from triggering a redeclare error.

if (!function_exists('readZippedXML')) {
	/**
	 * Read one XML member of a ZIP archive and return its text content
	 * with all markup removed.
	 *
	 * @param string $archiveFile Path of the ZIP archive (e.g. a .docx file).
	 * @param string $dataFile    Name of the archive member to read.
	 * @return string Text without XML tags, or "" on any failure.
	 */
	function readZippedXML($archiveFile, $dataFile) {
		if (!class_exists('ZipArchive') || !class_exists('DOMDocument')) {
			return "";
		}

		$zip = new ZipArchive;
		if (true !== $zip->open($archiveFile)) {
			return "";
		}

		$data = false;
		$index = $zip->locateName($dataFile);
		if ($index !== false) {
			$data = $zip->getFromIndex($index);
		}
		$zip->close();

		// loadXML() rejects an empty source, so stop here if nothing was read
		if (!is_string($data) || $data === '') {
			return "";
		}

		// The archive is user supplied: do NOT substitute entities or process
		// XIncludes (both allow reading local files) and forbid network access.
		// loadXML() must be called on an instance; the static form is an
		// error on PHP 8.
		$xml = new DOMDocument();
		if (!$xml->loadXML($data, LIBXML_NONET | LIBXML_NOERROR | LIBXML_NOWARNING)) {
			return "";
		}

		return strip_tags((string) $xml->saveXML());
	}
}

if (!function_exists('docx2text')) {
	/**
	 * Extract the text of a .docx file.
	 *
	 * @param string $filenamedocx Path of the .docx file.
	 * @return string Document text, or "" on failure.
	 */
	function docx2text($filenamedocx) {
		return readZippedXML($filenamedocx, "word/document.xml");
	}
}

if (!function_exists('parseWord')) {
	/**
	 * Heuristic text extraction for binary .doc files: the file is split on
	 * carriage returns, chunks containing NUL bytes are discarded, and every
	 * character outside a small whitelist is stripped from the remainder.
	 *
	 * @param string $userDoc Path of the .doc file.
	 * @return string Extracted text, or "" when the file is unreadable or empty.
	 */
	function parseWord($userDoc) {
		// file_get_contents() leaves no open handle behind and copes with
		// empty files, unlike the fopen()/fread(filesize()) pair
		$raw = @file_get_contents($userDoc);
		if ($raw === false || $raw === '') {
			return "";
		}

		$outtext = "";
		foreach (explode(chr(0x0D), $raw) as $thisline) {
			if (strlen($thisline) == 0 || strpos($thisline, chr(0x00)) !== FALSE) {
				continue;
			}
			$outtext .= $thisline." ";
		}

		return (string) preg_replace("/[^a-zA-Z0-9\s\,\.\-\n\r\t@\/\_\(\)]/","",$outtext);
	}
}

$fieldname = $id .'locationfile';
$is_update = isset($params['documentsearch_id']) && $params['documentsearch_id'] != -1;
$has_upload = isset($_FILES[$fieldname]['name']) && $_FILES[$fieldname]['name'] != '';

// Document date: now, unless the date selector fields were submitted.
// The parts arrive as request strings, mktime() wants integers.
$postdate = time();
if (isset($params['postdate_Month'])) {
	$datepart = array();
	foreach (array('Hour', 'Minute', 'Second', 'Month', 'Day', 'Year') as $part) {
		$datepart[$part] = isset($params['postdate_'.$part]) ? (int) $params['postdate_'.$part] : 0;
	}
	$postdate = mktime($datepart['Hour'], $datepart['Minute'], $datepart['Second'], $datepart['Month'], $datepart['Day'], $datepart['Year']);
}

// Columns written in every case (column name => value). Building the SQL
// from this map keeps column list, placeholders and values in step.
$fields = array();
foreach (array('name' => 'name', 'documentsearch_category_id' => 'category', 'author' => 'author', 'status' => 'status') as $column => $key) {
	$fields[$column] = isset($params[$key]) ? $params[$key] : null;
}
$fields['document_date'] = trim($db->DBTimeStamp($postdate), "'");

// Validate the upload before anything is created on disk or a sequence
// value is consumed.
if ($has_upload) {
	$config = cmsms()->GetConfig();
	$mod = cms_utils::get_module('DocumentSearch');

	$filename = basename($_FILES[$fieldname]['name']);
	$params['location'] = $filename;

	// Get the file's extension and compare it, case-insensitively, against
	// the 'allowed extensions'. A file without extension is never accepted.
	$ext = strtolower(pathinfo($filename, PATHINFO_EXTENSION));
	$exts = array_map('strtolower', array_map('trim', explode(',', (string) $mod->GetPreference('allowed_upload_types',''))));
	if ($ext === '' || !in_array($ext, $exts, true)) {
		$this->DisplayErrorPage($id, $params, $returnid,
			$this->Lang('error_invalidfiletype'));
		return FALSE;
	}
}

// Id of the record being written. It becomes part of a directory name,
// so the request value is forced to an integer.
if ($is_update) {
	$recordid = (int) $params['documentsearch_id'];
} else {
	$recordid = $db->GenID(cms_db_prefix().'module_documentsearch_seq');
}

if ($has_upload) {
	// make sure <uploads>/documentsearch and <uploads>/documentsearch/id<N> exist
	$directories = array(
		cms_join_path($config['uploads_path'],'documentsearch'),
		cms_join_path($config['uploads_path'],'documentsearch','id'.$recordid),
	);
	foreach ($directories as $p) {
		if (!is_dir($p) && @mkdir($p) === FALSE) {
			$this->DisplayErrorPage($id, $params, $returnid,
				$this->Lang('error_mkdir'));
			return FALSE;
		}
	}

	$dest = cms_join_path($config['uploads_path'],'documentsearch','id'.$recordid,$filename);
	if (@cms_move_uploaded_file($_FILES[$fieldname]['tmp_name'], $dest) === FALSE) {
		$this->DisplayErrorPage($id, $params, $returnid,
			$this->Lang('error_movefile'));
		return FALSE;
	}

	// Extract the searchable text. Allowed types without an extractor are
	// stored with empty content.
	$params['content'] = '';
	if ($ext == 'pdf') {
		$filepdf = new pdf2text();
		$filepdf->setFilename($dest);
		$filepdf->decodePDF();
		$params['content'] = $filepdf->output();
	} elseif ($ext == 'docx') {
		$params['content'] = docx2text($dest);
	} elseif ($ext == 'doc') {
		$params['content'] = parseWord($dest);
	}

	$fields['content'] = $params['content'];
	$fields['location'] = $params['location'];
}

if ($is_update) {

	// we received a documentsearch_id that was not -1, which means we're
	// updating an existing record. Without a new upload the stored content
	// and location are left untouched.
	$assignments = array();
	foreach (array_keys($fields) as $column) {
		$assignments[] = $column.'=?';
	}
	$query = 'UPDATE '.cms_db_prefix().
		'module_documentsearch set '.implode(', ', $assignments).' where documentsearch_id = ?';
	$values = array_values($fields);
	$values[] = $recordid;

} else {

	// we received no documentsearch_id or one that was -1, which means we're
	// creating a new record with the id fetched from the sequence.
	if (!array_key_exists('content', $fields)) {
		$fields['content'] = isset($params['content']) ? $params['content'] : '';
	}
	$fields = array_merge(array('documentsearch_id' => $recordid), $fields);
	$query = 'INSERT INTO '.cms_db_prefix().
		'module_documentsearch ('.implode(', ', array_keys($fields)).') VALUES ('.implode(',', array_fill(0, count($fields), '?')).')';
	$values = array_values($fields);

}

$result = $db->Execute($query, $values);
$params['module_message'] = $this->Lang('added_record');

// Make content accessible to the search module, but only for content that
// was really stored.
if ($has_upload && $result !== false) {
	$search = $this->GetModuleInstance('Search');
	if( $search ) {
		$search->AddWords( $this->GetName(), $recordid, 'documentsearch', $params['content'], NULL);
	}
}

// The database call above reported a failure: print a bare message and stop.
if (false === $result) {
	// yeah, that's graceful :(
	exit("Database error!");
}

// Drop the record id from the parameters, then go back to the module's
// admin panel (no parameters are forwarded with the redirect).
unset($params['documentsearch_id']);
$this->Redirect($id, 'defaultadmin', $returnid);
?>