Class Html2Text
public
|
|
public
|
|
public
|
|
public
string
|
|
public
|
|
public
|
|
public
|
|
public
|
|
public
|
|
protected
|
|
protected
|
|
protected
string
|
#
buildlinkList( string $link, string $display, null $linkOverride = null )
Helper function called by preg_replace() on link replacement. |
protected
|
|
protected
|
|
protected
string
|
|
protected
string
|
#
pregPreCallback( array $matches )
Callback function for preg_replace_callback use in PRE content handler. |
string |
ENCODING
|
#
'UTF-8'
|
protected
|
$html
Contains the HTML content to convert. |
|
protected
|
$text
Contains the converted, formatted text. |
|
protected
array
|
$search
List of preg* regular expression patterns to search for, used in conjunction with $replace. |
#
array(
"/\r/", // Non-legal carriage return
"/[\n\t]+/", // Newlines and tabs
'/<head[^>]*>.*?<\/head>/i', // <head>
'/<script[^>]*>.*?<\/script>/i', // <script>s -- which strip_tags supposedly has problems with
'/<style[^>]*>.*?<\/style>/i', // <style>s -- which strip_tags supposedly has problems with
'/<p[^>]*>/i', // <P>
'/<br[^>]*>/i', // <br>
'/<i[^>]*>(.*?)<\/i>/i', // <i>
'/<em[^>]*>(.*?)<\/em>/i', // <em>
'/(<ul[^>]*>|<\/ul>)/i', // <ul> and </ul>
'/(<ol[^>]*>|<\/ol>)/i', // <ol> and </ol>
'/(<dl[^>]*>|<\/dl>)/i', // <dl> and </dl>
'/<li[^>]*>(.*?)<\/li>/i', // <li> and </li>
'/<dd[^>]*>(.*?)<\/dd>/i', // <dd> and </dd>
'/<dt[^>]*>(.*?)<\/dt>/i', // <dt> and </dt>
'/<li[^>]*>/i', // <li>
'/<hr[^>]*>/i', // <hr>
'/<div[^>]*>/i', // <div>
'/(<table[^>]*>|<\/table>)/i', // <table> and </table>
'/(<tr[^>]*>|<\/tr>)/i', // <tr> and </tr>
'/<td[^>]*>(.*?)<\/td>/i', // <td> and </td>
'/<span class="_html2text_ignore">.+?<\/span>/i', // <span class="_html2text_ignore">...</span>
)
|
protected
array
|
$replace
List of pattern replacements corresponding to patterns searched. |
#
array(
'', // Non-legal carriage return
' ', // Newlines and tabs
'', // <head>
'', // <script>s -- which strip_tags supposedly has problems with
'', // <style>s -- which strip_tags supposedly has problems with
"\n\n", // <P>
"\n", // <br>
'_\\1_', // <i>
'_\\1_', // <em>
"\n\n", // <ul> and </ul>
"\n\n", // <ol> and </ol>
"\n\n", // <dl> and </dl>
"\t* \\1\n", // <li> and </li>
" \\1\n", // <dd> and </dd>
"\t* \\1", // <dt> and </dt>
"\n\t* ", // <li>
"\n-------------------------\n", // <hr>
"<div>\n", // <div>
"\n\n", // <table> and </table>
"\n", // <tr> and </tr>
"\t\t\\1\n", // <td> and </td>
"" // <span class="_html2text_ignore">...</span>
)
|
protected
array
|
$entSearch
List of preg* regular expression patterns to search for, used in conjunction with $entReplace. |
#
array(
'/&#153;/i', // TM symbol in win-1252
'/&#151;/i', // m-dash in win-1252
'/&(amp|#38);/i', // Ampersand: see converter()
'/[ ]{2,}/', // Runs of spaces, post-handling
)
|
protected
array
|
$entReplace
List of pattern replacements corresponding to patterns searched. |
#
array(
'™', // TM symbol
'—', // m-dash
'|+|amp|+|', // Ampersand: see converter()
' ', // Runs of spaces, post-handling
)
|
protected
array
|
$callbackSearch
List of preg* regular expression patterns to search for and replace using callback function. |
#
array(
'/<(h)[123456]( [^>]*)?>(.*?)<\/h[123456]>/i', // h1 - h6
'/<(b)( [^>]*)?>(.*?)<\/b>/i', // <b>
'/<(strong)( [^>]*)?>(.*?)<\/strong>/i', // <strong>
'/<(th)( [^>]*)?>(.*?)<\/th>/i', // <th> and </th>
'/<(a) [^>]*href=("|\')([^"\']+)\2([^>]*)>(.*?)<\/a>/i' // <a href="">
)
|
protected
array
|
$preSearch
List of preg* regular expression patterns to search for in PRE body, used in conjunction with $preReplace. |
#
array(
"/\n/",
"/\t/",
'/ /',
'/<pre[^>]*>/',
'/<\/pre>/'
)
|
protected
array
|
$preReplace
List of pattern replacements corresponding to patterns searched for PRE body. |
#
array(
'<br>',
'&nbsp;&nbsp;&nbsp;&nbsp;',
'&nbsp;',
'',
'',
)
|
protected
string
|
$preContent
Temporary workspace used during PRE processing. |
#
''
|
protected
string
|
$baseurl
Contains the base URL that relative links should resolve to. |
#
''
|
protected
boolean
|
$converted
Indicates whether content in the $html variable has been converted yet. |
#
false
|
protected
array
|
$linkList
Contains URL addresses from links to be rendered in plain text. |
#
array()
|
protected
array
|
$options
Various configuration options (able to be set in the constructor) |
#
array(
'do_links' => 'inline', // 'none'
// 'inline' (show links inline)
// 'nextline' (show links on the next line)
// 'table' (if a table of link URLs should be listed after the text)
'width' => 70, // Maximum width of the formatted text, in columns.
// Set this value to 0 (or less) to ignore word wrapping
// and not constrain text to a fixed-width column.
)
|