ter = array_value($config, 'filter');
$arr = array_value($filter, $type);
$enable = array_value($arr, 'enable');
$wordarr = array_value($arr, 'keyword');
if (0 == $enable || empty($wordarr)) return FALSE;
foreach ($wordarr as $_keyword) {
if (!$_keyword) continue;
$r = strpos(strtolower($keyword), strtolower($_keyword));
if (FALSE !== $r) {
$error = $_keyword;
return TRUE;
}
}
return FALSE;
}
/**
 * Build the scheme-and-host prefix of the current request.
 *
 * @return string e.g. "http://domain.com" or "https://domain.com"
 */
function url_prefix()
{
    // PHP sets $_SERVER['HTTPS'] to a non-empty value for HTTPS requests; the exact
    // value depends on the server ("on", "1", ...), and IIS/ISAPI reports "off" for plain HTTP.
    $secure = !empty($_SERVER['HTTPS']) && 'off' !== strtolower((string)$_SERVER['HTTPS']);

    // Behind a TLS-terminating proxy the original scheme arrives in X-Forwarded-Proto.
    if (!$secure && isset($_SERVER['HTTP_X_FORWARDED_PROTO'])) {
        $secure = ('https' === strtolower(trim((string)$_SERVER['HTTP_X_FORWARDED_PROTO'])));
    }

    // HTTP_HOST is missing when the request carries no Host header (CLI, HTTP/1.0),
    // so fall back to SERVER_NAME instead of raising an undefined-index notice.
    if (isset($_SERVER['HTTP_HOST'])) {
        $host = $_SERVER['HTTP_HOST'];
    } elseif (isset($_SERVER['SERVER_NAME'])) {
        $host = $_SERVER['SERVER_NAME'];
    } else {
        $host = '';
    }

    return ($secure ? 'https://' : 'http://') . $host;
}
/**
 * Generate a unique identifier.
 *
 * The result is uniqid() output prefixed with 8 hex characters taken from an
 * MD5 of the current microtime plus a random 4-digit number.
 *
 * @return string
 */
function uniq_id()
{
    $seed = microtime(true) . mt_rand(1000, 9999);
    // Characters 8..15 of the 32-character MD5 hex digest.
    $prefix = substr(md5($seed), 8, 8);
    return uniqid($prefix);
}
/**
 * Generate a 14-digit order number from the current time.
 *
 * The number is the Unix timestamp followed by four fractional digits
 * (tenths of a millisecond), e.g. 1715318266.1234 becomes "17153182661234".
 *
 * @return string exactly 14 decimal digits
 */
function trade_no()
{
    // "%.4F" always prints four decimals and ignores the locale, so the digit count
    // no longer depends on the "precision" ini setting or on dropped trailing zeros.
    $digits = str_replace('.', '', sprintf('%.4F', microtime(true)));
    // Force the documented width: right-pad with zeros if short, trim if long.
    return substr(str_pad($digits, 14, '0'), 0, 14);
}
/**
 * Generate a 16-digit order number: the Unix timestamp in seconds followed by
 * the six microsecond digits of the current time.
 *
 * @return string
 */
function trade_no_16()
{
    // microtime() without arguments returns "<fraction> <seconds>", e.g. "0.12345600 1715318266".
    list($fraction, $seconds) = explode(' ', microtime());
    // Skip the leading "0." and keep the six microsecond digits.
    $micro = mb_substr($fraction, 2, 6, 'UTF-8');
    return $seconds . $micro;
}
/**
 * Number of days in the year that contains the given moment.
 *
 * @param int|null $time Unix timestamp; the current time is used when it evaluates to 0.
 * @return int 365, or 366 in a leap year
 */
function date_year($time = NULL)
{
    if (!intval($time)) {
        $time = time();
    }
    // date('L') is "1" in a leap year and "0" otherwise.
    $leap = date('L', $time);
    return $leap + 365;
}
/**
 * Day of the year for the given moment, as produced by date('z') (starts at 0).
 *
 * @param int|null $time Unix timestamp; the current time is used when it evaluates to 0.
 * @return string
 */
function date_z($time = NULL)
{
    if (!intval($time)) {
        $time = time();
    }
    return date('z', $time);
}
/**
 * Day of the month without a leading zero ("1" to "31").
 *
 * @param int|null $time Unix timestamp; the current time is used when it evaluates to 0.
 * @return string
 */
function date_j($time = NULL)
{
    if (!intval($time)) {
        $time = time();
    }
    return date('j', $time);
}
/**
 * Day of the month as two digits with a leading zero ("01" to "31").
 *
 * @param int|null $time Unix timestamp; the current time is used when it evaluates to 0.
 * @return string
 */
function date_d($time = NULL)
{
    if (!intval($time)) {
        $time = time();
    }
    return date('d', $time);
}
/**
 * ISO-8601 day of the week: "1" for Monday through "7" for Sunday.
 *
 * @param int|null $time Unix timestamp; the current time is used when it evaluates to 0.
 * @return string
 */
function date_w_n($time = NULL)
{
    if (!intval($time)) {
        $time = time();
    }
    return date('N', $time);
}
/**
 * ISO-8601 week number of the year for the given moment (two digits, weeks start on Monday).
 *
 * @param int|null $time Unix timestamp; the current time is used when it evaluates to 0.
 * @return string
 */
function date_d_w($time = NULL)
{
    if (!intval($time)) {
        $time = time();
    }
    return date('W', $time);
}
/**
 * Month number without a leading zero ("1" to "12").
 *
 * @param int|null $time Unix timestamp; the current time is used when it evaluates to 0.
 * @return string
 */
function date_n($time = NULL)
{
    if (!intval($time)) {
        $time = time();
    }
    return date('n', $time);
}
/**
 * Number of days in the month that contains the given moment ("28" to "31").
 *
 * @param int|null $time Unix timestamp; the current time is used when it evaluates to 0.
 * @return string
 */
function date_t($time = NULL)
{
    if (!intval($time)) {
        $time = time();
    }
    return date('t', $time);
}
/**
 * Unix timestamp of 00:00:00 today in the default timezone.
 *
 * @return int|false
 */
function clock_zero()
{
    // A bare "Ymd" date string is parsed by strtotime() as midnight of that day.
    $today = date('Ymd');
    return strtotime($today);
}
/**
 * Unix timestamp of 24:00:00 today, i.e. 00:00:00 tomorrow in the default timezone.
 *
 * @return int|false
 */
function clock_twenty_four()
{
    // "tomorrow" resolves to the next calendar midnight. Adding a fixed 86400 seconds
    // to today's midnight is off by an hour on days when daylight saving time changes.
    return strtotime('tomorrow');
}
/**
 * Seconds remaining until the next 08:00:00, for a cache entry that expires at 8 a.m.
 *
 * Before 08:00 the target is 08:00 of the same day; from 08:00:00 onwards it is
 * 08:00 of the following day, so the result is always positive.
 *
 * @param int|null $time Reference Unix timestamp; the current time is used when it evaluates to 0.
 * @return int lifetime in seconds
 */
function eight_expired($time = NULL)
{
    $time = intval($time) ? intval($time) : time();

    // 08:00:00 on the calendar day that contains $time.
    $eight = strtotime(date('Y-m-d', $time) . ' 08:00:00');

    // Compare full timestamps rather than the hour number: an hour test of "<= 8"
    // still targets today's 08:00 during 08:00-08:59 and yields a non-positive lifetime.
    if ($time >= $eight) {
        $eight = strtotime('+1 day', $eight);
    }

    return $eight - $time;
}
/**
 * Seconds from $time until 24:00 today, for a cache entry that expires at midnight.
 *
 * @param int|null $time Reference Unix timestamp; the current time is used when it evaluates to 0.
 * @return int lifetime in seconds (clock_twenty_four() minus $time)
 */
function twenty_four_expired($time = NULL)
{
    if (!intval($time)) {
        $time = time();
    }
    return clock_twenty_four() - $time;
}
/**
 * Send an HTTP(S) request through cURL and return the response.
 *
 * When the curl extension is not loaded the call is delegated to
 * https_post() (non-empty $post) or http_get().
 *
 * @param string       $url     Request URL; an empty value returns FALSE immediately.
 * @param string|array $post    Array or query string to POST; empty for a plain GET; the
 *                              literal string 'GET' to follow redirects and return the
 *                              final (effective) URL instead of the response body.
 * @param string|array $cookie  Cookie header value, or an array of name => value pairs.
 * @param int          $timeout Timeout in seconds, or in milliseconds when $ms is truthy.
 * @param int          $ms      Set to 1 to interpret $timeout as milliseconds.
 * @return mixed Response body, or FALSE when curl_exec() fails; in 'GET' mode the effective URL.
 */
function https_request($url, $post = '', $cookie = '', $timeout = 30, $ms = 0)
{
    if (empty($url)) return FALSE;

    // On PHP older than 5.2.3 the millisecond timeout is not used: force 30 seconds.
    if (version_compare(PHP_VERSION, '5.2.3', '<')) {
        $ms = 0;
        $timeout = 30;
    }

    is_array($post) and $post = http_build_query($post);

    // 'GET' is a sentinel, not payload. Compare strictly so that values such as
    // TRUE or 0 are not mistaken for it by loose comparison.
    $resolve_only = ('GET' === $post);

    // Without the curl extension fall back to the stream-based helpers (POST supported).
    if (!extension_loaded('curl')) {
        if ($post) {
            return https_post($url, $post, $cookie, $timeout);
        } else {
            return http_get($url, $cookie, $timeout);
        }
    }

    // Pairs in a Cookie header are separated by "; ", not by "&".
    is_array($cookie) and $cookie = http_build_query($cookie, '', '; ');

    $curl = curl_init();
    // Return the result from curl_exec() instead of printing it.
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
    // The default of CURLOPT_SAFE_UPLOAD differs between PHP 5.5 and 5.6, so set it explicitly.
    if (class_exists('\CURLFile')) {
        curl_setopt($curl, CURLOPT_SAFE_UPLOAD, true);
    } else {
        defined('CURLOPT_SAFE_UPLOAD') and curl_setopt($curl, CURLOPT_SAFE_UPLOAD, false);
    }
    curl_setopt($curl, CURLOPT_URL, $url);

    // NOTE(review): this branch is taken only when BOTH ini values are set; old PHP
    // versions refused CURLOPT_FOLLOWLOCATION when either was set - confirm "&&" is intended.
    if (ini_get('safe_mode') && ini_get('open_basedir')) {
        if ($resolve_only) {
            // Redirects cannot be followed here: request the headers only, without a body.
            curl_setopt($curl, CURLOPT_HEADER, true);
            curl_setopt($curl, CURLOPT_NOBODY, true);
        }
    } else {
        curl_setopt($curl, CURLOPT_HEADER, false);
        // Follow up to 10 redirects automatically.
        curl_setopt($curl, CURLOPT_MAXREDIRS, 10);
        curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
    }

    // Forward the caller's User-Agent, or use a mobile Safari string when there is none.
    $ua1 = 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1';
    $ua = empty($_SERVER["HTTP_USER_AGENT"]) ? $ua1 : $_SERVER["HTTP_USER_AGENT"];
    curl_setopt($curl, CURLOPT_USERAGENT, $ua);

    if (FALSE !== stripos($url, 'https://')) {
        // SECURITY: certificate and host verification are disabled, so the peer is not authenticated.
        curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, FALSE);
        curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, FALSE);
        // 1 is the value of CURL_SSLVERSION_TLSv1; the literal keeps this working on PHP
        // builds that do not define the constant.
        curl_setopt($curl, CURLOPT_SSLVERSION, 1);
    }

    $header = array('Content-type: application/x-www-form-urlencoded;charset=UTF-8', 'X-Requested-With: XMLHttpRequest');
    $cookie and $header[] = "Cookie: $cookie";
    curl_setopt($curl, CURLOPT_HTTPHEADER, $header);

    // The 'GET' sentinel must not be sent as a POST body.
    if ($post && !$resolve_only) {
        curl_setopt($curl, CURLOPT_POST, true);
        // Set the Referer header automatically when following redirects.
        curl_setopt($curl, CURLOPT_AUTOREFERER, true);
        curl_setopt($curl, CURLOPT_POSTFIELDS, $post);
    }

    if ($ms) {
        // Set together with the millisecond timeout.
        curl_setopt($curl, CURLOPT_NOSIGNAL, true);
        curl_setopt($curl, CURLOPT_TIMEOUT_MS, intval($timeout));
    } else {
        curl_setopt($curl, CURLOPT_TIMEOUT, intval($timeout));
    }

    curl_setopt($curl, CURLOPT_ENCODING, 'gzip');

    $output = curl_exec($curl);
    // In 'GET' mode the caller wants the last URL reached, not the page content.
    $resolve_only and $output = curl_getinfo($curl, CURLINFO_EFFECTIVE_URL);
    curl_close($curl);
    return $output;
}
/**
 * Download the resource at $img with cURL and return its raw contents.
 *
 * TLS certificate and host verification are disabled, the connect timeout is
 * 10 seconds, and gzip-encoded responses are accepted.
 *
 * @param string $img URL to fetch
 * @return string|bool response body, or FALSE when curl_exec() fails
 */
function save_image($img)
{
    $curl = curl_init();
    curl_setopt_array($curl, array(
        CURLOPT_URL => $img,
        // Return the body from curl_exec() instead of echoing it.
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_CONNECTTIMEOUT => 10,
        CURLOPT_SSL_VERIFYHOST => false,
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_ENCODING => 'gzip',
    ));
    $body = curl_exec($curl);
    curl_close($curl);
    return $body;
}
/**
 * Compute the x offset that centres a TrueType string on a background.
 *
 * @param float  $size     Font size passed to imagettfbbox()
 * @param string $str      Text to measure
 * @param string $font     Path to the TrueType font file
 * @param int    $width    Width of the background
 * @param int    $multiple Divisor applied to the free space (2 centres the text)
 * @return float|int
 */
function calculate_str_width($size, $str, $font, $width, $multiple = 2)
{
    $bounds = imagettfbbox($size, 0, $font, $str);
    // Index 4 is the upper-right x and index 6 the upper-left x of the bounding box.
    $right = $bounds[4];
    $left = $bounds[6];
    $free = $width - $right - $left;
    return $free / $multiple;
}
/**
 * Recursively scan a directory and echo every file whose extension is in the
 * suspicious list (php, asp, jsp, cgi, exe, dll).
 *
 * @param string $path Directory to scan; anything that is not a directory is ignored.
 * @return void
 */
function search_directory($path)
{
    if (!is_dir($path)) return;

    $entries = scandir($path);
    // scandir() returns FALSE when the directory cannot be read.
    if (FALSE === $entries) return;

    foreach ($entries as $val) {
        if ('.' === $val || '..' === $val) continue;

        $sub_path = $path . '/' . $val;
        if (is_dir($sub_path)) {
            search_directory($sub_path);
            continue;
        }

        $ext = strtolower(file_ext($sub_path));
        if (in_array($ext, array('php', 'asp', 'jsp', 'cgi', 'exe', 'dll'), TRUE)) {
            echo '异常文件:' . $sub_path . ' ';
        }
    }
}
/**
 * Flatten a one-dimensional array into "key=value&key=value" strings.
 *
 * The 'sign' key is removed, the remaining keys are sorted ascending, and
 * entries whose value is NULL, FALSE, an empty string or an array are skipped.
 * Values 0 and '0' are kept.
 *
 * @param array  $arr  One-dimensional array of scalars
 * @param string $sign Output: raw string to be signed
 * @param string $url  Output: URL-encoded query string, passed through htmlspecialchars()
 *                     (so "&" becomes "&amp;")
 * @return void
 * @throws Exception when $arr contains a non-empty nested array
 */
function array_to_string($arr, &$sign = '', &$url = '')
{
    if (count($arr) != count($arr, 1)) throw new Exception('Does not support multi-dimensional array to string');

    // The signature itself never takes part in signing.
    unset($arr['sign']);
    ksort($arr);

    $url_pairs = array();
    $sign_pairs = array();
    foreach ($arr as $key => $val) {
        // Skip only genuinely empty values; empty() would also drop 0 and '0'.
        // Empty nested arrays pass the count check above, so filter them here.
        if (NULL === $val || FALSE === $val || '' === $val || is_array($val)) continue;
        $val = (string)$val;
        $url_pairs[] = $key . '=' . urlencode($val);
        $sign_pairs[] = $key . '=' . $val;
    }

    // implode() yields '' for no pairs, whereas substr('', 0, -1) is FALSE before PHP 8.
    $url = htmlspecialchars(implode('&', $url_pairs));
    $sign = implode('&', $sign_pairs);
}
/**
 * Sign data with an RSA private key and return the Base64-encoded signature.
 *
 * @param string $data      Data to sign
 * @param string $key       Private key body as one Base64 string, without PEM header/footer
 * @param string $sign_type 'RSA2' signs with SHA-256; any other value signs with SHA-1
 * @return string Base64-encoded signature
 * @throws Exception when OpenSSL is unavailable, the key cannot be parsed, or signing fails
 */
function rsa_create_sign($data, $key, $sign_type = 'RSA')
{
    if (!function_exists('openssl_sign')) throw new Exception('OpenSSL extension is not enabled');
    if (!defined('OPENSSL_ALGO_SHA256')) throw new Exception('Only versions above PHP 5.4.8 support SHA256');

    // Rebuild a PEM block: the key body wrapped at 64 characters between the header and footer.
    $pem = "-----BEGIN RSA PRIVATE KEY-----\n" . wordwrap($key, 64, "\n", true) . "\n-----END RSA PRIVATE KEY-----";

    // Parse the key first so that a bad key raises the documented error instead of
    // silently producing an empty signature.
    $private = openssl_pkey_get_private($pem);
    if (FALSE === $private) throw new Exception('Private Key Error');

    $algo = ('RSA2' == $sign_type) ? OPENSSL_ALGO_SHA256 : OPENSSL_ALGO_SHA1;

    $sign = '';
    if (!openssl_sign($data, $sign, $private, $algo)) throw new Exception('RSA sign failed');

    return base64_encode($sign);
}
/**
 * Verify a Base64-encoded RSA signature with a public key.
 *
 * @param string $data      Data that was signed
 * @param string $sign      Base64-encoded signature
 * @param string $key       Public key body as one Base64 string, without PEM header/footer
 * @param string $sign_type 'RSA2' verifies with SHA-256; any other value verifies with SHA-1
 * @return bool TRUE only when openssl_verify() reports a valid signature
 * @throws Exception when the key cannot be wrapped
 */
function rsa_verify_sign($data, $sign, $key, $sign_type = 'RSA')
{
    $wrapped = wordwrap($key, 64, "\n", true);
    if (FALSE === $wrapped) throw new Exception('Public Key Error');
    $pem = "-----BEGIN PUBLIC KEY-----\n" . $wrapped . "\n-----END PUBLIC KEY-----";

    $algo = ('RSA2' == $sign_type) ? OPENSSL_ALGO_SHA256 : OPENSSL_ALGO_SHA1;
    $signature = base64_decode($sign);

    // openssl_verify(): 1 = valid, 0 = invalid, -1 or FALSE = error. Only 1 counts as success.
    return 1 === openssl_verify($data, $signature, $pem, $algo);
}
/**
 * Serialize a flat associative array to an XML document.
 *
 * Example: array('appid' => 'wx1', 'code' => 200) becomes
 * <xml><appid><![CDATA[wx1]]></appid><code>200</code></xml>
 *
 * NOTE(review): the <xml> root element and CDATA wrapping follow the WeChat Pay
 * message format this helper appears to target - confirm against the callers.
 *
 * @param array $arr Non-empty array of element name => scalar value
 * @return string
 * @throws Exception when $arr is not a non-empty array
 */
function array_to_xml($arr)
{
    if (!is_array($arr) || empty($arr)) throw new Exception('Array Error');

    $xml = '<xml>';
    foreach ($arr as $key => $val) {
        if (is_numeric($val)) {
            // Numeric values are written as plain text.
            $xml .= '<' . $key . '>' . $val . '</' . $key . '>';
        } else {
            // "]]>" would close the CDATA section early, so split it across two sections.
            $val = str_replace(']]>', ']]]]><![CDATA[>', (string)$val);
            $xml .= '<' . $key . '><![CDATA[' . $val . ']]></' . $key . '>';
        }
    }
    $xml .= '</xml>';

    return $xml;
}
/**
 * Parse an XML string into an array (top-level child elements become keys).
 *
 * CDATA sections are merged into plain text.
 *
 * @param string $xml XML document
 * @return array
 * @throws Exception when $xml is empty or is not well-formed XML
 */
function xml_to_array($xml)
{
    if (!$xml) throw new Exception('XML error');

    // libxml_disable_entity_loader() is deprecated as of PHP 8.0, so call it only on older versions.
    $legacy = version_compare(PHP_VERSION, '8.0.0', '<') && function_exists('libxml_disable_entity_loader');
    $old = $legacy ? libxml_disable_entity_loader(true) : TRUE;

    // Collect parse errors internally instead of emitting warnings; failure is reported below.
    $errors = libxml_use_internal_errors(true);
    $node = simplexml_load_string($xml, 'SimpleXMLElement', LIBXML_NOCDATA | LIBXML_COMPACT);
    libxml_clear_errors();
    libxml_use_internal_errors($errors);

    // Restore the previous entity-loader state if it was enabled before the call.
    if ($legacy && FALSE === $old) libxml_disable_entity_loader(false);

    // Without this check a parse failure would be cast to array(false).
    if (FALSE === $node) throw new Exception('XML error');

    return (array)$node;
}
/**
 * Lazily read a file line by line.
 *
 * @param string $file Path of the file to read
 * @return Generator yields each line with surrounding whitespace trimmed;
 *                   yields nothing when the file cannot be opened
 */
function well_import($file)
{
    $handle = fopen($file, 'r');
    if (!$handle) return;

    try {
        // Loop on the fgets() result rather than feof(): feof() only turns TRUE after a
        // failed read, which produced one phantom empty line at the end of the file.
        while (FALSE !== ($line = fgets($handle))) {
            yield trim($line);
        }
    } finally {
        // Also runs when the consumer stops iterating early.
        fclose($handle);
    }
}
/**
 * Count the lines that well_import() yields for a file, with caching.
 *
 * The count is looked up in a per-request static cache, then through
 * cache_get(); when neither has it, the file is read and a non-zero count
 * is stored with cache_set() for 300 seconds.
 *
 * NOTE(review): both caches are keyed by $key only, so different files that
 * share a key share a count - confirm callers pass distinct keys.
 *
 * @param string $file Path of the file to count
 * @param string $key  Cache key
 * @return int
 */
function well_import_total($file, $key = 'well_import_total')
{
    static $cache = array();
    if (isset($cache[$key])) return $cache[$key];

    $count = cache_get($key);
    if (NULL === $count) {
        $count = 0;
        foreach (well_import($file) as $unused) {
            ++$count;
        }
        $count and cache_set($key, $count, 300);
    }

    $cache[$key] = $count;
    return $count;
}
// Accumulator shared by every well_search_dir() call; FALSE until the first call.
$g_dir_file = FALSE;
/**
 * Recursively collect the paths of all files below a directory.
 *
 * NOTE(review): results are appended to the global $g_dir_file and never reset,
 * so a second call also returns the files found by earlier calls - confirm
 * this is intended.
 *
 * @param string $path Directory to scan; anything that is not a directory adds nothing.
 * @return array|bool the accumulated list of file paths
 */
function well_search_dir($path)
{
    global $g_dir_file;
    if (FALSE === $g_dir_file) {
        $g_dir_file = array();
    }

    if (!is_dir($path)) {
        return $g_dir_file;
    }

    foreach (scandir($path) as $entry) {
        if ('.' == $entry || '..' == $entry) continue;

        $child = $path . '/' . $entry;
        if (is_dir($child)) {
            well_search_dir($child);
        } else {
            $g_dir_file[] = $child;
        }
    }

    return $g_dir_file;
}
?>4.A Sentimental Education: Sentiment Analysis Using SubjectivitySummarization Based on Minimum Cuts-阿南达文事网
4.A Sentimental Education: Sentiment Analysis Using SubjectivitySummarization Based on Minimum Cuts 编程日记 65 0
更新时间:2025-05-10 13:17:46
4. A Sentimental Education: Sentiment Analysis Using Subjectivity Summarization Based on Minimum Cuts
A Sentimental Education: Sentiment Analysis Using Subjectivity Summarization Based on Minimum Cuts
情感教育:基于最小割的主观性摘要的情感分析
一、摘要
情感分析旨在识别文本片段背后的观点。 一个示例应用是将电影评论分为“竖起大拇指”(好评)或“拇指朝下”(差评)。 为了确定这种情感极性,我们提出了一种新颖的机器学习方法,该方法仅将文本分类技术应用于文档的主观部分。 这些部分可以利用在图中寻找最小割的高效技术来提取; 这极大地促进了跨句上下文约束的融合。
二、Method
2.1 Architecture 首先使用主观性检测器来确定每个句子是否是主观的。丢弃客观的内容会创建一个摘要,该摘要应更好地将评论的主观内容呈现给默认的极性分类器。
2.2 Context and Subjectivity Detection We use an efficient and intuitive graph-based formulation relying on finding minimum cuts. 我们采用一种高效且直观的、基于图的建模方式,其核心是寻找最小割。 whereas we are concerned with physical proximity between the items to be classified;而我们关注的是待分类项目之间的物理邻近程度。
2.3 Cut-based classification(基于切割的分类) I)non-negative estimates of each xi’s preference for being in Cj based on just the features of xi alone 仅根据xi的特征对每个xi在Cj中的偏好进行非负估计 II)Association scores assoc(xi, xk): non-negative estimates of how important it is that xi and xk be in the same class. 关联评分assoc(xi,xk):xi和xk在同一类别中的重要性的非负估计。
三、Evaluation Framework
polarity dataset Default polarity classifiers Subjectivity dataset i) we collected 5000 movie-review snippets from www.rottentomatoes.com. ii) To obtain (mostly) objective data, we took 5000 sentences from plot summaries available from the Internet Movie Database (www.imdb.com). Subjectivity detectors we can use our default polarity classifiers as “basic” sentence-level subjectivity detectors. 我们可以使用默认的极性分类器作为“基本”的句子级主观性检测器。
四、Experimental Results
As we will see, the use of subjectivity extracts can in the best case provide satisfying improvement in polarity classification. 正如我们将看到的,在最佳情况下,使用主观性摘录可以在极性分类方面带来令人满意的改进。 We therefore conclude that subjectivity extraction produces effective summaries of document sentiment. 因此,我们得出的结论是,主观性提取能够生成有效的文档情感摘要。
4.1 Basic subjectivity extraction基本主观性提取 As noted in Section 3, both Naive Bayes and SVMs can be trained on our subjectivity dataset and then used as a basic subjectivity detector.如第3节所述,朴素贝叶斯和SVM都可以在我们的主观性数据集上进行训练,然后用作基本的主观性检测器。(前者在主观性数据集上具有更好的平均十倍交叉验证性能)
4.2 Incorporating context information The previous section demonstrated the value of subjectivity detection. We now examine whether context information, particularly regarding sentence proximity, can further improve subjectivity extraction. As discussed in Section 2.2 and 3, contextual constraints are easily incorporated via the minimum-cut formalism but are not natural inputs for standard Naive Bayes and SVMs. 上一节演示了主观性检测的价值。 现在我们检查上下文信息,尤其是关于句子邻近性的信息,是否可以进一步改善主观性的提取。 如 2.2 节和第 3 节所述,上下文约束很容易通过最小割形式化方法纳入,但它们并不是标准朴素贝叶斯和 SVM 的自然输入。
附录
本文发布于:2024-11-10,感谢您对本站的认可!
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。
本文标签: 4A Sentimental Education Sentiment Analysis Using SubjectivitySummarization Based on Minimum Cuts
发布评论