'Google CSE Advanced',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('google_cse_adv_settings_form'),
    'access arguments' => array('administer search'),
    'type' => MENU_NORMAL_ITEM
  );

  return $items;
}

/**
 * Form callback.
 *
 * Builds the module settings form. Every element is keyed by the name of the
 * variable it edits and takes its default value from variable_get().
 *
 * @return array
 *   The form array, passed through system_settings_form().
 */
function google_cse_adv_settings_form() {
  $form = array();

  $form['google_cse_adv_cx'] = array(
    '#type' => 'textfield',
    '#title' => t('CX key'),
    '#description' => t('A unique code that identifies a custom search engine.'),
    '#size' => 50,
    '#maxlength' => 100,
    '#default_value' => variable_get('google_cse_adv_cx', '')
  );

  $form['google_cse_adv_language'] = array(
    '#type' => 'select',
    '#title' => t('Language awareness'),
    '#description' => t('Search results optimalization based on a language.'),
    '#options' => array(
      'none' => t('None'),
      'active' => t('Active language')
    ),
    '#default_value' => variable_get('google_cse_adv_language', 'none')
  );

  $form['google_cse_adv_populate_search_block'] = array(
    '#type' => 'checkbox',
    '#title' => t('Populate search block'),
    '#description' => t('Populate search block form with a current term.'),
    '#default_value' => variable_get('google_cse_adv_populate_search_block', 0)
  );

  $form['google_cse_adv_proxy'] = array(
    '#type' => 'textfield',
    '#title' => t('Proxy server'),
    '#description' => t('If your Drupal site lies behind a proxy server, set it\'s address here, e.g. "http://1.2.3.4:8080"'),
    '#size' => 50,
    '#maxlength' => 100,
    '#default_value' => variable_get('google_cse_adv_proxy', '')
  );

  $form['google_cse_adv_results_per_page'] = array(
    '#type' => 'textfield',
    '#title' => t('Number of results per page'),
    '#size' => 3,
    '#default_value' => variable_get('google_cse_adv_results_per_page', 10),
  );

  return system_settings_form($form);
}

/**
 * Settings form validation.
 *
 * @param array $form
 *   The form structure (unused).
 * @param array $form_state
 *   The form state; the results-per-page value is cast to an integer in place.
 */
function google_cse_adv_settings_form_validate($form, &$form_state) {
  // Ensure that results_per_page is a positive integer.
  $field = 'google_cse_adv_results_per_page';
  // Taken by reference so that the integer cast inside the condition is
  // written back into the submitted values.
  $value =& $form_state['values'][$field];
  if (!(is_numeric($value) && ($value = (int)$value) && $value > 0)) {
    form_set_error($field, t("Number of results per page must be a positive integer."));
  }
}

/**
 * Implementation of hook_menu_alter().
 *
 * Points the search pages at google_cse_adv_search_view(), demotes the node
 * search tab to a callback and retitles/reweights this module's search tab.
 *
 * @see apachesolr_search_menu_alter()
 */
function google_cse_adv_menu_alter(&$menu) {
  if (isset($menu['search/google_cse_adv/%menu_tail'])) {
    $menu['search']['page callback'] = 'google_cse_adv_search_view';
    $menu['search/google_cse_adv/%menu_tail']['page callback'] = 'google_cse_adv_search_view';
  }

  if (isset($menu['search/node/%menu_tail'])) {
    // Hide the node search tab.
    $menu['search/node/%menu_tail']['type'] = MENU_CALLBACK;
    unset($menu['search/node/%menu_tail']['title callback'], $menu['search/node/%menu_tail']['title arguments']);
    $menu['search/node/%menu_tail']['title'] = 'Search';
  }

  if (isset($menu['search/google_cse_adv/%menu_tail'])) {
    // Alter the google_cse_adv search tab
    $menu['search/google_cse_adv/%menu_tail']['weight'] = -10;
    $menu['search/google_cse_adv/%menu_tail']['title'] = 'Content';
  }
}

/**
 * Implementation of hook_form_[form_id]_alter().
 *
 * Delegates to the search block form alter so both forms behave the same.
 *
 * @see apachesolr_search_form_search_theme_form_alter()
 */
function google_cse_adv_form_search_theme_form_alter(&$form, $form_state) {
  google_cse_adv_form_search_block_form_alter($form, $form_state);
}

/**
 * Implementation of hook_form_[form_id]_alter().
 *
 * Installs this module's submit handler on the search box form and, when the
 * 'google_cse_adv_populate_search_block' variable is enabled, pre-fills the
 * form with the current search keys.
 *
 * @see apachesolr_search_form_search_block_form_alter()
 */
function google_cse_adv_form_search_block_form_alter(&$form, $form_state) {
  if (!isset($form['#submit'])) {
    $form['#submit'] = array('google_cse_adv_search_box_form_submit');
  }
  else {
    $key = array_search('search_box_form_submit', $form['#submit']);
    if ($key !== FALSE) {
      // Replace the search module's function.
      $form['#submit'][$key] = 'google_cse_adv_search_box_form_submit';
    }
  }

  // Populate search block with a current term
  if (variable_get('google_cse_adv_populate_search_block', 0)) {
    $keys = trim(search_get_keys());
    // We have to check if we are on the search page - search_get_keys()
    // can return some string even if there is no search request
    if ($keys && arg(0) == 'search' && arg(1) == 'google_cse_adv') {
      // Classic block
      if (isset($form['search_block_form'])) {
        $form['search_block_form']['#default_value'] = $keys;
      }
      // Internal block
      if (isset($form['search_theme_form'])) {
        $form['search_theme_form']['#default_value'] = $keys;
      }
    }
  }
}

/**
 * Process a block search form submission.
 *
 * Redirects to search/google_cse_adv/<keys>, where the keys are read from the
 * submitted value named after the form id.
 *
 * @see apachesolr_search_search_box_form_submit()
 */
function google_cse_adv_search_box_form_submit($form, &$form_state) {
  $form_id = $form['form_id']['#value'];
  $keys = $form_state['values'][$form_id];
  // Handle Apache webserver clean URL quirks.
  if (variable_get('clean_url', '0')) {
    $keys = str_replace('+', '%2B', $keys);
  }
  $form_state['redirect'] = 'search/google_cse_adv/'. trim($keys);
}

/**
 * Re-implementation of search_view().
 *
 * @param string $type
 *   The search type taken from the path, or NULL on the bare search page.
 *
 * @return string
 *   The rendered search form followed by the rendered results (if any).
 *
 * @see apachesolr_search_view()
 * @see search_view()
 */
function google_cse_adv_search_view($type = NULL) {
  // Search form submits with POST but redirects to GET.
  $results = '';
  // Initialised up front: on a POST request the branch below is skipped, but
  // the keys are still handed to the search form at the end.
  $keys = '';

  if (!isset($_POST['form_id'])) {
    if (empty($type)) {
      // Note: search/X can not be a default tab because it would take on the
      // path of its parent (search). It would prevent remembering keywords when
      // switching tabs. This is why we drupal_goto to it from the parent instead.
      drupal_goto('search/google_cse_adv');
    }

    $keys = trim(search_get_keys());
    // Only perform search if there is non-whitespace search term:
    if ($keys) {
      // Log the search keys:
      watchdog('search', '%keys (@type).', array('%keys' => $keys, '@type' => t('Search')), WATCHDOG_NOTICE, l(t('results'), 'search/'. $type .'/'. $keys));

      // Collect the search results:
      $results = search_data($keys, $type);

      if ($results) {
        $results = theme('box', t('Search results'), $results);
      }
      else {
        $results = theme('box', t('Your search yielded no results'), search_help('search#noresults', drupal_help_arg()));
      }
    }
  }

  // Construct the search form.
  return drupal_get_form('search_form', NULL, $keys, $type) . $results;
}

/**
 * Implementation of hook_search().
 *
 * @param string $op
 *   'name' returns the search tab name; 'search' runs the query.
 * @param string $keys
 *   The search keywords.
 *
 * @return mixed
 *   The translated name, an array of results, or NULL for other operations.
 */
function google_cse_adv_search($op = 'search', $keys = NULL) {
  switch ($op) {
    case 'name':
      return t('Search');

    case 'search':
      $page = isset($_GET['page']) ? $_GET['page'] : 0;
      $response = google_cse_adv_execute($keys, $page);
      return google_cse_adv_response_results($response, $keys);
  }
}

/**
 * Sends query to Google's Custom Search Engine and returns response.
 * Can return cached response unless $reset param is set to TRUE (cached
 * upon one page load only) - useful for working with a block content.
 *
 * The cache is keyed by $keys only, so a second call with the same keys but a
 * different $page returns the first response unless $reset is TRUE.
 *
 * Returns a response string (XML), or FALSE when the cURL transfer failed.
 *
 * @param string $keys
 * @param int $page
 * @param bool $reset
 * @return string
 */
function google_cse_adv_execute($keys, $page, $reset = FALSE) {
  static $response = array();

  // Cache
  if (isset($response[$keys]) && FALSE === $reset) {
    return $response[$keys];
  }

  // Number of results per page. 10 is the default for Google CSE.
  $rows = (int) variable_get('google_cse_adv_results_per_page', 10);

  // Callers pass the raw 'page' query parameter; force it to an integer
  // before it is used in arithmetic.
  $page = (int) $page;

  // Language interface.
  $hl = google_cse_adv_param_hl();

  // Language filtering.
  $lr = google_cse_adv_param_lr();

  $url = 'http://www.google.com/cse'
    . '?cx=' . urlencode(variable_get('google_cse_adv_cx', ''))
    . '&client=google-csbe'
    . '&output=xml_no_dtd'
    . ($hl ? '&hl=' . urlencode($hl) : '')
    . ($lr ? '&lr=' . urlencode($lr) : '')
    . '&q=' . urlencode($keys)
    . (isset($_GET['more']) ? '+more:' . urlencode($_GET['more']) : '')
    . '&num=' . urlencode($rows)
    . '&start=' . urlencode($page * $rows);

  // Wrapped in an object so that hook implementations can change the URL.
  $request = new stdClass();
  $request->url = $url;
  module_invoke_all('google_cse_adv_execute', $request);

  $curl = curl_init();
  curl_setopt($curl, CURLOPT_URL, $request->url);
  // Return into a variable
  curl_setopt($curl, CURLOPT_RETURNTRANSFER, 1);
  $proxy = variable_get('google_cse_adv_proxy', '');
  if (is_string($proxy) && $proxy !== '') {
    curl_setopt($curl, CURLOPT_PROXY, $proxy);
  }

  $response[$keys] = curl_exec($curl);
  if ($response[$keys] === FALSE) {
    // Record why the transfer failed; the FALSE result is still returned so
    // that callers see the same value as before.
    watchdog('search', 'Google CSE request failed: @error', array('@error' => curl_error($curl)), WATCHDOG_ERROR);
  }
  curl_close($curl);

  // The URL goes in as a placeholder because the message is a format string.
  watchdog('search', 'Google CSE request: @url', array('@url' => $request->url), WATCHDOG_DEBUG);

  return $response[$keys];
}

/**
 * Converts a Google CSE XML response into an array of search results.
 *
 * Also initialises the pager from the (capped) result total.
 *
 * @param string $response
 *   The XML response.
 * @param string $keys
 *   The search keywords, added (escaped) to every result.
 *
 * @return array
 *   One array per result with 'link', 'title', 'snippet' and 'keys'; empty
 *   when the response holds no results.
 */
function google_cse_adv_response_results($response, $keys) {
  $xml = simplexml_load_string($response);

  $results = array();

  if (isset($xml->RES->R)) {
    // Cap the result total if necessary.
    // Google will not return more than 1000 results, but RES->M may
    // be higher than this, which messes up our paging. Retain a copy
    // of the original so that themers can still display it.
    $max_results = variable_get('google_cse_adv_maximum_results', 1000);
    $total = (int) $xml->RES->M;
    $xml->RES->M_ORIGINAL = $total;
    if ($total > $max_results) {
      $xml->RES->M = $total = $max_results;
    }

    foreach ($xml->RES->R as $result) {
      // Markup is escaped in title - we have to strip it (and strip_tags is not enough protection for XSS)
      $title = strip_tags((string)$result->T);
      $title = function_exists('htmlspecialchars_decode') ? htmlspecialchars_decode($title, ENT_QUOTES) : $title;

      $snippet = check_plain(strip_tags((string)$result->S));
      $snippet = function_exists('htmlspecialchars_decode') ? htmlspecialchars_decode($snippet, ENT_QUOTES) : $snippet;

      $results[] = array(
        'link' => (string)$result->U,
        'title' => $title,
        'snippet' => $snippet,
        'keys' => check_plain($keys),
      );
    }

    // No pager query was executed - we have to set the pager manually
    $limit = variable_get('google_cse_adv_results_per_page', 10);
    _google_cse_adv_pager_init($total, $limit);
  }

  return $results;
}

/**
 * Renders the filter (facet) links found in a Google CSE XML response.
 *
 * @param string $response
 *   The XML response.
 *
 * @return string
 *   A themed item list of filter links, or '' when there are none.
 */
function google_cse_adv_response_filters($response) {
  $xml = simplexml_load_string($response);

  $results = array();

  if (isset($xml->Context->Facet)) {
    $filters = array();

    // Find "more:" (filter) part
    // We will take it from result rather then from URL because the request
    // can be changed during its generation. Also, "Q" property contains
    // an unescaped string so there is " more:" rather than "+more:".
    $matches = array();
    preg_match('/ more:([^ ]+)/', (string)$xml->Q, $matches);
    $more = isset($matches[1]) ? $matches[1] : '';

    foreach ($xml->Context->Facet as $val) {
      $label = (string)$val->FacetItem->label;
      $text = t((string)$val->FacetItem->anchor_text);

      if ($label == $more) {
        $active = TRUE;
      }
      else {
        $active = FALSE;
      }

      $filters[] = array(
        'label' => $label,
        'text' => $text,
        'active' => $active
      );
    }

    // Wrapped in an object so that hook implementations can change the list.
    $data = new stdClass();
    $data->filters = $filters;
    $data->response = $response;
    module_invoke_all('google_cse_adv_filters', $data);
    $filters = $data->filters;

    foreach ($filters as $filter) {
      $results[] = google_cse_adv_filter_l(
        '' . t(check_plain($filter['text'])) . '',
        $_GET['q'],
        array(
          'attributes' => array('class' => 'filter-link filter-link-' . check_plain($filter['label']), 'title' => check_plain($filter['label'])),
          'query' => array('more' => $filter['label']),
          'active' => $filter['active'],
          'html' => TRUE
        )
      );
    }
  }

  if (count($results)) {
    return theme('item_list', $results);
  }

  return '';
}

/**
 * Renders the spelling suggestions found in a Google CSE XML response.
 *
 * @param string $response
 *   The XML response.
 *
 * @return string
 *   The themed suggestion links, or '' when the response has none.
 */
function google_cse_adv_response_suggestion($response) {
  $xml = simplexml_load_string($response);

  if (isset($xml->Spelling->Suggestion)) {
    $suggestions = array();

    foreach ($xml->Spelling->Suggestion as $suggestion) {
      // l() argument
      $query = array();
      $q = (string)$suggestion['q'];
      // Split the suggested query into the plain term and the filter part.
      $term = preg_replace('/ more:.*/i', '', $q);
      // Compared against FALSE because a match at offset 0 is also a match.
      if (strpos($q, ' more:') !== FALSE) {
        $query['more'] = preg_replace('/.* more:/i', '', $q);
      }
      $url = 'search/google_cse_adv/' . $term;
      $link = l($term, $url, array('query' => $query));
      $suggestions[] = $link;
    }

    return theme('google_cse_adv_suggestion', $suggestions);
  }

  return '';
}

/**
 * Implementation of hook_block().
 *
 * Provides two blocks: delta 0 (filters) and delta 1 (spelling suggestion).
 */
function google_cse_adv_block($op = 'list', $delta = 0, $edit = array()) {
  switch ($op) {
    case 'list':
      return array(
        0 => array(
          'info' => t('Google CSE Advanced filters')
        ),
        1 => array(
          'info' => t('Google CSE Advanced suggestion')
        )
      );

    case 'view':
      switch ($delta) {
        case 0:
          return google_cse_adv_block_filters();

        case 1:
          return google_cse_adv_block_suggestion();
      }
  }
}

/**
 * Builds the filters block.
 *
 * @return array
 *   Block array with 'subject' and 'content'; the content stays empty outside
 *   of a google_cse_adv search page.
 */
function google_cse_adv_block_filters() {
  $block = array(
    'subject' => t('Filters'),
    'content' => ''
  );

  $keys = search_get_keys();

  // Execute only if there is something to be found (keys)
  // Note: search_get_keys() can return result even if there
  // was no search request so we have to check if we are on
  // the search page.
  if (user_access('search content') && $keys && arg(0) == 'search' && arg(1) == 'google_cse_adv') {
    $page = isset($_GET['page']) ? $_GET['page'] : 0;
    $response = google_cse_adv_execute($keys, $page);
    $block['content'] = google_cse_adv_response_filters($response);
  }

  return $block;
}

/**
 * Builds the spelling suggestion block.
 *
 * @return array
 *   Block array with 'subject' and 'content'; the content stays empty outside
 *   of a google_cse_adv search page.
 */
function google_cse_adv_block_suggestion() {
  $block = array(
    'subject' => t('Spelling Suggestion'),
    'content' => ''
  );

  $keys = search_get_keys();

  // Execute only if there is something to be found (keys)
  // Note: search_get_keys() can return result even if there
  // was no search request so we have to check if we are on
  // the search page.
  if (user_access('search content') && $keys && arg(0) == 'search' && arg(1) == 'google_cse_adv') {
    $page = isset($_GET['page']) ? $_GET['page'] : 0;
    $response = google_cse_adv_execute($keys, $page);
    $block['content'] = google_cse_adv_response_suggestion($response);
  }

  return $block;
}

/**
 * Returns "hl" language param for search request.
 *
 * @return string
 */
function google_cse_adv_param_hl() {
  switch (variable_get('google_cse_adv_language', '')) {
    case 'active':
      global $language;
      return $language->language;

    default:
      return '';
  }
}

/**
 * Returns "lr" language param for search request.
 *
 * According to Google documentation, lr parameter text is 'lang_LANGPREFIX'.
 * We have to trust Drupal users that they don't use custom language prefixes
 * for their languages.
 * TODO: Support custom language prefixes?
 *
 * @see http://www.google.com/cse/docs/resultsxml.html#languageCollections
 *
 * @return string
 */
function google_cse_adv_param_lr() {
  switch (variable_get('google_cse_adv_language', '')) {
    case 'active':
      global $language;
      return 'lang_' . $language->language;

    default:
      return '';
  }
}

/**
 * A replacement for l()
 * - doesn't add the default 'active' class because we have to check
 *   even the filter which may not be a part of 'q'
 *
 * The 'active' class is added only when $options['active'] is non-empty.
 *
 * @param string $text
 *   The link text; escaped unless $options['html'] is TRUE.
 * @param string $path
 *   The path being linked to.
 * @param array $options
 *   Link options: 'attributes', 'html', 'active', plus anything url() reads.
 *
 * @return string
 *   The link markup.
 *
 * @see l()
 */
function google_cse_adv_filter_l($text, $path, $options = array()) {
  // Merge in defaults.
  $options += array(
    'attributes' => array(),
    'html' => FALSE,
  );

  // Adds active
  if (!empty($options['active'])) {
    if (isset($options['attributes']['class'])) {
      $options['attributes']['class'] .= ' active';
    }
    else {
      $options['attributes']['class'] = 'active';
    }
  }

  // Remove all HTML and PHP tags from a tooltip. For best performance, we act only
  // if a quick strpos() pre-check gave a suspicion (because strip_tags() is expensive).
  if (isset($options['attributes']['title']) && strpos($options['attributes']['title'], '<') !== FALSE) {
    $options['attributes']['title'] = strip_tags($options['attributes']['title']);
  }

  // Emit a real anchor so that $path and the attributes prepared above are
  // actually used; the markup follows what l() produces.
  return '<a href="'. check_url(url($path, $options)) .'"'. drupal_attributes($options['attributes']) .'>'. ($options['html'] ? $text : check_plain($text)) .'</a>';
}

/**
 * Implementation of hook_form_[form_id]_alter().
 *
 * This adds the 0 option to the search admin form.
 *
 * @see apachesolr_search_form_search_admin_settings_alter()
 */
function google_cse_adv_form_search_admin_settings_alter(&$form, $form_state) {
  $form['indexing_throttle']['search_cron_limit']['#options']['0'] = '0';
  ksort($form['indexing_throttle']['search_cron_limit']['#options']);
}

/**
 * Initialize a pager for theme('pager') without running an SQL query.
 *
 * @param int $total
 *   Total number of items.
 * @param int $limit
 *   Items per page; values below 1 are treated as 1.
 * @param int $element
 *   Pager element index.
 *
 * @return int
 *   The current page for $element, clamped to the available pages.
 *
 * @see apachesolr_pager_init()
 * @see pager_query()
 */
function _google_cse_adv_pager_init($total, $limit = 10, $element = 0) {
  global $pager_page_array, $pager_total, $pager_total_items;
  $page = isset($_GET['page']) ? $_GET['page'] : '';

  // A limit of zero would divide by zero below.
  $limit = max(1, (int) $limit);

  // Convert comma-separated $page to an array, used by other functions.
  $pager_page_array = explode(',', $page);

  // We calculate the total of pages as ceil(items / limit).
  $pager_total_items[$element] = $total;
  $pager_total[$element] = ceil($pager_total_items[$element] / $limit);
  // The query string may hold fewer entries than $element; fall back to 0.
  $current = isset($pager_page_array[$element]) ? (int) $pager_page_array[$element] : 0;
  $pager_page_array[$element] = max(0, min($current, ((int)$pager_total[$element]) - 1));

  return $pager_page_array[$element];
}

/**
 * Implementation of hook_theme().
 */
function google_cse_adv_theme($existing, $type, $theme, $path) {
  return array(
    'google_cse_adv_suggestion' => array(
      'arguments' => array('suggestions' => NULL)
    ),
  );
}

/**
 * Themes the list of spelling suggestion links.
 *
 * @param array $suggestions
 *   Suggestion links.
 * @return string
 */
function theme_google_cse_adv_suggestion($suggestions = NULL) {
  if (empty($suggestions)) {
    return '';
  }

  // Re-index so the item list receives a plain sequential array.
  $items = array_values($suggestions);

  // Plain assignment: there is no earlier output to append to.
  $output = '
' . t('Did you mean:') . '
';
  $output .= theme('item_list', $items);

  return $output;
}