Pārlūkot izejas kodu

refactor: extract image scraping into a dedicated module

frosty 1 dienu atpakaļ
vecāks
revīzija
c7b95d0571

+ 7 - 0
src/Config.h

@@ -23,6 +23,13 @@
 #define INFOBOX_FIELD_COUNT 4
 #define MAX_RESULTS_PER_ENGINE 10
 
+#define CURL_TIMEOUT_SECS 15L
+#define CURL_DNS_TIMEOUT_SECS 300L
+
+#define BING_IMAGE_URL "https://www.bing.com/images/search"
+#define IMAGE_RESULTS_PER_PAGE 32
+#define IMAGE_RESULT_FIELDS 4
+
 typedef struct {
   char host[256];
   int port;

+ 31 - 218
src/Routes/Images.c

@@ -1,15 +1,7 @@
 #include "Images.h"
-#include "../Scraping/Scraping.h"
-#include "../Utility/HttpClient.h"
+#include "../Scraping/ImageScraping.h"
 #include "../Utility/Unescape.h"
-#include "../Utility/XmlHelper.h"
-
-#include <curl/curl.h>
-#include <libxml/HTMLparser.h>
-#include <libxml/xpath.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
+#include "Config.h"
 
 int images_handler(UrlParams *params) {
   TemplateContext ctx = new_context();
@@ -28,12 +20,12 @@ int images_handler(UrlParams *params) {
     }
   }
 
-  context_set(&ctx, "query", raw_query);
-
   char page_str[16], prev_str[16], next_str[16];
   snprintf(page_str, sizeof(page_str), "%d", page);
   snprintf(prev_str, sizeof(prev_str), "%d", page > 1 ? page - 1 : 0);
   snprintf(next_str, sizeof(next_str), "%d", page + 1);
+
+  context_set(&ctx, "query", raw_query);
   context_set(&ctx, "page", page_str);
   context_set(&ctx, "prev_page", prev_str);
   context_set(&ctx, "next_page", next_str);
@@ -49,208 +41,41 @@ int images_handler(UrlParams *params) {
     return -1;
   }
 
-  CURL *tmp = curl_easy_init();
-  if (!tmp) {
-    send_response("<h1>Error initializing curl</h1>");
-    if (display_query)
-      free(display_query);
-    free_context(&ctx);
-    return -1;
-  }
-  char *encoded_query = curl_easy_escape(tmp, raw_query, 0);
-  curl_easy_cleanup(tmp);
-
-  if (!encoded_query) {
-    send_response("<h1>Error encoding query</h1>");
-    if (display_query)
-      free(display_query);
-    free_context(&ctx);
-    return -1;
-  }
-
-  char url[1024];
-  int first = (page - 1) * 32 + 1;
-  snprintf(url, sizeof(url), "https://www.bing.com/images/search?q=%s&first=%d",
-           encoded_query, first);
+  ImageResult *results = NULL;
+  int result_count = 0;
 
-  HttpResponse resp = http_get(
-      url,
-      "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko");
-  if (!resp.memory) {
+  if (scrape_images(raw_query, page, &results, &result_count) != 0 ||
+      !results) {
     send_response("<h1>Error fetching images</h1>");
-    free(encoded_query);
-    free(display_query);
-    free_context(&ctx);
-    return -1;
-  }
-
-  htmlDocPtr doc = htmlReadMemory(resp.memory, resp.size, NULL, NULL,
-                                  HTML_PARSE_RECOVER | HTML_PARSE_NOERROR);
-  if (!doc) {
-    http_response_free(&resp);
-    free(encoded_query);
     free(display_query);
     free_context(&ctx);
     return -1;
   }
 
-  xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
+  char ***image_matrix = malloc(sizeof(char **) * result_count);
+  int *inner_counts = malloc(sizeof(int) * result_count);
 
-  if (!xpathCtx) {
-    xmlFreeDoc(doc);
-    http_response_free(&resp);
-    free(encoded_query);
+  if (!image_matrix || !inner_counts) {
+    if (image_matrix)
+      free(image_matrix);
+    if (inner_counts)
+      free(inner_counts);
+    free_image_results(results, result_count);
     free(display_query);
     free_context(&ctx);
     return -1;
   }
 
-  xmlXPathObjectPtr xpathObj =
-      xmlXPathEvalExpression((const xmlChar *)"//div[@class='item']", xpathCtx);
-
-  int image_count = 0;
-  char ***image_matrix = NULL;
-  int *inner_counts = NULL;
-
-  if (xpathObj && xpathObj->nodesetval) {
-    int nodes = xpathObj->nodesetval->nodeNr;
-
-    int max_images = (nodes < 32) ? nodes : 32;
-    image_matrix = malloc(sizeof(char **) * max_images);
-    inner_counts = malloc(sizeof(int) * max_images);
-    if (!image_matrix || !inner_counts) {
-      if (image_matrix) free(image_matrix);
-      if (inner_counts) free(inner_counts);
-      image_matrix = NULL;
-      inner_counts = NULL;
-    }
-
-    for (int i = 0; i < nodes; i++) {
-      if (image_count >= 32)
-        break;
-
-      xmlNodePtr node = xpathObj->nodesetval->nodeTab[i];
-      xmlNodePtr img_node = NULL;
-      xmlNodePtr tit_node = NULL;
-      xmlNodePtr des_node = NULL;
-      xmlNodePtr thumb_link = NULL;
-
-      for (xmlNodePtr child = node->children; child; child = child->next) {
-        if (child->type != XML_ELEMENT_NODE)
-          continue;
-
-        if (xmlStrcmp(child->name, (const xmlChar *)"a") == 0) {
-          xmlChar *class = xmlGetProp(child, (const xmlChar *)"class");
-          if (class) {
-            if (xmlStrstr(class, (const xmlChar *)"thumb") != NULL) {
-              thumb_link = child;
-              for (xmlNodePtr thumb_child = child->children; thumb_child;
-                   thumb_child = thumb_child->next) {
-                if (xmlStrcmp(thumb_child->name, (const xmlChar *)"div") == 0) {
-                  xmlChar *div_class =
-                      xmlGetProp(thumb_child, (const xmlChar *)"class");
-                  if (div_class &&
-                      xmlStrcmp(div_class, (const xmlChar *)"cico") == 0) {
-                    for (xmlNodePtr cico_child = thumb_child->children;
-                         cico_child; cico_child = cico_child->next) {
-                      if (xmlStrcmp(cico_child->name, (const xmlChar *)"img") ==
-                          0) {
-                        img_node = cico_child;
-                        break;
-                      }
-                    }
-                  }
-                  if (div_class)
-                    xmlFree(div_class);
-                }
-              }
-            } else if (xmlStrstr(class, (const xmlChar *)"tit") != NULL) {
-              tit_node = child;
-            }
-            xmlFree(class);
-          }
-        } else if (xmlStrcmp(child->name, (const xmlChar *)"div") == 0) {
-          xmlChar *class = xmlGetProp(child, (const xmlChar *)"class");
-          if (class && xmlStrcmp(class, (const xmlChar *)"meta") == 0) {
-            for (xmlNodePtr meta_child = child->children; meta_child;
-                 meta_child = meta_child->next) {
-              if (xmlStrcmp(meta_child->name, (const xmlChar *)"div") == 0) {
-                xmlChar *div_class =
-                    xmlGetProp(meta_child, (const xmlChar *)"class");
-                if (div_class) {
-                  if (xmlStrcmp(div_class, (const xmlChar *)"des") == 0) {
-                    des_node = meta_child;
-                  }
-                  xmlFree(div_class);
-                }
-              } else if (xmlStrcmp(meta_child->name, (const xmlChar *)"a") ==
-                         0) {
-                xmlChar *a_class =
-                    xmlGetProp(meta_child, (const xmlChar *)"class");
-                if (a_class &&
-                    xmlStrstr(a_class, (const xmlChar *)"tit") != NULL) {
-                  tit_node = meta_child;
-                }
-                if (a_class)
-                  xmlFree(a_class);
-              }
-            }
-          }
-          if (class)
-            xmlFree(class);
-        }
-      }
-
-      xmlChar *iurl =
-          img_node ? xmlGetProp(img_node, (const xmlChar *)"src") : NULL;
-      xmlChar *full_url =
-          thumb_link ? xmlGetProp(thumb_link, (const xmlChar *)"href") : NULL;
-      xmlChar *title = des_node
-                           ? xmlNodeGetContent(des_node)
-                           : (tit_node ? xmlNodeGetContent(tit_node) : NULL);
-      xmlChar *rurl =
-          tit_node ? xmlGetProp(tit_node, (const xmlChar *)"href") : NULL;
-
-      if (iurl && strlen((char *)iurl) > 0) {
-        char *proxy_url = NULL;
-        CURL *esc_curl = curl_easy_init();
-        if (esc_curl) {
-          char *encoded = curl_easy_escape(esc_curl, (char *)iurl, 0);
-          if (encoded) {
-            size_t proxy_len = strlen("/proxy?url=") + strlen(encoded) + 1;
-            proxy_url = malloc(proxy_len);
-            if (proxy_url) {
-              snprintf(proxy_url, proxy_len, "/proxy?url=%s", encoded);
-            }
-            curl_free(encoded);
-          }
-          curl_easy_cleanup(esc_curl);
-        }
-
-        image_matrix[image_count] = malloc(sizeof(char *) * 4);
-        image_matrix[image_count][0] =
-            proxy_url ? strdup(proxy_url) : strdup((char *)iurl);
-        free(proxy_url);
-        image_matrix[image_count][1] = strdup(title ? (char *)title : "Image");
-        image_matrix[image_count][2] = strdup(rurl ? (char *)rurl : "#");
-        image_matrix[image_count][3] =
-            strdup(full_url ? (char *)full_url : "#");
-        inner_counts[image_count] = 4;
-        image_count++;
-      }
-
-      if (iurl)
-        xmlFree(iurl);
-      if (title)
-        xmlFree(title);
-      if (rurl)
-        xmlFree(rurl);
-      if (full_url)
-        xmlFree(full_url);
-    }
+  for (int i = 0; i < result_count; i++) {
+    image_matrix[i] = malloc(sizeof(char *) * IMAGE_RESULT_FIELDS);
+    image_matrix[i][0] = strdup(results[i].thumbnail_url);
+    image_matrix[i][1] = strdup(results[i].title);
+    image_matrix[i][2] = strdup(results[i].page_url);
+    image_matrix[i][3] = strdup(results[i].full_url);
+    inner_counts[i] = IMAGE_RESULT_FIELDS;
   }
 
-  context_set_array_of_arrays(&ctx, "images", image_matrix, image_count,
+  context_set_array_of_arrays(&ctx, "images", image_matrix, result_count,
                               inner_counts);
 
   char *rendered = render_template("images.html", &ctx);
@@ -261,27 +86,15 @@ int images_handler(UrlParams *params) {
     send_response("<h1>Error rendering image results</h1>");
   }
 
-  if (image_matrix) {
-    for (int i = 0; i < image_count; i++) {
-      for (int j = 0; j < 4; j++) {
-        free(image_matrix[i][j]);
-      }
-      free(image_matrix[i]);
-    }
-    free(image_matrix);
-  }
-  if (inner_counts) {
-    free(inner_counts);
+  for (int i = 0; i < result_count; i++) {
+    for (int j = 0; j < IMAGE_RESULT_FIELDS; j++)
+      free(image_matrix[i][j]);
+    free(image_matrix[i]);
   }
+  free(image_matrix);
+  free(inner_counts);
 
-  if (xpathObj)
-    xmlXPathFreeObject(xpathObj);
-  if (xpathCtx)
-    xmlXPathFreeContext(xpathCtx);
-  if (doc)
-    xmlFreeDoc(doc);
-  http_response_free(&resp);
-  curl_free(encoded_query);
+  free_image_results(results, result_count);
   free(display_query);
   free_context(&ctx);
 

+ 239 - 0
src/Scraping/ImageScraping.c

@@ -0,0 +1,239 @@
+#include "ImageScraping.h"
+#include "../Utility/HttpClient.h"
+#include "Config.h"
+#include <libxml/HTMLparser.h>
+#include <libxml/xpath.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+static char *build_proxy_url(const char *image_url) {
+  if (!image_url)
+    return NULL;
+
+  char *proxy_url = NULL;
+  CURL *curl = curl_easy_init();
+  if (curl) {
+    char *encoded = curl_easy_escape(curl, (char *)image_url, 0);
+    if (encoded) {
+      size_t len = strlen("/proxy?url=") + strlen(encoded) + 1;
+      proxy_url = malloc(len);
+      if (proxy_url)
+        snprintf(proxy_url, len, "/proxy?url=%s", encoded);
+      curl_free(encoded);
+    }
+    curl_easy_cleanup(curl);
+  }
+
+  return proxy_url;
+}
+
+static int parse_image_node(xmlNodePtr node, ImageResult *result) {
+  xmlNodePtr img_node = NULL;
+  xmlNodePtr tit_node = NULL;
+  xmlNodePtr des_node = NULL;
+  xmlNodePtr thumb_link = NULL;
+
+  for (xmlNodePtr child = node->children; child; child = child->next) {
+    if (child->type != XML_ELEMENT_NODE)
+      continue;
+
+    if (xmlStrcmp(child->name, (const xmlChar *)"a") == 0) {
+      xmlChar *class = xmlGetProp(child, (const xmlChar *)"class");
+      if (class) {
+        if (xmlStrstr(class, (const xmlChar *)"thumb") != NULL) {
+          thumb_link = child;
+          for (xmlNodePtr thumb_child = child->children; thumb_child;
+               thumb_child = thumb_child->next) {
+            if (xmlStrcmp(thumb_child->name, (const xmlChar *)"div") == 0) {
+              xmlChar *div_class =
+                  xmlGetProp(thumb_child, (const xmlChar *)"class");
+              if (div_class &&
+                  xmlStrcmp(div_class, (const xmlChar *)"cico") == 0) {
+                for (xmlNodePtr cico_child = thumb_child->children; cico_child;
+                     cico_child = cico_child->next) {
+                  if (xmlStrcmp(cico_child->name, (const xmlChar *)"img") ==
+                      0) {
+                    img_node = cico_child;
+                    break;
+                  }
+                }
+              }
+              if (div_class)
+                xmlFree(div_class);
+            }
+          }
+        } else if (xmlStrstr(class, (const xmlChar *)"tit") != NULL) {
+          tit_node = child;
+        }
+        xmlFree(class);
+      }
+    } else if (xmlStrcmp(child->name, (const xmlChar *)"div") == 0) {
+      xmlChar *class = xmlGetProp(child, (const xmlChar *)"class");
+      if (class && xmlStrcmp(class, (const xmlChar *)"meta") == 0) {
+        for (xmlNodePtr meta_child = child->children; meta_child;
+             meta_child = meta_child->next) {
+          if (xmlStrcmp(meta_child->name, (const xmlChar *)"div") == 0) {
+            xmlChar *div_class =
+                xmlGetProp(meta_child, (const xmlChar *)"class");
+            if (div_class) {
+              if (xmlStrcmp(div_class, (const xmlChar *)"des") == 0) {
+                des_node = meta_child;
+              }
+              xmlFree(div_class);
+            }
+          } else if (xmlStrcmp(meta_child->name, (const xmlChar *)"a") == 0) {
+            xmlChar *a_class = xmlGetProp(meta_child, (const xmlChar *)"class");
+            if (a_class && xmlStrstr(a_class, (const xmlChar *)"tit") != NULL) {
+              tit_node = meta_child;
+            }
+            if (a_class)
+              xmlFree(a_class);
+          }
+        }
+      }
+      if (class)
+        xmlFree(class);
+    }
+  }
+
+  xmlChar *iurl =
+      img_node ? xmlGetProp(img_node, (const xmlChar *)"src") : NULL;
+  xmlChar *full_url =
+      thumb_link ? xmlGetProp(thumb_link, (const xmlChar *)"href") : NULL;
+  xmlChar *title = des_node ? xmlNodeGetContent(des_node)
+                            : (tit_node ? xmlNodeGetContent(tit_node) : NULL);
+  xmlChar *rurl =
+      tit_node ? xmlGetProp(tit_node, (const xmlChar *)"href") : NULL;
+
+  if (!iurl || strlen((char *)iurl) == 0) {
+    if (iurl)
+      xmlFree(iurl);
+    if (title)
+      xmlFree(title);
+    if (rurl)
+      xmlFree(rurl);
+    if (full_url)
+      xmlFree(full_url);
+    return 0;
+  }
+
+  char *proxy_url = build_proxy_url((char *)iurl);
+  result->thumbnail_url = proxy_url ? strdup(proxy_url) : strdup((char *)iurl);
+  free(proxy_url);
+  result->title = strdup(title ? (char *)title : "Image");
+  result->page_url = strdup(rurl ? (char *)rurl : "#");
+  result->full_url = strdup(full_url ? (char *)full_url : "#");
+
+  if (iurl)
+    xmlFree(iurl);
+  if (title)
+    xmlFree(title);
+  if (rurl)
+    xmlFree(rurl);
+  if (full_url)
+    xmlFree(full_url);
+
+  return 1;
+}
+
+int scrape_images(const char *query, int page, ImageResult **out_results,
+                  int *out_count) {
+  *out_results = NULL;
+  *out_count = 0;
+
+  if (!query || strlen(query) == 0)
+    return -1;
+
+  CURL *tmp = curl_easy_init();
+  if (!tmp)
+    return -1;
+
+  char *encoded_query = curl_easy_escape(tmp, query, 0);
+  curl_easy_cleanup(tmp);
+
+  if (!encoded_query)
+    return -1;
+
+  char url[BUFFER_SIZE_LARGE];
+  int first = (page - 1) * IMAGE_RESULTS_PER_PAGE + 1;
+  snprintf(url, sizeof(url), "%s?q=%s&first=%d", BING_IMAGE_URL, encoded_query,
+           first);
+  free(encoded_query);
+
+  HttpResponse resp = http_get(
+      url,
+      "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko");
+  if (!resp.memory) {
+    return -1;
+  }
+
+  htmlDocPtr doc = htmlReadMemory(resp.memory, resp.size, NULL, NULL,
+                                  HTML_PARSE_RECOVER | HTML_PARSE_NOERROR);
+  if (!doc) {
+    http_response_free(&resp);
+    return -1;
+  }
+
+  xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
+  if (!xpathCtx) {
+    xmlFreeDoc(doc);
+    http_response_free(&resp);
+    return -1;
+  }
+
+  xmlXPathObjectPtr xpathObj =
+      xmlXPathEvalExpression((const xmlChar *)"//div[@class='item']", xpathCtx);
+
+  if (!xpathObj || !xpathObj->nodesetval) {
+    if (xpathObj)
+      xmlXPathFreeObject(xpathObj);
+    xmlXPathFreeContext(xpathCtx);
+    xmlFreeDoc(doc);
+    http_response_free(&resp);
+    return 0;
+  }
+
+  int nodes = xpathObj->nodesetval->nodeNr;
+  int max_images =
+      (nodes < IMAGE_RESULTS_PER_PAGE) ? nodes : IMAGE_RESULTS_PER_PAGE;
+
+  ImageResult *results = malloc(sizeof(ImageResult) * max_images);
+  if (!results) {
+    xmlXPathFreeObject(xpathObj);
+    xmlXPathFreeContext(xpathCtx);
+    xmlFreeDoc(doc);
+    http_response_free(&resp);
+    return -1;
+  }
+
+  int count = 0;
+  for (int i = 0; i < nodes && count < IMAGE_RESULTS_PER_PAGE; i++) {
+    xmlNodePtr node = xpathObj->nodesetval->nodeTab[i];
+    if (parse_image_node(node, &results[count])) {
+      count++;
+    }
+  }
+
+  xmlXPathFreeObject(xpathObj);
+  xmlXPathFreeContext(xpathCtx);
+  xmlFreeDoc(doc);
+  http_response_free(&resp);
+
+  *out_results = results;
+  *out_count = count;
+  return 0;
+}
+
+void free_image_results(ImageResult *results, int count) {
+  if (!results)
+    return;
+
+  for (int i = 0; i < count; i++) {
+    free(results[i].thumbnail_url);
+    free(results[i].title);
+    free(results[i].page_url);
+    free(results[i].full_url);
+  }
+  free(results);
+}

+ 18 - 0
src/Scraping/ImageScraping.h

@@ -0,0 +1,18 @@
#ifndef IMAGESCRAPING_H
#define IMAGESCRAPING_H

#include <curl/curl.h>
#include <libxml/HTMLparser.h>

/* One scraped image search result. Every field is a heap-allocated,
 * NUL-terminated string owned by the containing array; release the whole
 * array with free_image_results(). */
typedef struct {
  char *thumbnail_url; /* local /proxy?url=... wrapper (or raw src on OOM) */
  char *title;         /* description or link text; "Image" fallback */
  char *page_url;      /* result link href; "#" fallback */
  char *full_url;      /* thumbnail anchor href; "#" fallback */
} ImageResult;

/* Scrape one page (1-based `page`) of image results for `query`.
 * Returns 0 on success and hands ownership of *out_results to the caller;
 * returns non-zero on failure. NOTE(review): whether *out_results may be
 * NULL with a 0 return on an empty page depends on the implementation —
 * callers should treat that combination as "no results". */
int scrape_images(const char *query, int page, ImageResult **out_results,
                  int *out_count);
/* Free `count` entries and the array itself; NULL-safe. */
void free_image_results(ImageResult *results, int count);

#endif

+ 115 - 474
src/Scraping/Scraping.c

@@ -1,395 +1,20 @@
 #include "Scraping.h"
 #include "../Cache/Cache.h"
 #include "../Proxy/Proxy.h"
-#include "../Utility/Unescape.h"
-#include "../Utility/XmlHelper.h"
 #include "Config.h"
 #include <curl/curl.h>
 #include <libxml/HTMLparser.h>
-#include <libxml/xpath.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include <string.h>
 #include <time.h>
-#include <unistd.h>
-
-static size_t WriteMemoryCallback(void *contents, size_t size, size_t nmemb,
-                                  void *userp) {
-  size_t realsize = size * nmemb;
-  MemoryBuffer *mem = (MemoryBuffer *)userp;
-
-  if (mem->size + realsize + 1 > mem->capacity) {
-    size_t new_cap =
-        mem->capacity == 0 ? INITIAL_BUFFER_SIZE : mem->capacity * 2;
-    while (new_cap < mem->size + realsize + 1)
-      new_cap *= 2;
-
-    char *ptr = (char *)realloc(mem->memory, new_cap);
-    if (!ptr) {
-      return 0;
-    }
-    mem->memory = ptr;
-    mem->capacity = new_cap;
-  }
-
-  memcpy(&(mem->memory[mem->size]), contents, realsize);
-  mem->size += realsize;
-  mem->memory[mem->size] = 0;
-
-  return realsize;
-}
-
-static const char *get_random_user_agent(void) {
-  static const char *agents[] = {
-      "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, "
-      "like Gecko) Chrome/120.0.0.0 Safari/537.36",
-      "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 "
-      "(KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
-      "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like "
-      "Gecko) "
-      "Chrome/120.0.0.0` Safari/537.36",
-      "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 "
-      "Firefox/121.0",
-      "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 "
-      "(KHTML, like Gecko) Version/17.2 Safari/605.1.15"};
-  return agents[rand() % 5];
-}
-
-static int parse_ddg_lite(const char *engine_name, xmlDocPtr doc,
-                          SearchResult **out_results, int max_results) {
-  (void)engine_name;
-  int found_count = 0;
-
-  xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
-  if (!xpathCtx) {
-    return 0;
-  }
-
-  xmlXPathObjectPtr xpathObj = xml_xpath_eval(
-      xpathCtx, "//tr[not(contains(@class, "
-                "'result-sponsored'))]//a[@class='result-link']");
-
-  if (!xpathObj || !xpathObj->nodesetval || xpathObj->nodesetval->nodeNr == 0) {
-    if (xpathObj)
-      xmlXPathFreeObject(xpathObj);
-    xmlXPathFreeContext(xpathCtx);
-    return 0;
-  }
-
-  int num_links = xpathObj->nodesetval->nodeNr;
-  *out_results = xml_result_alloc(num_links, max_results);
-  if (!*out_results) {
-    xmlXPathFreeObject(xpathObj);
-    xmlXPathFreeContext(xpathCtx);
-    return 0;
-  }
-
-  for (int i = 0; i < num_links && found_count < max_results; i++) {
-    xmlNodePtr linkNode = xpathObj->nodesetval->nodeTab[i];
-    char *title = xml_node_content(linkNode);
-    char *url = (char *)xmlGetProp(linkNode, (xmlChar *)"href");
-    char *snippet_text = NULL;
-
-    xmlNodePtr current = linkNode->parent;
-    while (current && xmlStrcasecmp(current->name, (const xmlChar *)"tr") != 0)
-      current = current->parent;
-
-    if (current && current->next) {
-      xmlNodePtr snippetRow = current->next;
-      while (snippetRow &&
-             xmlStrcasecmp(snippetRow->name, (const xmlChar *)"tr") != 0)
-        snippetRow = snippetRow->next;
-      if (snippetRow) {
-        xpathCtx->node = snippetRow;
-        xmlXPathObjectPtr sObj =
-            xml_xpath_eval(xpathCtx, ".//td[@class='result-snippet']");
-        if (sObj && sObj->nodesetval && sObj->nodesetval->nodeNr > 0) {
-          snippet_text = xml_node_content(sObj->nodesetval->nodeTab[0]);
-        }
-        if (sObj)
-          xmlXPathFreeObject(sObj);
-        xpathCtx->node = NULL;
-      }
-    }
-
-    (*out_results)[found_count].url = unescape_search_url(url);
-    (*out_results)[found_count].title = strdup(title ? title : "No Title");
-    (*out_results)[found_count].snippet =
-        strdup(snippet_text ? snippet_text : "");
-    found_count++;
-
-    if (title)
-      xmlFree(title);
-    if (url)
-      xmlFree(url);
-    if (snippet_text)
-      xmlFree(snippet_text);
-  }
-
-  xmlXPathFreeObject(xpathObj);
-  xmlXPathFreeContext(xpathCtx);
-  return found_count;
-}
-
-static int parse_startpage(const char *engine_name, xmlDocPtr doc,
-                           SearchResult **out_results, int max_results) {
-  (void)engine_name;
-  int found_count = 0;
-
-  xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
-  if (!xpathCtx) {
-    return 0;
-  }
-
-  xmlXPathObjectPtr xpathObj =
-      xml_xpath_eval(xpathCtx, "//div[contains(@class, 'result')]");
-
-  if (!xpathObj || !xpathObj->nodesetval || xpathObj->nodesetval->nodeNr == 0) {
-    if (xpathObj)
-      xmlXPathFreeObject(xpathObj);
-    xmlXPathFreeContext(xpathCtx);
-    return 0;
-  }
-
-  int num_results = xpathObj->nodesetval->nodeNr;
-  *out_results = xml_result_alloc(num_results, max_results);
-  if (!*out_results) {
-    xmlXPathFreeObject(xpathObj);
-    xmlXPathFreeContext(xpathCtx);
-    return 0;
-  }
-
-  for (int i = 0; i < num_results && found_count < max_results; i++) {
-    xmlNodePtr resultNode = xpathObj->nodesetval->nodeTab[i];
-    xpathCtx->node = resultNode;
-
-    xmlXPathObjectPtr linkObj =
-        xml_xpath_eval(xpathCtx, ".//a[contains(@class, 'result-link')]");
-    char *url =
-        (linkObj && linkObj->nodesetval && linkObj->nodesetval->nodeNr > 0)
-            ? (char *)xmlGetProp(linkObj->nodesetval->nodeTab[0],
-                                 (xmlChar *)"href")
-            : NULL;
-
-    xmlXPathObjectPtr titleObj =
-        xml_xpath_eval(xpathCtx, ".//h2[contains(@class, 'wgl-title')]");
-    char *title =
-        (titleObj && titleObj->nodesetval && titleObj->nodesetval->nodeNr > 0)
-            ? xml_node_content(titleObj->nodesetval->nodeTab[0])
-            : NULL;
-
-    xmlXPathObjectPtr snippetObj =
-        xml_xpath_eval(xpathCtx, ".//p[contains(@class, 'description')]");
-    char *snippet_text =
-        (snippetObj && snippetObj->nodesetval &&
-         snippetObj->nodesetval->nodeNr > 0)
-            ? xml_node_content(snippetObj->nodesetval->nodeTab[0])
-            : NULL;
-
-    if (url && title) {
-      (*out_results)[found_count].url = strdup(url);
-      (*out_results)[found_count].title = strdup(title);
-      (*out_results)[found_count].snippet =
-          strdup(snippet_text ? snippet_text : "");
-      found_count++;
-    }
-
-    if (title)
-      xmlFree(title);
-    if (url)
-      xmlFree(url);
-    if (snippet_text)
-      xmlFree(snippet_text);
-    if (linkObj)
-      xmlXPathFreeObject(linkObj);
-    if (titleObj)
-      xmlXPathFreeObject(titleObj);
-    if (snippetObj)
-      xmlXPathFreeObject(snippetObj);
-  }
-
-  xpathCtx->node = NULL;
-  xmlXPathFreeObject(xpathObj);
-  xmlXPathFreeContext(xpathCtx);
-  return found_count;
-}
-
-static int parse_yahoo(const char *engine_name, xmlDocPtr doc,
-                       SearchResult **out_results, int max_results) {
-  (void)engine_name;
-  int found_count = 0;
-
-  xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
-  if (!xpathCtx) {
-    return 0;
-  }
-
-  xmlXPathObjectPtr xpathObj =
-      xml_xpath_eval(xpathCtx, "//div[contains(@class, 'algo-sr')]");
-
-  if (!xpathObj || !xpathObj->nodesetval || xpathObj->nodesetval->nodeNr == 0) {
-    if (xpathObj)
-      xmlXPathFreeObject(xpathObj);
-    xmlXPathFreeContext(xpathCtx);
-    return 0;
-  }
 
-  int num_results = xpathObj->nodesetval->nodeNr;
-  *out_results = xml_result_alloc(num_results, max_results);
-  if (!*out_results) {
-    xmlXPathFreeObject(xpathObj);
-    xmlXPathFreeContext(xpathCtx);
+int check_cache_for_job(ScrapeJob *job) {
+  if (get_cache_ttl_search() <= 0)
     return 0;
-  }
-
-  for (int i = 0; i < num_results && found_count < max_results; i++) {
-    xmlNodePtr resultNode = xpathObj->nodesetval->nodeTab[i];
-    xpathCtx->node = resultNode;
-
-    xmlXPathObjectPtr linkObj = xml_xpath_eval(
-        xpathCtx, ".//div[contains(@class, 'compTitle')]//a[@target='_blank']");
-    char *url =
-        (linkObj && linkObj->nodesetval && linkObj->nodesetval->nodeNr > 0)
-            ? (char *)xmlGetProp(linkObj->nodesetval->nodeTab[0],
-                                 (xmlChar *)"href")
-            : NULL;
-
-    xmlXPathObjectPtr titleObj =
-        xml_xpath_eval(xpathCtx, ".//h3[contains(@class, 'title')]");
-    char *title =
-        (titleObj && titleObj->nodesetval && titleObj->nodesetval->nodeNr > 0)
-            ? xml_node_content(titleObj->nodesetval->nodeTab[0])
-            : NULL;
-
-    xmlXPathObjectPtr snippetObj =
-        xml_xpath_eval(xpathCtx, ".//div[contains(@class, 'compText')]//p");
-    char *snippet_text =
-        (snippetObj && snippetObj->nodesetval &&
-         snippetObj->nodesetval->nodeNr > 0)
-            ? xml_node_content(snippetObj->nodesetval->nodeTab[0])
-            : NULL;
-
-    if (url && title) {
-      (*out_results)[found_count].url = unescape_search_url(url);
-      (*out_results)[found_count].title = strdup(title);
-      (*out_results)[found_count].snippet =
-          strdup(snippet_text ? snippet_text : "");
-      found_count++;
-    }
-
-    if (title)
-      xmlFree(title);
-    if (url)
-      xmlFree(url);
-    if (snippet_text)
-      xmlFree(snippet_text);
-    if (linkObj)
-      xmlXPathFreeObject(linkObj);
-    if (titleObj)
-      xmlXPathFreeObject(titleObj);
-    if (snippetObj)
-      xmlXPathFreeObject(snippetObj);
-  }
-
-  xpathCtx->node = NULL;
-  xmlXPathFreeObject(xpathObj);
-  xmlXPathFreeContext(xpathCtx);
-  return found_count;
-}
-
-const SearchEngine ENGINE_REGISTRY[] = {
-    {.name = "DuckDuckGo Lite",
-     .base_url = "https://lite.duckduckgo.com/lite/?q=",
-     .host_header = "lite.duckduckgo.com",
-     .referer = "https://lite.duckduckgo.com/",
-     .page_param = "s",
-     .page_multiplier = 30,
-     .page_base = 0,
-     .parser = parse_ddg_lite},
-    {.name = "Startpage",
-     .base_url = "https://www.startpage.com/sp/search?q=",
-     .host_header = "www.startpage.com",
-     .referer = "https://www.startpage.com/",
-     .page_param = "page",
-     .page_multiplier = 1,
-     .page_base = 1,
-     .parser = parse_startpage},
-    {.name = "Yahoo",
-     .base_url = "https://search.yahoo.com/search?p=",
-     .host_header = "search.yahoo.com",
-     .referer = "https://search.yahoo.com/",
-     .page_param = "b",
-     .page_multiplier = 10,
-     .page_base = 1,
-     .parser = parse_yahoo}};
-
-const int ENGINE_COUNT = sizeof(ENGINE_REGISTRY) / sizeof(SearchEngine);
-
-#define CURL_TIMEOUT 15L
-#define CURL_DNS_TIMEOUT 300L
-
-static void configure_curl_handle(CURL *curl, const char *full_url,
-                                  MemoryBuffer *chunk,
-                                  struct curl_slist *headers) {
-  curl_easy_setopt(curl, CURLOPT_URL, full_url);
-  curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
-  curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
-  curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)chunk);
-  curl_easy_setopt(curl, CURLOPT_USERAGENT, get_random_user_agent());
-
-  curl_easy_setopt(curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
-  curl_easy_setopt(curl, CURLOPT_ACCEPT_ENCODING, "");
-  curl_easy_setopt(curl, CURLOPT_DNS_CACHE_TIMEOUT, CURL_DNS_TIMEOUT);
-  curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
-  curl_easy_setopt(curl, CURLOPT_TIMEOUT, CURL_TIMEOUT);
-  curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 1L);
-  curl_easy_setopt(curl, CURLOPT_COOKIEFILE, "");
-
-  apply_proxy_settings(curl);
-}
-
-static char *build_search_url(const char *base_url, const char *page_param,
-                              int page_multiplier, int page_base,
-                              const char *encoded_query, int page) {
-  int page_value = (page < 1 ? 1 : page - 1) * page_multiplier + page_base;
-  char *url = malloc(BUFFER_SIZE_LARGE);
-  if (!url) {
-    return NULL;
-  }
-  snprintf(url, BUFFER_SIZE_LARGE, "%s%s&%s=%d", base_url, encoded_query,
-           page_param, page_value);
-  return url;
-}
-
-static struct curl_slist *build_request_headers(const char *host_header,
-                                                const char *referer) {
-  struct curl_slist *headers = NULL;
-  char host_buf[BUFFER_SIZE_MEDIUM], ref_buf[BUFFER_SIZE_MEDIUM];
-
-  snprintf(host_buf, sizeof(host_buf), "Host: %s", host_header);
-  snprintf(ref_buf, sizeof(ref_buf), "Referer: %s", referer);
-
-  headers = curl_slist_append(headers, host_buf);
-  headers = curl_slist_append(headers, ref_buf);
-  headers = curl_slist_append(
-      headers,
-      "Accept: "
-      "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
-  headers = curl_slist_append(headers, "Accept-Language: en-US,en;q=0.5");
-  headers = curl_slist_append(headers, "DNT: 1");
-
-  return headers;
-}
-
-static int check_cache_for_job(ScrapeJob *job) {
-  if (get_cache_ttl_search() <= 0) {
-    return 0;
-  }
 
   char *key = cache_compute_key(job->query, job->page, job->engine->name);
-  if (!key) {
+  if (!key)
     return 0;
-  }
 
   char *cached_data = NULL;
   size_t cached_size = 0;
@@ -414,27 +39,31 @@ static int check_cache_for_job(ScrapeJob *job) {
   return 0;
 }
 
-static void process_job_response(ScrapeJob *job, CURL *handle, CURLMsg *msg) {
-  if (msg->data.result == CURLE_OK && job->response.size > 0) {
-    char *key = cache_compute_key(job->query, job->page, job->engine->name);
-    if (key && get_cache_ttl_search() > 0) {
-      cache_set(key, job->response.memory, job->response.size);
-      free(key);
-    }
+void parse_and_cache_response(ScrapeJob *job) {
+  if (job->response.size == 0) {
+    job->results_count = 0;
+    return;
+  }
 
-    xmlDocPtr doc = htmlReadMemory(
-        job->response.memory, job->response.size, NULL, NULL,
-        HTML_PARSE_RECOVER | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING);
+  char *key = cache_compute_key(job->query, job->page, job->engine->name);
+  if (key && get_cache_ttl_search() > 0)
+    cache_set(key, job->response.memory, job->response.size);
+  free(key);
 
-    if (doc) {
-      job->results_count = job->engine->parser(
-          job->engine->name, doc, job->out_results, job->max_results);
-      xmlFreeDoc(doc);
-    }
+  xmlDocPtr doc = htmlReadMemory(
+      job->response.memory, job->response.size, NULL, NULL,
+      HTML_PARSE_RECOVER | HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING);
+
+  if (doc) {
+    job->results_count = job->engine->parser(
+        job->engine->name, doc, job->out_results, job->max_results);
+    xmlFreeDoc(doc);
   } else {
     job->results_count = 0;
   }
+}
 
+void cleanup_job_handle(ScrapeJob *job, CURL *handle) {
   struct curl_slist *headers = NULL;
   curl_easy_getinfo(handle, CURLINFO_PRIVATE, &headers);
   if (headers)
@@ -444,67 +73,112 @@ static void process_job_response(ScrapeJob *job, CURL *handle, CURLMsg *msg) {
   job->response.memory = NULL;
 }
 
-int scrape_engines_parallel(ScrapeJob *jobs, int num_jobs) {
-  int retries = 0;
+void process_response(ScrapeJob *job, CURL *handle, CURLMsg *msg) {
+  if (msg->data.result == CURLE_OK)
+    parse_and_cache_response(job);
+  else
+    job->results_count = 0;
 
-retry:
-  CURLM *multi_handle = curl_multi_init();
-  if (!multi_handle) {
+  cleanup_job_handle(job, handle);
+}
+
+int setup_job(ScrapeJob *job, CURLM *multi_handle) {
+  if (job->handle)
+    curl_easy_cleanup(job->handle);
+  if (job->response.memory)
+    free(job->response.memory);
+
+  if (check_cache_for_job(job)) {
+    job->results_count = job->results_count > 0 ? job->results_count : 0;
+    return 0;
+  }
+
+  char *encoded_query = curl_easy_escape(NULL, job->query, 0);
+  if (!encoded_query)
+    return -1;
+
+  char *full_url =
+      build_search_url(job->engine->base_url, job->engine->page_param,
+                       job->engine->page_multiplier, job->engine->page_base,
+                       encoded_query, job->page);
+  free(encoded_query);
+
+  if (!full_url)
+    return -1;
+
+  job->handle = curl_easy_init();
+  if (!job->handle) {
+    free(full_url);
     return -1;
   }
 
-  for (int i = 0; i < num_jobs; i++) {
-    ScrapeJob *job = &jobs[i];
+  job->response.memory = (char *)malloc(INITIAL_BUFFER_SIZE);
+  job->response.size = 0;
+  job->response.capacity = INITIAL_BUFFER_SIZE;
 
-    if (job->handle) {
-      curl_easy_cleanup(job->handle);
-      job->handle = NULL;
-    }
-    if (job->response.memory) {
-      free(job->response.memory);
-    }
+  struct curl_slist *headers =
+      build_request_headers(job->engine->host_header, job->engine->referer);
 
-    if (check_cache_for_job(job)) {
-      job->results_count = job->results_count > 0 ? job->results_count : 0;
-      continue;
-    }
+  configure_curl_handle(job->handle, full_url, &job->response, headers);
+  curl_easy_setopt(job->handle, CURLOPT_PRIVATE, headers);
 
-    char *encoded_query = curl_easy_escape(NULL, job->query, 0);
-    if (!encoded_query) {
-      continue;
-    }
+  free(full_url);
+  curl_multi_add_handle(multi_handle, job->handle);
+  return 0;
+}
 
-    char *full_url =
-        build_search_url(job->engine->base_url, job->engine->page_param,
-                         job->engine->page_multiplier, job->engine->page_base,
-                         encoded_query, job->page);
-    free(encoded_query);
+int handle_responses(CURLM *multi_handle, ScrapeJob *jobs, int num_jobs) {
+  CURLMsg *msg;
+  int msgs_left;
 
-    if (!full_url) {
+  while ((msg = curl_multi_info_read(multi_handle, &msgs_left))) {
+    if (msg->msg != CURLMSG_DONE)
       continue;
-    }
 
-    job->handle = curl_easy_init();
-    if (!job->handle) {
-      free(full_url);
-      continue;
+    CURL *handle = msg->easy_handle;
+
+    for (int i = 0; i < num_jobs; i++) {
+      if (jobs[i].handle && jobs[i].handle == handle) {
+        process_response(&jobs[i], handle, msg);
+        curl_multi_remove_handle(multi_handle, handle);
+        curl_easy_cleanup(handle);
+        jobs[i].handle = NULL;
+        break;
+      }
     }
+  }
 
-    job->response.memory = (char *)malloc(INITIAL_BUFFER_SIZE);
-    job->response.size = 0;
-    job->response.capacity = INITIAL_BUFFER_SIZE;
+  return 0;
+}
 
-    struct curl_slist *headers =
-        build_request_headers(job->engine->host_header, job->engine->referer);
+int should_retry(ScrapeJob *jobs, int num_jobs) {
+  if (proxy_count <= 0)
+    return 0;
 
-    configure_curl_handle(job->handle, full_url, &job->response, headers);
-    curl_easy_setopt(job->handle, CURLOPT_PRIVATE, headers);
+  for (int i = 0; i < num_jobs; i++) {
+    if (jobs[i].results_count == 0 && jobs[i].response.size == 0)
+      return 1;
+  }
+  return 0;
+}
 
-    free(full_url);
-    curl_multi_add_handle(multi_handle, job->handle);
+int scrape_engines_parallel(ScrapeJob *jobs, int num_jobs) {
+  int retries = 0;
+
+retry:
+  CURLM *multi_handle = curl_multi_init();
+  if (!multi_handle)
+    return -1;
+
+  for (int i = 0; i < num_jobs; i++) {
+    if (setup_job(&jobs[i], multi_handle) != 0 && jobs[i].handle) {
+      curl_multi_remove_handle(multi_handle, jobs[i].handle);
+      curl_easy_cleanup(jobs[i].handle);
+      jobs[i].handle = NULL;
+    }
   }
 
-  usleep(100000 + (rand() % 100000));
+  http_delay();
 
   int still_running = 0;
   curl_multi_perform(multi_handle, &still_running);
@@ -512,50 +186,17 @@ retry:
   do {
     int numfds = 0;
     CURLMcode mc = curl_multi_wait(multi_handle, NULL, 0, 1000, &numfds);
-
-    if (mc != CURLM_OK) {
+    if (mc != CURLM_OK)
       break;
-    }
-
     curl_multi_perform(multi_handle, &still_running);
   } while (still_running);
 
-  CURLMsg *msg;
-  int msgs_left;
-  while ((msg = curl_multi_info_read(multi_handle, &msgs_left))) {
-    if (msg->msg == CURLMSG_DONE) {
-      CURL *handle = msg->easy_handle;
-
-      for (int i = 0; i < num_jobs; i++) {
-        if (jobs[i].handle && jobs[i].handle == handle) {
-          ScrapeJob *job = &jobs[i];
-
-          process_job_response(job, handle, msg);
-
-          curl_multi_remove_handle(multi_handle, handle);
-          if (handle)
-            curl_easy_cleanup(handle);
-          job->handle = NULL;
-          break;
-        }
-      }
-    }
-  }
-
+  handle_responses(multi_handle, jobs, num_jobs);
   curl_multi_cleanup(multi_handle);
 
-  if (retries < max_proxy_retries && proxy_count > 0) {
-    int any_failed = 0;
-    for (int i = 0; i < num_jobs; i++) {
-      if (jobs[i].results_count == 0 && jobs[i].response.size == 0) {
-        any_failed = 1;
-        break;
-      }
-    }
-    if (any_failed) {
-      retries++;
-      goto retry;
-    }
+  if (retries < max_proxy_retries && should_retry(jobs, num_jobs)) {
+    retries++;
+    goto retry;
   }
 
   return 0;

+ 20 - 0
src/Scraping/Scraping.h

@@ -3,6 +3,7 @@
 
 #include <curl/curl.h>
 #include <libxml/HTMLparser.h>
+#include <libxml/xpath.h>
 
 typedef struct {
   char *url;
@@ -45,6 +46,25 @@ typedef struct {
 extern const SearchEngine ENGINE_REGISTRY[];
 extern const int ENGINE_COUNT;
 
+size_t write_memory_callback(void *contents, size_t size, size_t nmemb,
+                             void *userp);
+const char *get_random_user_agent(void);
+void configure_curl_handle(CURL *curl, const char *full_url,
+                           MemoryBuffer *chunk, struct curl_slist *headers);
+char *build_search_url(const char *base_url, const char *page_param,
+                       int page_multiplier, int page_base,
+                       const char *encoded_query, int page);
+struct curl_slist *build_request_headers(const char *host_header,
+                                         const char *referer);
+void http_delay(void);
+
+xmlXPathContextPtr create_xpath_context(xmlDocPtr doc);
+void free_xpath_objects(xmlXPathContextPtr ctx, xmlXPathObjectPtr obj);
+SearchResult *alloc_results_array(int capacity, int max_results);
+void assign_result(SearchResult *result, char *url, char *title, char *snippet,
+                   int unescape);
+void free_xml_node_list(char *title, char *url, char *snippet);
+
 int scrape_engine(const SearchEngine *engine, const char *query,
                   SearchResult **out_results, int max_results);
 

+ 109 - 0
src/Scraping/ScrapingHttp.c

@@ -0,0 +1,109 @@
+#include "../Proxy/Proxy.h"
+#include "Config.h"
+#include "Scraping.h"
+#include <curl/curl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#define HTTP_DELAY_MIN_US 100000
+#define HTTP_DELAY_RANGE_US 100000
+
/* Pool of realistic desktop User-Agent strings; get_random_user_agent
   picks one at random per request to vary the browser fingerprint. */
static const char *USER_AGENTS[] = {
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, "
    "like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like "
    "Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 "
    "Firefox/121.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 "
    "(KHTML, like Gecko) Version/17.2 Safari/605.1.15"};

/* Element count of USER_AGENTS (valid only where the array is in scope). */
#define USER_AGENT_COUNT (sizeof(USER_AGENTS) / sizeof(USER_AGENTS[0]))
+
+size_t write_memory_callback(void *contents, size_t size, size_t nmemb,
+                             void *userp) {
+  size_t realsize = size * nmemb;
+  MemoryBuffer *mem = (MemoryBuffer *)userp;
+
+  if (mem->size + realsize + 1 > mem->capacity) {
+    size_t new_cap =
+        mem->capacity == 0 ? INITIAL_BUFFER_SIZE : mem->capacity * 2;
+    while (new_cap < mem->size + realsize + 1)
+      new_cap *= 2;
+
+    char *ptr = (char *)realloc(mem->memory, new_cap);
+    if (!ptr)
+      return 0;
+    mem->memory = ptr;
+    mem->capacity = new_cap;
+  }
+
+  memcpy(&(mem->memory[mem->size]), contents, realsize);
+  mem->size += realsize;
+  mem->memory[mem->size] = 0;
+
+  return realsize;
+}
+
+const char *get_random_user_agent(void) {
+  return USER_AGENTS[rand() % USER_AGENT_COUNT];
+}
+
/*
 * Apply the standard per-request options to an easy handle:
 * target URL, header list, write callback into `chunk`, a randomized
 * User-Agent, HTTP/2 with compressed encodings, DNS caching, redirect
 * following, request timeout, TLS peer verification, an in-memory cookie
 * engine, and finally the configured proxy settings.
 * The handle still needs CURLOPT_PRIVATE set by the caller if the header
 * list must be recoverable later.
 */
void configure_curl_handle(CURL *curl, const char *full_url,
                           MemoryBuffer *chunk, struct curl_slist *headers) {
  curl_easy_setopt(curl, CURLOPT_URL, full_url);
  curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
  curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_memory_callback);
  curl_easy_setopt(curl, CURLOPT_WRITEDATA, (void *)chunk);
  curl_easy_setopt(curl, CURLOPT_USERAGENT, get_random_user_agent());

  curl_easy_setopt(curl, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
  /* Empty string enables all encodings libcurl supports (gzip, etc.). */
  curl_easy_setopt(curl, CURLOPT_ACCEPT_ENCODING, "");
  curl_easy_setopt(curl, CURLOPT_DNS_CACHE_TIMEOUT, CURL_DNS_TIMEOUT_SECS);
  curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
  curl_easy_setopt(curl, CURLOPT_TIMEOUT, CURL_TIMEOUT_SECS);
  curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 1L);
  /* Empty cookie file activates the cookie engine without reading a file. */
  curl_easy_setopt(curl, CURLOPT_COOKIEFILE, "");

  apply_proxy_settings(curl);
}
+
/*
 * Build a full engine search URL: base_url + encoded_query + "&param=N",
 * where N = (page - 1) * page_multiplier + page_base (pages below 1 are
 * clamped so the multiplier is applied once).
 *
 * Returns a heap-allocated string the caller must free(), or NULL on
 * allocation/encoding failure. The buffer is sized exactly, so long
 * queries can no longer be silently truncated (the previous fixed
 * BUFFER_SIZE_LARGE buffer could truncate).
 */
char *build_search_url(const char *base_url, const char *page_param,
                       int page_multiplier, int page_base,
                       const char *encoded_query, int page) {
  int page_value = (page < 1 ? 1 : page - 1) * page_multiplier + page_base;

  /* Measure first so the allocation always fits. */
  int needed = snprintf(NULL, 0, "%s%s&%s=%d", base_url, encoded_query,
                        page_param, page_value);
  if (needed < 0)
    return NULL;

  char *url = malloc((size_t)needed + 1);
  if (!url)
    return NULL;

  snprintf(url, (size_t)needed + 1, "%s%s&%s=%d", base_url, encoded_query,
           page_param, page_value);
  return url;
}
+
+struct curl_slist *build_request_headers(const char *host_header,
+                                         const char *referer) {
+  struct curl_slist *headers = NULL;
+  char host_buf[BUFFER_SIZE_MEDIUM], ref_buf[BUFFER_SIZE_MEDIUM];
+
+  snprintf(host_buf, sizeof(host_buf), "Host: %s", host_header);
+  snprintf(ref_buf, sizeof(ref_buf), "Referer: %s", referer);
+
+  headers = curl_slist_append(headers, host_buf);
+  headers = curl_slist_append(headers, ref_buf);
+  headers = curl_slist_append(
+      headers,
+      "Accept: "
+      "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
+  headers = curl_slist_append(headers, "Accept-Language: en-US,en;q=0.5");
+  headers = curl_slist_append(headers, "DNT: 1");
+
+  return headers;
+}
+
+void http_delay(void) {
+  usleep(HTTP_DELAY_MIN_US + (rand() % HTTP_DELAY_RANGE_US));
+}

+ 269 - 0
src/Scraping/ScrapingParsers.c

@@ -0,0 +1,269 @@
+#include "../Utility/Unescape.h"
+#include "../Utility/XmlHelper.h"
+#include "Config.h"
+#include "Scraping.h"
+#include <libxml/HTMLparser.h>
+#include <libxml/xpath.h>
+#include <stdlib.h>
+#include <string.h>
+
/* Thin wrapper over xmlXPathNewContext; returns NULL on failure. */
xmlXPathContextPtr create_xpath_context(xmlDocPtr doc) {
  return xmlXPathNewContext(doc);
}
+
/* Release an XPath result object and its evaluation context; either
   argument may be NULL. Shared cleanup path for the parser exits. */
void free_xpath_objects(xmlXPathContextPtr ctx, xmlXPathObjectPtr obj) {
  if (obj)
    xmlXPathFreeObject(obj);
  if (ctx)
    xmlXPathFreeContext(ctx);
}
+
/* Allocate the results array for a parser run: `capacity` is the number
   of candidate nodes found, capped at max_results for the usable count.
   NOTE(review): exact meaning of xml_result_alloc's two arguments is
   defined elsewhere — presumably (slots allocated, usable count);
   confirm against XmlHelper. */
SearchResult *alloc_results_array(int capacity, int max_results) {
  int count = capacity < max_results ? capacity : max_results;
  return xml_result_alloc(capacity, count);
}
+
+void assign_result(SearchResult *result, char *url, char *title, char *snippet,
+                   int unescape) {
+  result->url = unescape ? unescape_search_url(url) : strdup(url ? url : "");
+  result->title = strdup(title ? title : "No Title");
+  result->snippet = strdup(snippet ? snippet : "");
+}
+
+void free_xml_node_list(char *title, char *url, char *snippet) {
+  if (title)
+    xmlFree(title);
+  if (url)
+    xmlFree(url);
+  if (snippet)
+    xmlFree(snippet);
+}
+
/*
 * Parser for DuckDuckGo Lite result pages.
 *
 * Selects every non-sponsored result link, then walks the table layout
 * to find the snippet: DDG Lite renders each result as consecutive <tr>
 * rows, with the snippet in a following row's td.result-snippet cell.
 * Allocates *out_results and returns the number of entries filled
 * (0 on any failure; *out_results is then untouched).
 */
static int parse_ddg_lite(const char *engine_name, xmlDocPtr doc,
                          SearchResult **out_results, int max_results) {
  (void)engine_name; /* required by the parser signature, unused here */
  int found_count = 0;

  xmlXPathContextPtr ctx = create_xpath_context(doc);
  if (!ctx)
    return 0;

  /* Result anchors, excluding rows marked as sponsored. */
  xmlXPathObjectPtr obj =
      xml_xpath_eval(ctx, "//tr[not(contains(@class, "
                          "'result-sponsored'))]//a[@class='result-link']");

  if (!obj || !obj->nodesetval || obj->nodesetval->nodeNr == 0) {
    free_xpath_objects(ctx, obj);
    return 0;
  }

  int num_links = obj->nodesetval->nodeNr;
  *out_results = alloc_results_array(num_links, max_results);
  if (!*out_results) {
    free_xpath_objects(ctx, obj);
    return 0;
  }

  for (int i = 0; i < num_links && found_count < max_results; i++) {
    xmlNodePtr link_node = obj->nodesetval->nodeTab[i];
    char *title = xml_node_content(link_node);
    char *url = (char *)xmlGetProp(link_node, (xmlChar *)"href");
    char *snippet_text = NULL;

    /* Climb to the enclosing <tr> of this link. */
    xmlNodePtr current = link_node->parent;
    while (current && xmlStrcasecmp(current->name, (const xmlChar *)"tr") != 0)
      current = current->parent;

    /* The snippet lives in the next <tr> sibling, if present. */
    if (current && current->next) {
      xmlNodePtr snippet_row = current->next;
      while (snippet_row &&
             xmlStrcasecmp(snippet_row->name, (const xmlChar *)"tr") != 0)
        snippet_row = snippet_row->next;
      if (snippet_row) {
        /* Temporarily scope the XPath context to the snippet row. */
        ctx->node = snippet_row;
        xmlXPathObjectPtr s_obj =
            xml_xpath_eval(ctx, ".//td[@class='result-snippet']");
        if (s_obj && s_obj->nodesetval && s_obj->nodesetval->nodeNr > 0)
          snippet_text = xml_node_content(s_obj->nodesetval->nodeTab[0]);
        if (s_obj)
          xmlXPathFreeObject(s_obj);
        ctx->node = NULL;
      }
    }

    /* unescape=1: DDG wraps target URLs in a redirect parameter. */
    assign_result(&(*out_results)[found_count], url, title, snippet_text, 1);
    free_xml_node_list(title, url, snippet_text);
    found_count++;
  }

  free_xpath_objects(ctx, obj);
  return found_count;
}
+
/*
 * Parser for Startpage result pages.
 *
 * Iterates over div.result containers and extracts, per container, the
 * a.result-link href, the h2.wgl-title text, and the p.description text.
 * Entries missing a URL or title are skipped. Allocates *out_results and
 * returns the number of entries filled (0 on any failure).
 */
static int parse_startpage(const char *engine_name, xmlDocPtr doc,
                           SearchResult **out_results, int max_results) {
  (void)engine_name; /* required by the parser signature, unused here */
  int found_count = 0;

  xmlXPathContextPtr ctx = create_xpath_context(doc);
  if (!ctx)
    return 0;

  xmlXPathObjectPtr obj =
      xml_xpath_eval(ctx, "//div[contains(@class, 'result')]");

  if (!obj || !obj->nodesetval || obj->nodesetval->nodeNr == 0) {
    free_xpath_objects(ctx, obj);
    return 0;
  }

  int num_results = obj->nodesetval->nodeNr;
  *out_results = alloc_results_array(num_results, max_results);
  if (!*out_results) {
    free_xpath_objects(ctx, obj);
    return 0;
  }

  for (int i = 0; i < num_results && found_count < max_results; i++) {
    xmlNodePtr result_node = obj->nodesetval->nodeTab[i];
    /* Scope relative XPath queries to this result container. */
    ctx->node = result_node;

    xmlXPathObjectPtr link_obj =
        xml_xpath_eval(ctx, ".//a[contains(@class, 'result-link')]");
    char *url =
        (link_obj && link_obj->nodesetval && link_obj->nodesetval->nodeNr > 0)
            ? (char *)xmlGetProp(link_obj->nodesetval->nodeTab[0],
                                 (xmlChar *)"href")
            : NULL;

    xmlXPathObjectPtr title_obj =
        xml_xpath_eval(ctx, ".//h2[contains(@class, 'wgl-title')]");
    char *title = (title_obj && title_obj->nodesetval &&
                   title_obj->nodesetval->nodeNr > 0)
                      ? xml_node_content(title_obj->nodesetval->nodeTab[0])
                      : NULL;

    xmlXPathObjectPtr snippet_obj =
        xml_xpath_eval(ctx, ".//p[contains(@class, 'description')]");
    char *snippet_text =
        (snippet_obj && snippet_obj->nodesetval &&
         snippet_obj->nodesetval->nodeNr > 0)
            ? xml_node_content(snippet_obj->nodesetval->nodeTab[0])
            : NULL;

    /* unescape=0: Startpage hrefs are direct target URLs. */
    if (url && title) {
      assign_result(&(*out_results)[found_count], url, title, snippet_text, 0);
      found_count++;
    }

    free_xml_node_list(title, url, snippet_text);
    if (link_obj)
      xmlXPathFreeObject(link_obj);
    if (title_obj)
      xmlXPathFreeObject(title_obj);
    if (snippet_obj)
      xmlXPathFreeObject(snippet_obj);
  }

  ctx->node = NULL;
  free_xpath_objects(ctx, obj);
  return found_count;
}
+
/*
 * Parser for Yahoo result pages.
 *
 * Iterates over div.algo-sr containers and extracts, per container, the
 * compTitle anchor href (a redirect URL, hence unescape=1), the
 * h3.title text, and the first compText paragraph as the snippet.
 * Entries missing a URL or title are skipped. Allocates *out_results and
 * returns the number of entries filled (0 on any failure).
 */
static int parse_yahoo(const char *engine_name, xmlDocPtr doc,
                       SearchResult **out_results, int max_results) {
  (void)engine_name; /* required by the parser signature, unused here */
  int found_count = 0;

  xmlXPathContextPtr ctx = create_xpath_context(doc);
  if (!ctx)
    return 0;

  xmlXPathObjectPtr obj =
      xml_xpath_eval(ctx, "//div[contains(@class, 'algo-sr')]");

  if (!obj || !obj->nodesetval || obj->nodesetval->nodeNr == 0) {
    free_xpath_objects(ctx, obj);
    return 0;
  }

  int num_results = obj->nodesetval->nodeNr;
  *out_results = alloc_results_array(num_results, max_results);
  if (!*out_results) {
    free_xpath_objects(ctx, obj);
    return 0;
  }

  for (int i = 0; i < num_results && found_count < max_results; i++) {
    xmlNodePtr result_node = obj->nodesetval->nodeTab[i];
    /* Scope relative XPath queries to this result container. */
    ctx->node = result_node;

    xmlXPathObjectPtr link_obj = xml_xpath_eval(
        ctx, ".//div[contains(@class, 'compTitle')]//a[@target='_blank']");
    char *url =
        (link_obj && link_obj->nodesetval && link_obj->nodesetval->nodeNr > 0)
            ? (char *)xmlGetProp(link_obj->nodesetval->nodeTab[0],
                                 (xmlChar *)"href")
            : NULL;

    xmlXPathObjectPtr title_obj =
        xml_xpath_eval(ctx, ".//h3[contains(@class, 'title')]");
    char *title = (title_obj && title_obj->nodesetval &&
                   title_obj->nodesetval->nodeNr > 0)
                      ? xml_node_content(title_obj->nodesetval->nodeTab[0])
                      : NULL;

    xmlXPathObjectPtr snippet_obj =
        xml_xpath_eval(ctx, ".//div[contains(@class, 'compText')]//p");
    char *snippet_text =
        (snippet_obj && snippet_obj->nodesetval &&
         snippet_obj->nodesetval->nodeNr > 0)
            ? xml_node_content(snippet_obj->nodesetval->nodeTab[0])
            : NULL;

    /* unescape=1: Yahoo wraps target URLs in redirect links. */
    if (url && title) {
      assign_result(&(*out_results)[found_count], url, title, snippet_text, 1);
      found_count++;
    }

    free_xml_node_list(title, url, snippet_text);
    if (link_obj)
      xmlXPathFreeObject(link_obj);
    if (title_obj)
      xmlXPathFreeObject(title_obj);
    if (snippet_obj)
      xmlXPathFreeObject(snippet_obj);
  }

  ctx->node = NULL;
  free_xpath_objects(ctx, obj);
  return found_count;
}
+
/*
 * Registry of scrape targets. Each entry pairs the request recipe
 * (base_url + URL-encoded query, plus a paging parameter computed as
 * (page - 1) * page_multiplier + page_base — see build_search_url)
 * with the parser that extracts results from that engine's HTML.
 */
const SearchEngine ENGINE_REGISTRY[] = {
    /* Paginates by 0-based result offset in steps of 30 (s=0,30,60,...). */
    {.name = "DuckDuckGo Lite",
     .base_url = "https://lite.duckduckgo.com/lite/?q=",
     .host_header = "lite.duckduckgo.com",
     .referer = "https://lite.duckduckgo.com/",
     .page_param = "s",
     .page_multiplier = 30,
     .page_base = 0,
     .parser = parse_ddg_lite},
    /* Paginates by plain 1-based page number (page=1,2,3,...). */
    {.name = "Startpage",
     .base_url = "https://www.startpage.com/sp/search?q=",
     .host_header = "www.startpage.com",
     .referer = "https://www.startpage.com/",
     .page_param = "page",
     .page_multiplier = 1,
     .page_base = 1,
     .parser = parse_startpage},
    /* Paginates by 1-based result offset in steps of 10 (b=1,11,21,...). */
    {.name = "Yahoo",
     .base_url = "https://search.yahoo.com/search?p=",
     .host_header = "search.yahoo.com",
     .referer = "https://search.yahoo.com/",
     .page_param = "b",
     .page_multiplier = 10,
     .page_base = 1,
     .parser = parse_yahoo}};

/* Number of entries in ENGINE_REGISTRY. */
const int ENGINE_COUNT = sizeof(ENGINE_REGISTRY) / sizeof(SearchEngine);