Duck Duck Go code not working

mp97 · April 20, 2021, 6:07pm

I recently built a small image classifier using the DuckDuckGo scraper on Colab and didn’t run into any particular problems. This is the code I used:

def search_images_ddg(key, max_n=200, max_retries=5):
  """Search DuckDuckGo images for 'key' and return up to 'max_n' unique image urls.

  (Adopted from https://github.com/deepanprabhu/duckduckgo-images-api)

  Args:
    key: search query string.
    max_n: number of unique image urls to collect before stopping.
    max_retries: how many consecutive failed page requests are tolerated
      before giving up and returning what has been collected so far.

  Returns:
    An `L` of unique image urls (in no particular order), or None when the
    search token cannot be parsed from the DuckDuckGo landing page.
  """
  url = 'https://duckduckgo.com/'
  res = requests.post(url, data={'q': key})
  # The image endpoint is queried with the 'vqd' token found in the landing page.
  searchObj = re.search(r'vqd=([\d-]+)\&', res.text)

  if not searchObj:
    print('Token Parsing Failed !')
    return

  requestUrl = url + 'i.js'
  headers = {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:71.0) Gecko/20100101 Firefox/71.0'}
  params = (('l','us-en'),('o','json'),('q',key),('vqd',searchObj.group(1)),('f',',,,'),('p','1'),('v7exp','a'))
  # A set keeps the count honest: duplicates must not count towards max_n.
  urls = set()
  failures = 0

  while failures < max_retries:
    try:
      res = requests.get(requestUrl, headers=headers, params=params)
      data = json.loads(res.text)
      results = data['results']
    except (requests.RequestException, ValueError, KeyError):
      # Failed request, non-JSON body, or a payload without 'results':
      # retry the same page, but only a bounded number of times so a
      # persistent failure cannot loop forever.
      failures += 1
      continue

    failures = 0
    for obj in results:
      if 'image' not in obj:
        continue
      urls.add(obj['image'])
      if len(urls) >= max_n:
        return L(urls)

    # No 'next' link means the last page of results has been reached.
    if 'next' not in data:
      return L(urls)
    requestUrl = url + data['next']

  print(f'Giving up after {max_retries} consecutive failed requests')
  return L(urls)

And then I used these lines of code to scrape the images I wanted:

# Car models to scrape; each one gets its own sub-folder under `path`.
toyota_cars = ['4runner', 'land cruiser', 'rav4']
path = Path('/tmp/toyota_cars')

# Create the root folder in one step; no error if it already exists.
path.mkdir(parents=True, exist_ok=True)

for toyota_car in toyota_cars:
  dest = path/toyota_car
  dest.mkdir(exist_ok=True)
  urls = search_images_ddg(f'toyota {toyota_car}', max_n=200)
  # search_images_ddg returns None when token parsing fails; skip the
  # download rather than passing an empty/missing url list along.
  if not urls:
    print(f'No image urls found for toyota {toyota_car}, skipping')
    continue
  download_images(dest, urls=urls)

# Collect the paths of every image downloaded under the root folder.
images_path = get_image_files(path)

I hope this helps.