Is there a quick way to download all the HepMC files in one evtgen dataset from outside CERN, similar to e.g. Downloading all the files for a dataset together, but for evtgen?
Hi @rakhi,
The easiest approach, with the fewest dependencies and the least black magic, is probably just a little loop:
# Get atlasopenmagic for dealing with metadata
import atlasopenmagic as atom
# Get the urllib library for https-based url retrieval
import urllib.request
# Set the release and get the URLs we want for our sample
atom.set_release('2025r-evgen-13p6tev')
my_urls = atom.get_urls('510203', protocol='https', cache=False)
# Keep a list of our local files for convenience
downloaded_files = []
# Loop through all the urls and download the files
for a_url in my_urls:
    print(f'Downloading {a_url}')
    (local_filename, headers) = urllib.request.urlretrieve(a_url)
    # Done downloading, add the resulting file to our list
    downloaded_files += [local_filename]
print(f'Downloaded files: {downloaded_files}')
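One small note: without a filename argument, urlretrieve writes each file to a temporary location with an opaque name. If you would rather keep readable names, a minimal variant (assuming each URL path ends in the actual filename) is:
# Variant of the loop above that keeps the original filenames
# (assumes each URL path ends in the desired filename).
import os
import urllib.parse
import urllib.request

for a_url in my_urls:
    # Use the last path component of the URL as the local filename
    local_name = os.path.basename(urllib.parse.urlparse(a_url).path)
    print(f'Downloading {a_url} -> {local_name}')
    urllib.request.urlretrieve(a_url, filename=local_name)
    downloaded_files.append(local_name)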
Best,
Zach
Thanks for the response, Zach. I tried that, and it gave me this error message:
File ~/miniconda3/envs/python3.10/lib/python3.10/urllib/request.py:241, in urlretrieve(url, filename, reporthook, data)
    224 """
    225 Retrieve a URL into a temporary location on disk.
    226
   (...)
    237 data file as well as the resulting HTTPMessage object.
    238 """
    239 url_type, path = _splittype(url)
--> 241 with contextlib.closing(urlopen(url, data)) as fp:
    242     headers = fp.info()
    244     # Just return the local path and the "headers" for file://
    245     # URLs. No sense in performing a copy unless requested.

File ~/miniconda3/envs/python3.10/lib/python3.10/urllib/request.py:216, in urlopen(url, data, timeout, cafile, capath, cadefault, context)
    214 else:
    215     opener = _opener
--> 216 return opener.open(url, data, timeout)

File ~/miniconda3/envs/python3.10/lib/python3.10/urllib/request.py:525, in OpenerDirector.open(self, fullurl, data, timeout)
    523 for processor in self.process_response.get(protocol, []):
    524     meth = getattr(processor, meth_name)
--> 525     response = meth(req, response)
    527 return response

File ~/miniconda3/envs/python3.10/lib/python3.10/urllib/request.py:634, in HTTPErrorProcessor.http_response(self, request, response)
    631 # According to RFC 2616, "2xx" code indicates that the client's
    632 # request was successfully received, understood, and accepted.
    633 if not (200 <= code < 300):
--> 634     response = self.parent.error(
    635         'http', request, response, code, msg, hdrs)
    637 return response

File ~/miniconda3/envs/python3.10/lib/python3.10/urllib/request.py:563, in OpenerDirector.error(self, proto, *args)
    561 if http_err:
    562     args = (dict, 'default', 'http_error_default') + orig_args
--> 563     return self._call_chain(*args)

File ~/miniconda3/envs/python3.10/lib/python3.10/urllib/request.py:496, in OpenerDirector._call_chain(self, chain, kind, meth_name, *args)
    494 for handler in handlers:
    495     func = getattr(handler, meth_name)
--> 496     result = func(*args)
    497     if result is not None:
    498         return result

File ~/miniconda3/envs/python3.10/lib/python3.10/urllib/request.py:643, in HTTPDefaultErrorHandler.http_error_default(self, req, fp, code, msg, hdrs)
    642 def http_error_default(self, req, fp, code, msg, hdrs):
--> 643     raise HTTPError(req.full_url, code, msg, hdrs, fp)
HTTPError: HTTP Error 504: Gateway Time-out
Resolved! I added a longer timeout with this code:
import socket
# Set a longer timeout (the default is often too short)
socket.setdefaulttimeout(60)  # 60 seconds
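For reference, here is a sketch of an alternative that sets the timeout per request rather than globally (it assumes the same my_urls list from Zach's snippet): urlopen accepts a timeout argument, and shutil.copyfileobj streams the response to a local file.
# Per-request timeout instead of a global socket default
# (assumes the my_urls list from the earlier snippet).
import os
import shutil
import urllib.parse
import urllib.request

for a_url in my_urls:
    local_name = os.path.basename(urllib.parse.urlparse(a_url).path)
    print(f'Downloading {a_url} -> {local_name}')
    with urllib.request.urlopen(a_url, timeout=60) as response:
        with open(local_name, 'wb') as out_file:
            shutil.copyfileobj(response, out_file)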
Just resurrecting this thread: is there some hard limit on the download bandwidth or speed for these datasets? The student I work with says he can't get them to download at more than 1 MB/s, which for a large dataset like e.g. W+c would take a not-insignificant time.
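For what it's worth, a quick, illustrative way to check the effective rate on a single file (this assumes the my_urls list from the snippets above; the 1 MiB chunk size is arbitrary):
# Rough throughput check on the first URL
# (assumes my_urls from the earlier snippets).
import time
import urllib.request

start = time.perf_counter()
total_bytes = 0
with urllib.request.urlopen(my_urls[0], timeout=60) as response:
    while True:
        chunk = response.read(1024 * 1024)
        if not chunk:
            break
        total_bytes += len(chunk)
elapsed = time.perf_counter() - start
print(f'{total_bytes / 1e6 / elapsed:.2f} MB/s over {elapsed:.1f} s')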