Python 3 web-scraping example
Web scraping using the REQUESTS library with up-to-date request headers.

REQUESTS Web Scraping


import requests
from UserAgentFetcher import UserAgentFetcher


def make_request(url, headers=None, timeout=10):
    """
    Make a GET request to the specified URL and print the response details.

    Args:
        url (str): The URL to send the GET request to.
        headers (dict, optional): Custom headers for the request. May be
            ``None`` (the default), in which case requests' own defaults
            are used.
        timeout (float, optional): Seconds to wait for the server before
            giving up. Prevents the call from hanging indefinitely
            (``requests.get`` has no timeout by default).

    Returns:
        None
    """
    # Send a GET request; the explicit timeout keeps an unresponsive
    # server from blocking the program forever.
    response = requests.get(url, headers=headers, timeout=timeout)

    # Treat both 2xx (success) and 3xx (redirect) status codes as success.
    if 200 <= response.status_code < 400:
        print(f"Request successful: {response.status_code}")
        print("Request headers:")
        # Guard against headers=None (the default): iterating None.items()
        # would raise AttributeError.
        for header, value in (headers or {}).items():
            print(f"{header}: {value}")
        print("Response headers:")
        for header, value in response.headers.items():
            print(f"{header}: {value}")
        print("Response content:")
        print(response.text)
    else:
        print(f"Request failed with status code: {response.status_code}")


if __name__ == "__main__":
    # Define the target URL and custom headers
    target_url = "https://www.example.com"

    ua_fetcher = UserAgentFetcher()
    random_user_agent = ua_fetcher.get_random_user_agent()
    custom_headers = {
        "User-Agent": random_user_agent,
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "upgrade-insecure-requests": "1"
    }

    # Call the make_request function with the defined URL and headers
    make_request(target_url, headers=custom_headers)

UserAgentFetcher.py

import requests
import random

class UserAgentFetcher:
    """Download a list of current browser user-agent strings and pick from it."""

    def __init__(self):
        """Initialize by fetching the user-agent list once over the network."""
        # List of user-agent strings; empty if the download failed.
        self.user_agents = self._fetch_user_agents()

    def _fetch_user_agents(self):
        """
        Download the user-agent list from a community-maintained JSON endpoint.

        Returns:
            list: User-agent strings, or an empty list if any network or
            HTTP error occurred (the error is printed, not raised).
        """
        url = 'https://jnrbsn.github.io/user-agents/user-agents.json'

        try:
            # Explicit timeout: requests.get has none by default, so a
            # stalled connection would otherwise hang the constructor.
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            user_agents = response.json()
            return user_agents
        except requests.exceptions.RequestException as e:
            # Best-effort: report the problem and fall back to no agents.
            print(f"Error retrieving user agent data: {e}")
            return []

    def get_random_user_agent(self, search_string='Windows NT 10.0; Win64; x64'):
        """
        Get a random user agent that partially matches the given search string.

        Args:
            search_string (str): Substring that a candidate user agent must
                contain (defaults to a 64-bit Windows 10 signature).

        Returns:
            str: A random matching user agent, or None if no match is found.
        """
        # Keep only agents containing the search substring.
        filtered_agents = [agent for agent in self.user_agents if search_string in agent]

        if filtered_agents:
            random_agent = random.choice(filtered_agents)
            return random_agent
        else:
            return None


if __name__ == "__main__":
    ua_fetcher = UserAgentFetcher()
    random_user_agent = ua_fetcher.get_random_user_agent()

    if random_user_agent:
        print("Random User Agent:", random_user_agent)
    else:
        print("No matching user agents found.")