Python3 Web scraping REQUESTS example
Web scraping using the REQUESTS library with the latest browser headers.
REQUESTS Web Scraping
import requests
from UserAgentFetcher import UserAgentFetcher
def make_request(url, headers=None, timeout=10):
    """
    Send a GET request to the given URL and print the outcome.

    On success the status code, the custom request headers (if any), the
    response headers and the response body are printed. Otherwise only the
    failing status code is printed.

    Args:
        url (str): The URL to send the GET request to.
        headers (dict, optional): Custom headers for the request. When
            omitted, no custom headers are sent and none are listed.
        timeout (float or tuple, optional): Seconds to wait for the server
            before giving up; passed straight to ``requests.get``.
            Defaults to 10.

    Returns:
        None

    Raises:
        requests.exceptions.RequestException: Propagated from
            ``requests.get`` (this function does not catch it).
    """
    # Without a timeout requests may block indefinitely on a stalled server.
    response = requests.get(url, headers=headers, timeout=timeout)

    # Treat any status from 200 up to (but not including) 400 as success.
    if not 200 <= response.status_code < 400:
        print(f"Request failed with status code: {response.status_code}")
        return

    print(f"Request successful: {response.status_code}")

    print("Request headers:")
    # headers defaults to None, so fall back to an empty mapping instead of
    # failing on None.items().
    for header, value in (headers or {}).items():
        print(f"{header}: {value}")

    print("Response headers:")
    for header, value in response.headers.items():
        print(f"{header}: {value}")

    print("Response content:")
    print(response.text)
if __name__ == "__main__":
    # Define the target URL and custom headers
    target_url = "https://www.example.com"
    ua_fetcher = UserAgentFetcher()
    random_user_agent = ua_fetcher.get_random_user_agent()

    custom_headers = {}
    # get_random_user_agent() returns None when no user agent matches, so
    # only set the header when a real value is available rather than
    # passing "User-Agent": None along.
    if random_user_agent:
        custom_headers["User-Agent"] = random_user_agent
    custom_headers.update({
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
        "upgrade-insecure-requests": "1",
    })

    # Call the make_request function with the defined URL and headers
    make_request(target_url, headers=custom_headers)
UserAgentFetcher.py
import requests
import random
class UserAgentFetcher:
    """Download a list of user-agent strings and hand out random ones."""

    # Location of the JSON document holding the user-agent list.
    USER_AGENTS_URL = 'https://jnrbsn.github.io/user-agents/user-agents.json'
    # Seconds to wait for the list download before giving up.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        """Initialize the fetcher by downloading the user-agent list from a URL."""
        self.user_agents = self._fetch_user_agents()

    def _fetch_user_agents(self):
        """
        Fetch user agents from ``USER_AGENTS_URL`` and return them as a list.

        Returns:
            list: The decoded JSON list, or an empty list when the request
            fails, the body is not valid JSON, or the payload is not a list.
            In those cases an error message is printed.
        """
        try:
            response = requests.get(self.USER_AGENTS_URL, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            user_agents = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers an undecodable JSON body on requests versions
            # whose JSON error is not a RequestException subclass.
            print(f"Error retrieving user agent data: {e}")
            return []

        if not isinstance(user_agents, list):
            print(
                "Error retrieving user agent data: "
                f"expected a JSON list, got {type(user_agents).__name__}"
            )
            return []
        return user_agents

    def get_random_user_agent(self, search_string='Windows NT 10.0; Win64; x64'):
        """
        Get a random user agent that partially matches the given search string.

        Args:
            search_string (str): Substring that a user agent must contain.

        Returns:
            str: A random user agent containing ``search_string``, or None
            if no match is found.
        """
        # Skip non-string entries so a malformed list cannot raise TypeError
        # on the substring test.
        filtered_agents = [
            agent for agent in self.user_agents
            if isinstance(agent, str) and search_string in agent
        ]
        return random.choice(filtered_agents) if filtered_agents else None
if __name__ == "__main__":
    # Build a fetcher and ask it for one agent using the default search string.
    chosen_agent = UserAgentFetcher().get_random_user_agent()
    if not chosen_agent:
        print("No matching user agents found.")
    else:
        print("Random User Agent:", chosen_agent)