The post Saving a File that automatically downloads from a url using Python appeared first on LoginVast.Com.
I’m trying to login to a website and download a csv. I’ve managed to login to the website and find the button that lets me download the csv. Once I click it, I get a new url which should automatically download the csv file. However, I have no idea how to save the url. Here is what I have so far:
import pandas as pd
from dotenv import load_dotenv, find_dotenv
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.common.by import By
# import re
from datetime import datetime, timedelta
import requests
from io import StringIO
import os
import csv
import json
# Load the .env with the secure credentials in it
os.chdir('/Users/alexfischer/Projects/Tetiaroa_Weather')
download_dir="/Users/alexfischer/Projects/Tetiaroa_Weather/Tetiaroa.csv"
load_dotenv(find_dotenv())
## Make sure path to the working directory is correct
os.chdir('/Users/alexfischer/Projects/Tetiaroa_Weather')
username="TETIAROA"
password = 'TBSA_Tetiaroa987!'
# Set the timeout duration in seconds
# If the webscraper gets stuck for some reason, it will close the browser in an hour
timeout_duration = 3600 #1 hour
# Create a FirefoxOptions object to set options for the Firefox WebDriver
firefox_options = Options()
# TODO: For some reason it is not using my specified default download directory
# Set the download directory in the Firefox profile
firefox_options.set_preference("browser.download.folderList", 2)
firefox_options.set_preference("browser.download.manager.showWhenStarting", False)
firefox_options.set_preference("browser.download.dir", download_dir)
firefox_options.set_preference("browser.download.useDownloadDir", True)
firefox_options.set_preference("browser.helperApps.neverAsk.saveToDisk", "application/csv") # may be text/plain accoring to content-type in network tab
# Add the headless option
# Uncomment this to have it run in the background
firefox_options.add_argument('--headless')
try:
# Print current date and time (for the cronjob log, if using crontab)
print('Current date and time: ' + str(datetime.now()))
# Start the timer
start_time = time.time()
# Open a browser
print('Opening Firefox...')
driver = webdriver.Firefox(options=firefox_options)
# Launch the web app
print('Logging into meteo-france...')
driver.get("https://pro.meteofrance.com")
time.sleep(5)
# Find the input fields and enter the credentials
username_element = driver.find_element(By.NAME, 'login')
username_element.send_keys(username)
password_element = driver.find_element(By.NAME, 'pass')
password_element.send_keys(password)
# Click the Login Button
driver.find_element(By.XPATH,"//button[@type="submit"]").click()
time.sleep(10)
# Get the link that the csv button contains
csv_button = driver.find_element(By.CLASS_NAME,"csv")
csv_button.click()
url = csv_button.get_attribute('href') # the url it takes you to when you click submit. Opening this automatically downloads the csv
print(url) # just to check it
If I try opening the url I get using requests then I have to login again and repeat the same process over and over again. I’m stuck
The post Saving a File that automatically downloads from a url using Python appeared first on LoginVast.Com.
Hello, and welcome to StackOverflow! You might need the cookies from the Selenium webdriver that authenticates you. Try passing these cookies to
requests.get
; check this answer for how to get the cookies from Selenium.