import os
import re
import time
from urllib.parse import urlsplit

import requests
# Root directory for everything the script writes.
OUTPUT_FOLDER = 'f://results'
# Sub-paths are joined as plain relative names. Repeating the drive prefix in
# the second component makes os.path.join discard OUTPUT_FOLDER on Windows
# (and produces a nested "f:" directory elsewhere).
IMAGES_FOLDER = os.path.join(OUTPUT_FOLDER, 'images')
LOG_FILE = os.path.join(OUTPUT_FOLDER, 'log.txt')
# Id of the question that is requested next.
num = 240658039
# Consecutive "question not found" answers received for the current id.
error_num = 0
# Image URLs already handled during this run, used to skip duplicates.
downloaded_urls = set()
# Creating the images folder also creates OUTPUT_FOLDER, which holds the log.
os.makedirs(IMAGES_FOLDER, exist_ok=True)
def log_message(message):
    """Print *message* and append it, newline-terminated, to ``LOG_FILE``."""
    print(message)
    # Append mode keeps earlier entries; the file is reopened on every call.
    with open(LOG_FILE, mode='a', encoding='utf-8') as log_file:
        log_file.writelines([message + '\n'])
# Matches every character that is NOT an ASCII letter, a digit or a Russian
# letter (including ё/Ё).
_DISALLOWED_FILENAME_CHARS = re.compile(r'[^a-zA-Z0-9а-яА-ЯёЁ]')


def sanitize_filename(filename):
    """Return *filename* with every disallowed character removed.

    Only ASCII letters, digits and Russian letters are kept; everything
    else (spaces, punctuation, dots, underscores, ...) is dropped.
    """
    cleaned = _DISALLOWED_FILENAME_CHARS.sub('', filename)
    return cleaned
def download_image(url, filename, timeout=30):
    """Download *url* and store the response body in *filename*.

    Failures are logged through ``log_message`` instead of being raised, so
    one broken image never stops the caller.

    Args:
        url: Address of the image to fetch.
        filename: Path of the file to write (overwritten if it exists).
        timeout: Seconds to wait for the server before giving up; without a
            timeout a stalled connection would block forever.

    Returns:
        True if the file was written, False if the download or the write
        failed.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        # Fetch the body before touching the disk so a failed transfer does
        # not leave an empty file behind.
        payload = response.content
        with open(filename, 'wb') as file:
            file.write(payload)
    except (requests.RequestException, OSError) as e:
        # OSError covers an unwritable or invalid target path.
        log_message(f"Ошибка при скачивании {url}: {e}")
        return False
    log_message(f"Скачано: {filename}")
    return True
def process_question(data, question_id):
    """Download every image attached to one question.

    Images are read from ``data['result']['question']['data']['content'][0]
    ['images']`` and each one is fetched from its ``['sizes']['origin']``
    URL. Files are stored in ``IMAGES_FOLDER`` as
    ``<sanitized title>_<index><extension>``. URLs already present in
    ``downloaded_urls`` are skipped.

    Args:
        data: Decoded JSON answer of the question API.
        question_id: Id of the question, used only in the error log line.

    Returns:
        True if the payload had the expected structure (even when it held no
        images), False if a required key or index was missing or a node had
        an unexpected type.
    """
    try:
        question = data['result']['question']['data']
        images = question['content'][0]['images']
        title = sanitize_filename(question["title"])
        for i, image in enumerate(images):
            url = image['sizes']['origin']
            if url in downloaded_urls:
                log_message(f"Пропуск: изображение уже скачано {url}")
                continue
            # Take the extension from the URL path only, so a query string
            # or fragment never ends up inside the file name.
            url_path = urlsplit(url).path
            file_extension = os.path.splitext(os.path.basename(url_path))[1]
            filename = os.path.join(IMAGES_FOLDER, f"{title}_{i}{file_extension}")
            download_image(url, filename)
            downloaded_urls.add(url)
        return True
    except (IndexError, KeyError, TypeError) as e:
        # TypeError: a node of the payload is None or not a dict/list.
        log_message(f"Ошибка обработки вопроса {question_id}: {e}")
        return False
# Seconds to wait for the API before the request counts as failed; without a
# timeout a stalled connection would block the loop forever.
REQUEST_TIMEOUT = 30
# Consecutive "not found" answers tolerated for one id before it is skipped.
MAX_NOT_FOUND_RETRIES = 20
# Error message the API sends (with HTTP 400) for a question id it cannot find.
NOT_FOUND_MESSAGE = "question with specified criteria is not found"

# Walk the question ids upwards starting at ``num``. A missing question is
# polled again once per second up to MAX_NOT_FOUND_RETRIES times before being
# skipped. Every other outcome advances to the next id, so no response can
# make the loop spin on the same request without a pause.
while True:
    try:
        response = requests.get(
            f'https://otvet.mail.ru/api/v1/questions/{num}',
            timeout=REQUEST_TIMEOUT,
        )
        status = response.status_code
        if status == 200:
            # The not-found counter is reset only by a successfully
            # processed question.
            if process_question(response.json(), num):
                error_num = 0
            num += 1
            continue

        error_message = None
        if status == 400:
            payload = response.json()
            # Guard against bodies whose "error" entry is not an object.
            error_info = payload.get("error") if isinstance(payload, dict) else None
            if isinstance(error_info, dict):
                error_message = error_info.get("message")

        if status == 400 and error_message == NOT_FOUND_MESSAGE:
            log_message(f"Вопрос {num} не найден.")
            error_num += 1
            if error_num >= MAX_NOT_FOUND_RETRIES:
                num += 1
                error_num = 0
                log_message("Переходим к следующему вопросу.")
            time.sleep(1)
        else:
            log_message(f"Неожиданный код ответа: {status}")
            num += 1
    except ValueError as e:
        # The body was not valid JSON. This handler must precede the
        # RequestException one because newer requests versions raise a JSON
        # error that is both; retrying would fail the same way, so skip.
        log_message(f"Некорректный JSON в ответе для вопроса {num}: {e}")
        num += 1
    except requests.RequestException as e:
        # Transient network problem: keep the same id and retry after a pause.
        log_message(f"Ошибка сети: {e}")
        time.sleep(5)