This commit is contained in:
Andrey Sazonov 2025-10-02 21:44:23 +03:00
parent 2437a4c03f
commit 01df47b6a2

48
main.py
View File

@ -1,5 +1,8 @@
import json
import logging
import re
from datetime import datetime
import requests
from pathlib import Path
from urllib3.exceptions import InsecureRequestWarning
@ -43,11 +46,50 @@ VN_FILE = Path("vnereglament.txt")
AVAR_FILE = Path("avar.txt")
# City prefix ("Белгород г" with optional ";") at the start of a fragment.
# The trailing \b keeps words that merely start with "г" from being clipped.
_CITY_PREFIX_RE = re.compile(r'^Белгород г\b;?\s*', re.IGNORECASE)

# Address fragments are separated by semicolons and/or newlines.
_FRAGMENT_SPLIT_RE = re.compile(r'[;\n]+')

# Non-address tokens removed from every fragment, applied in this order.
_NOISE_PATTERNS = (
    re.compile(r'\b0\s+(гараж|КНС|ГСК)\b', re.IGNORECASE),
    # Anchored at a word boundary so words that merely end in "ск" are left
    # intact; the optional "Г" removes "ГСК-12" as a whole token.
    re.compile(r'\bГ?СК[-\s]*\d*\b', re.IGNORECASE),
    re.compile(r'\bКНС\b', re.IGNORECASE),
    re.compile(r'\bКотельная\b', re.IGNORECASE),
)

# Runs of whitespace and commas collapse into a single space.
_SEPARATOR_RUN_RE = re.compile(r'[\s,]+')

# Everything except lowercase Cyrillic/Latin letters and digits is dropped
# when building the de-duplication key.
_DEDUP_KEY_RE = re.compile(r'[^а-яёa-z0-9]')


def clean_street(street: str) -> str:
    """Normalize a raw address string into unique, newline-separated lines.

    The input is split on ``;`` and newlines. For each fragment the city
    prefix is stripped, service tokens matched by ``_NOISE_PATTERNS`` are
    removed, whitespace/comma runs are collapsed to one space, and leading or
    trailing spaces, commas and dots are trimmed. Fragments that compare equal
    after lowercasing and dropping every non-alphanumeric character are
    reported once, keeping the first spelling and the original order.

    Args:
        street: Raw address text; may contain several addresses.

    Returns:
        ``""`` when ``street`` is empty or ``None``, ``"[Нет данных]"`` when
        nothing is left after cleaning, otherwise the cleaned lines joined
        with ``"\\n"``.
    """
    if not street:
        return ""

    seen = set()
    unique_lines = []
    for fragment in _FRAGMENT_SPLIT_RE.split(street):
        # Strip the city prefix per fragment: multi-line input repeats it on
        # every line, not only at the very start of the string.
        fragment = _CITY_PREFIX_RE.sub('', fragment.strip())
        for pattern in _NOISE_PATTERNS:
            fragment = pattern.sub('', fragment)
        fragment = _SEPARATOR_RUN_RE.sub(' ', fragment).strip(' ,.')
        if not fragment:
            continue

        key = _DEDUP_KEY_RE.sub('', fragment.lower())
        if key and key not in seen:
            seen.add(key)
            unique_lines.append(fragment)

    return '\n'.join(unique_lines) if unique_lines else "[Нет данных]"
def format_time(dt_str: str) -> str:
    """Format an ISO-8601 timestamp as ``DD.MM.YYYY HH:MM``.

    A trailing ``Z`` is rewritten to ``+00:00`` before parsing so that
    ``datetime.fromisoformat`` accepts it on interpreters that do not
    understand the ``Z`` suffix. No timezone conversion is performed: the
    wall-clock fields of the parsed value are printed as they are.

    Args:
        dt_str: Timestamp text, e.g. ``"2025-10-02T21:44:23Z"``.

    Returns:
        The formatted timestamp, or, when the text cannot be parsed, the
        input with every ``"T"`` replaced by a space.
    """
    try:
        dt = datetime.fromisoformat(dt_str.replace('Z', '+00:00'))
    except ValueError:
        # Best-effort fallback for unparseable text. Only ValueError is
        # caught so KeyboardInterrupt/SystemExit are no longer swallowed.
        return dt_str.replace('T', ' ')
    return dt.strftime('%d.%m.%Y %H:%M')
def parse_and_save(data, file_path: Path, mode: str):