update
This commit is contained in:
parent
2437a4c03f
commit
01df47b6a2
48
main.py
48
main.py
|
|
@ -1,5 +1,8 @@
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import re
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from urllib3.exceptions import InsecureRequestWarning
|
from urllib3.exceptions import InsecureRequestWarning
|
||||||
|
|
@ -43,11 +46,50 @@ VN_FILE = Path("vnereglament.txt")
|
||||||
AVAR_FILE = Path("avar.txt")
|
AVAR_FILE = Path("avar.txt")
|
||||||
|
|
||||||
# Patterns are compiled once at import time; clean_street runs for every record.
# City prefix as it appears on a fragment after splitting on ';'.
# The \b keeps the pattern from eating the first letter of a following word.
_CITY_PREFIX_RE = re.compile(r'^Белгород г\b\s*', re.IGNORECASE)
_PART_SPLIT_RE = re.compile(r'[;\n]+')
# Non-address noise tokens, removed in this order (the "0 <object>" form first,
# so the bare keywords below do not leave a dangling "0" behind).
_NOISE_RES = (
    re.compile(r'\b0\s+(гараж|КНС|ГСК)\b', re.IGNORECASE),
    re.compile(r'\bГСК[-\s]*\d*\b', re.IGNORECASE),
    re.compile(r'\bКНС\b', re.IGNORECASE),
    re.compile(r'\bКотельная\b', re.IGNORECASE),
)
_SEPARATOR_RUN_RE = re.compile(r'[\s,]+')
_DEDUP_KEY_STRIP_RE = re.compile(r'[^а-яёa-z0-9]')


def clean_street(street: str) -> str:
    """Normalize a raw address blob into unique, newline-separated entries.

    The input is split on ';' and newlines. For every fragment the city
    prefix ("Белгород г") and the noise tokens (ГСК, КНС, Котельная,
    "0 гараж", ...) are removed, runs of whitespace/commas collapse to a
    single space, and surrounding spaces, commas and dots are trimmed.
    Fragments that differ only in case or punctuation are treated as
    duplicates; the first occurrence wins and input order is preserved.

    Args:
        street: Raw address text; may span several lines.

    Returns:
        "" when ``street`` is empty, "[Нет данных]" when nothing survives
        the cleanup, otherwise the cleaned entries joined with newlines.
    """
    if not street:
        return ""

    seen = set()
    unique_lines = []
    for fragment in _PART_SPLIT_RE.split(street):
        # Strip the city prefix from every fragment, not only from the start
        # of the whole input: multi-line input repeats it on each line.
        part = _CITY_PREFIX_RE.sub('', fragment.strip())
        for noise_re in _NOISE_RES:
            part = noise_re.sub('', part)
        part = _SEPARATOR_RUN_RE.sub(' ', part).strip(' ,.')
        if not part:
            continue

        # Compare on letters and digits only, so variants that differ in
        # case or punctuation count as the same entry.
        key = _DEDUP_KEY_STRIP_RE.sub('', part.lower())
        if key and key not in seen:
            seen.add(key)
            unique_lines.append(part)

    return '\n'.join(unique_lines) if unique_lines else "[Нет данных]"
|
||||||
|
|
||||||
def format_time(dt_str: str) -> str:
    """Format an ISO-8601 timestamp as 'DD.MM.YYYY HH:MM'.

    A trailing 'Z' is rewritten to '+00:00' before parsing, because
    ``datetime.fromisoformat`` does not accept it on older Python versions.
    No timezone conversion is performed: the wall-clock time is printed
    exactly as it appears in the string.

    Args:
        dt_str: Timestamp text, e.g. '2024-01-15T10:30:00Z'.

    Returns:
        The formatted timestamp, or, when the text is not valid ISO-8601,
        the input with every 'T' replaced by a space (best-effort fallback).
    """
    try:
        dt = datetime.fromisoformat(dt_str.replace('Z', '+00:00'))
    except ValueError:
        # Only a parse failure triggers the fallback; a bare except would
        # also swallow KeyboardInterrupt/SystemExit and hide real bugs.
        return dt_str.replace('T', ' ')
    return dt.strftime('%d.%m.%Y %H:%M')
|
||||||
|
|
||||||
def parse_and_save(data, file_path: Path, mode: str):
|
def parse_and_save(data, file_path: Path, mode: str):
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue
Block a user