# Listing metadata: 117 lines, 3.9 KiB, Python
import re
|
|
import requests
|
|
from bs4 import BeautifulSoup
|
|
from requests.exceptions import HTTPError, Timeout, RequestException
|
|
|
|
# Root URL of the FHEM web frontend; request paths are appended to it.
FHEM_URL_BASE = "https://fhem.auwiesen2.de/fhem"

# CSRF token sent along with every request (replace with your actual token).
CSRF_TOKEN = "csrf_611440676390392"

# Default headers attached to each request, including the CSRF token.
HEADERS = {
    "X-FHEM-csrfToken": CSRF_TOKEN,
    "Content-Type": "application/x-www-form-urlencoded",
    "Accept": "text/html",
}

# One shared HTTP session, reused by every request made in this module.
session = requests.Session()
|
|
|
|
def fetch_device_log(device_id, year):
    """Download one device's yearly log and return its timestamped lines.

    The log is requested from the FileLog_logWrapper endpoint below
    FHEM_URL_BASE using the shared ``session`` and ``HEADERS`` with a
    10-second timeout. The response is parsed as HTML and the text of the
    ``<pre class="log">`` element is split into lines.

    Args:
        device_id: Device identifier; used in both the ``dev`` and ``file``
            query values of the URL.
        year: Year used to build the log file name ``{device_id}-{year}.log``.

    Returns:
        A list of the log lines that begin with a
        ``YYYY-MM-DD_HH:MM:SS`` timestamp (possibly empty), or ``None`` when
        the page contains no ``<pre class="log">`` element or the request
        fails. In both ``None`` cases a message is printed to stdout.
    """
    log_url = (
        f"{FHEM_URL_BASE}/FileLog_logWrapper"
        f"&dev=FileLog_{device_id}"
        f"&type=text&file={device_id}-{year}.log"
    )

    try:
        response = session.get(log_url, headers=HEADERS, timeout=10)
        response.raise_for_status()
        page = BeautifulSoup(response.text, 'html.parser')

        # The log payload lives in a <pre class="log"> element.
        log_block = page.find('pre', class_='log')
        if not log_block:
            print(f"No log data found for device {device_id} in year {year}.")
            return None

        # Keep only the lines that start with the expected date format.
        starts_with_timestamp = re.compile(
            r'^\d{4}-\d{2}-\d{2}_\d{2}:\d{2}:\d{2}'
        ).match
        raw_lines = log_block.text.strip().splitlines()
        return list(filter(starts_with_timestamp, raw_lines))

    except (HTTPError, Timeout) as err:
        print(f"Error fetching log for device {device_id}: {err}")
        return None
    except RequestException as req_err:
        # Any other requests-level failure (connection errors, etc.).
        print(f"An error occurred: {req_err}")
        return None
|
|
|
|
def analyze_log_format(log_lines):
    """Propose a table structure from the ``key:value`` parameters in a log.

    Each line is expected to look like
    ``<YYYY-MM-DD_HH:MM:SS> <MA_device> <key:value> <key:value> ...``.
    Lines that do not match this layout are ignored, as are parameter tokens
    that do not contain exactly one colon.

    The SQL type of a parameter is derived from *all* of its values, not just
    the first one seen: a column starts at the narrowest type that fits and is
    widened (``INT`` -> ``FLOAT`` -> ``VARCHAR(255)``) whenever a later value
    does not fit. Signed numbers (``-3``, ``+2.5``) count as numeric.

    Args:
        log_lines: Iterable of log line strings; may be empty or ``None``.

    Returns:
        ``None`` when ``log_lines`` is empty/``None``; otherwise a dict
        mapping column name to SQL type. ``device_id`` and ``timestamp``
        come first, followed by the parameters in first-seen order.
    """
    if not log_lines:
        return None

    # Compiled once up front rather than being rebuilt for every line.
    line_re = re.compile(
        r"(\d{4}-\d{2}-\d{2}_\d{2}:\d{2}:\d{2}) (MA_[a-zA-Z0-9]+) (.*)"
    )
    int_re = re.compile(r"[+-]?[0-9]+")
    float_re = re.compile(r"[+-]?[0-9]+\.[0-9]+")

    # Index in this tuple is the "width" of a type; wider types can hold
    # every value a narrower one can.
    sql_types = ("INT", "FLOAT", "VARCHAR(255)")

    widest_rank = {}
    for line in log_lines:
        match = line_re.match(line)
        if match is None:
            continue

        for param in match.group(3).split():
            # Only plain "key:value" tokens (exactly one colon) are used.
            # NOTE(review): a "key: value" layout (space after the colon)
            # yields the token "key:" with an empty value, which is typed
            # VARCHAR(255); confirm which layout the real logs use.
            if param.count(":") != 1:
                continue
            key, value = param.split(":")

            if int_re.fullmatch(value):
                rank = 0
            elif float_re.fullmatch(value):
                rank = 1
            else:
                rank = 2

            # Never narrow: keep the widest type observed for this key.
            widest_rank[key] = max(widest_rank.get(key, 0), rank)

    # Propose table structure: fixed columns first, then the parameters.
    table_structure = {
        "device_id": "VARCHAR(255)",
        "timestamp": "DATETIME",
    }
    table_structure.update(
        {key: sql_types[rank] for key, rank in widest_rank.items()}
    )
    return table_structure
|
|
|
|
def print_table_structure(table_structure):
    """Write the proposed schema to stdout as a CREATE TABLE statement.

    Args:
        table_structure: Mapping of column name to SQL type. Each entry is
            emitted as one ``<name> <type>,`` line, in mapping order.

    The statement always targets the ``device_logs`` table and ends with a
    composite primary key on ``(device_id, timestamp)``.
    """
    print("Proposed Table Structure:")
    print("CREATE TABLE device_logs (")

    # One line per column; the trailing comma is valid because the
    # PRIMARY KEY clause always follows.
    for entry in table_structure.items():
        col_name, sql_type = entry
        print(f" {col_name} {sql_type},")

    print(" PRIMARY KEY (device_id, timestamp)")
    print(");")
|
|
|
|
def main(device_id, year=2024):
    """Fetch a device log, preview it, and print a proposed table schema.

    Args:
        device_id: Device identifier passed to ``fetch_device_log``.
        year: Year of the log file to fetch (defaults to 2024).

    All results are reported on stdout; nothing is returned.
    """
    entries = fetch_device_log(device_id, year)
    if not entries:
        print("No log data retrieved.")
        return

    # Preview: at most the first 100 lines, numbered from 1.
    print(f"First 100 lines of the log for device {device_id}:\n")
    for number, entry in enumerate(entries[:100], start=1):
        print(f"{number}: {entry}")

    schema = analyze_log_format(entries)
    if not schema:
        print("No valid log data found to analyze.")
        return

    print("\nAnalyzing log format...")
    print_table_structure(schema)
|
|
|
|
if __name__ == "__main__":
    # Run the full fetch/analyze pipeline for a sample device when the
    # module is executed as a script.
    example_device_id = "MA_030e8b3e5bc3"
    main(device_id=example_device_id)
|