"""Fetch a FHEM device log and propose a SQL table structure for its readings."""

import re
from typing import Dict, List, Optional

import requests
from bs4 import BeautifulSoup
from requests.exceptions import HTTPError, Timeout, RequestException

# FHEM server URL base
FHEM_URL_BASE = "https://fhem.auwiesen2.de/fhem"

# CSRF token (replace with your actual token)
CSRF_TOKEN = "csrf_611440676390392"

# Headers including CSRF token
HEADERS = {
    "X-FHEM-csrfToken": CSRF_TOKEN,
    "Content-Type": "application/x-www-form-urlencoded",
    "Accept": "text/html",
}

# Session to handle requests
session = requests.Session()

# A log line is kept only if it starts with "YYYY-MM-DD_HH:MM:SS".
_TIMESTAMP_PREFIX_RE = re.compile(r"^\d{4}-\d{2}-\d{2}_\d{2}:\d{2}:\d{2}")

# "<timestamp> <device id starting with MA_> <rest of line>".
_LOG_LINE_RE = re.compile(
    r"(\d{4}-\d{2}-\d{2}_\d{2}:\d{2}:\d{2}) (MA_[a-zA-Z0-9]+) (.*)"
)

# Value classifiers; an optional sign is accepted so negative readings
# are still recognised as numeric.
_INT_RE = re.compile(r"[+-]?\d+")
_FLOAT_RE = re.compile(r"[+-]?\d+(\.\d+)?")

# Ordering used to widen a column type when later values do not fit the
# type inferred from earlier ones (INT -> FLOAT -> VARCHAR(255)).
_TYPE_RANK = {"INT": 0, "FLOAT": 1, "VARCHAR(255)": 2}


def fetch_device_log(device_id, year) -> Optional[List[str]]:
    """Fetch the log file for the specified device and year.

    Args:
        device_id: Device identifier; used to build both the FileLog device
            name and the log file name in the request URL.
        year: Year that is part of the log file name.

    Returns:
        The lines of the ``<pre class="log">`` element that start with a
        ``YYYY-MM-DD_HH:MM:SS`` timestamp (possibly an empty list), or
        ``None`` when the request fails or no such element is present.
        Failures are reported with ``print`` rather than raised.
    """
    log_url = (
        f"{FHEM_URL_BASE}/FileLog_logWrapper&dev=FileLog_{device_id}"
        f"&type=text&file={device_id}-{year}.log"
    )

    try:
        response = session.get(log_url, headers=HEADERS, timeout=10)
        response.raise_for_status()
    except (HTTPError, Timeout) as err:
        print(f"Error fetching log for device {device_id}: {err}")
        return None
    except RequestException as req_err:
        print(f"An error occurred: {req_err}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract the log data
    log_content = soup.find('pre', class_='log')
    if not log_content:
        print(f"No log data found for device {device_id} in year {year}.")
        return None

    log_text = log_content.text.strip()
    # Only keep lines that start with the expected date format
    return [
        line for line in log_text.splitlines()
        if _TIMESTAMP_PREFIX_RE.match(line)
    ]


def _infer_sql_type(value: str) -> str:
    """Return the narrowest SQL type name that can hold *value*."""
    if _INT_RE.fullmatch(value):
        return "INT"
    if _FLOAT_RE.fullmatch(value):
        return "FLOAT"
    return "VARCHAR(255)"


def analyze_log_format(log_lines) -> Optional[Dict[str, str]]:
    """Analyze the log lines to propose a table structure.

    Every line of the form ``<timestamp> <MA_ device> <params>`` is split on
    whitespace, and each ``key:value`` token contributes a column named
    ``key``. Lines that do not match that form are ignored.

    The column type is the widest type required by any value seen for that
    key, so a column whose first value is ``21`` and a later one ``21.5``
    becomes ``FLOAT``, and any non-numeric value makes it ``VARCHAR(255)``.

    Args:
        log_lines: Iterable of log line strings.

    Returns:
        ``None`` if *log_lines* is empty or ``None``; otherwise a dict mapping
        column name to SQL type, starting with ``device_id`` and ``timestamp``
        followed by the parameters in first-seen order.
    """
    if not log_lines:
        return None

    parameter_types: Dict[str, str] = {}

    for line in log_lines:
        match = _LOG_LINE_RE.match(line)
        if not match:
            continue

        for token in match.group(3).split():
            key_value = token.split(":")
            # NOTE(review): tokens without exactly one ':' are skipped, so a
            # "key: value" layout (space after the colon) yields an empty
            # value for "key:" and drops "value" - confirm the log format.
            if len(key_value) != 2:
                continue

            key, value = key_value
            inferred = _infer_sql_type(value)
            current = parameter_types.get(key)
            # Widen only; re-assigning an existing key keeps its position.
            if current is None or _TYPE_RANK[inferred] > _TYPE_RANK[current]:
                parameter_types[key] = inferred

    # Propose table structure
    table_structure = {
        "device_id": "VARCHAR(255)",
        "timestamp": "DATETIME",
    }
    table_structure.update(parameter_types)
    return table_structure


def print_table_structure(table_structure) -> None:
    """Print the proposed table structure as a CREATE TABLE statement.

    Args:
        table_structure: Mapping of column name to SQL type; columns are
            printed in the mapping's iteration order.
    """
    print("Proposed Table Structure:")
    print("CREATE TABLE device_logs (")
    for column, dtype in table_structure.items():
        print(f" {column} {dtype},")
    print(" PRIMARY KEY (device_id, timestamp)")
    print(");")


def main(device_id, year=2024) -> None:
    """Fetch a device log, show its first lines and print a table proposal.

    Args:
        device_id: Device identifier passed to ``fetch_device_log``.
        year: Year of the log file to fetch.
    """
    log_lines = fetch_device_log(device_id, year)
    if not log_lines:
        print("No log data retrieved.")
        return

    # Print the first 100 lines of the log
    print(f"First 100 lines of the log for device {device_id}:\n")
    for number, line in enumerate(log_lines[:100], start=1):
        print(f"{number}: {line}")

    table_structure = analyze_log_format(log_lines)
    if table_structure:
        print("\nAnalyzing log format...")
        print_table_structure(table_structure)
    else:
        print("No valid log data found to analyze.")


if __name__ == "__main__":
    # Example device ID to test with
    example_device_id = "MA_030e8b3e5bc3"
    main(example_device_id)