Skip to content
Snippets Groups Projects
Commit 86465ebc authored by Nils Breyer's avatar Nils Breyer
Browse files

write lock to prevent simulatenous writes to csv file

parent 93eb4b21
No related branches found
No related tags found
No related merge requests found
# Changelog
## 1.1.2 (2022-01-26)
* FIX: Simultaneous writes to csv can cause invalid format in output file
## 1.1.1 (2022-01-13)
* FIX: Initialize new connection if status file unreadable
* FIX: Connection not initialized if status file unreadable
## 1.1.0 (2022-01-13)
......
......@@ -12,7 +12,7 @@ from crawler import StreamCrawler
import crawler.trafikverket
VERSION = "1.1.1"
VERSION = "1.1.2"
parser = argparse.ArgumentParser(description="Crawler for Trafikverket's Trafikinfo API - version " + VERSION)
......
import shutil
from threading import Event, Timer, Thread
from threading import Event, Timer, Thread, Lock
from queue import Queue
import os
import requests
......@@ -47,6 +47,7 @@ class Crawler:
message_webhook_url = None
messages = Queue()
authkey = ""
write_lock = Lock()
DEBUG = False
def __init__(self, url, path, authkey, version=VERSION, output_version=OUTPUT_VERSION):
......
......@@ -219,8 +219,10 @@ class DailyTrainAnnouncementCrawler (crawler.PeriodicCrawler):
self.log_exception(e)
#write to csv
data.to_csv(filename, index=False)
self.writes += 1
with self.write_lock:
data.to_csv(filename, index=False)
self.writes += 1
if self.DEBUG:
self.log_info("Written {rows} rows.".format(rows=len(data)))
except Exception as e:
......@@ -281,8 +283,10 @@ class DailyTrainMessageCrawler (crawler.PeriodicCrawler):
self.log_exception(e)
#write to csv
data.to_csv(filename, index=False)
self.writes += 1
with self.write_lock:
data.to_csv(filename, index=False)
self.writes += 1
if self.DEBUG:
self.log_info("Written {rows} rows.".format(rows=len(data)))
except Exception as e:
......@@ -331,11 +335,14 @@ class StreamTrainAnnouncementCrawler (crawler.StreamCrawler):
data.insert(loc=0, column="TimeReceived", value=len(data)*[format_datetime(datetime.datetime.now(), utc=True)])
if self.DEBUG:
self.log_info("Received {rows} rows.".format(rows=len(data)))
#write to csv
data.to_csv(self.csvfile, index=False, mode='a', header=(not os.path.exists(self.csvfile)))
with self.write_lock:
data.to_csv(self.csvfile, index=False, mode='a', header=(not os.path.exists(self.csvfile)))
self.writes += 1
if self.DEBUG:
self.log_info("Written {rows} rows.".format(rows=len(data)))
self.writes += 1
except Exception as e:
self.log_exception(e)
......@@ -376,11 +383,14 @@ class StreamTrainMessageCrawler (crawler.StreamCrawler):
if self.DEBUG:
self.log_info("Received {rows} rows.".format(rows=len(data)))
#write to csv
data.to_csv(self.csvfile, index=False, mode='a', header=(not os.path.exists(self.csvfile)))
with self.write_lock:
data.to_csv(self.csvfile, index=False, mode='a', header=(not os.path.exists(self.csvfile)))
self.writes += 1
if self.DEBUG:
self.log_info("Written {rows} rows.".format(rows=len(data)))
self.writes += 1
except Exception as e:
self.log_exception(e)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment