evaluation #6

Open
opened 2018-05-28 06:44:05 +00:00 by agp8x · 0 comments

poc:

from lxml import etree

# Namespace map for the GPX 1.1 schema used by the XPath query below.
GPX_NS = {'gpx': "http://www.topografix.com/GPX/1/1"}

# recover=True lets the parser continue past malformed markup instead of
# aborting on the first error.
parser = etree.XMLParser(recover=True)

# Binary mode lets the parser do its own encoding detection; the with-block
# closes the handle, which the original snippet never did.
with open("playerid1434962064116.gpx", "rb") as gpx_file:
    tree = etree.parse(gpx_file, parser=parser)

# Dump the recovered tree to see what the parser made of the broken input.
print(etree.tostring(tree, pretty_print=True).decode())

# Outside a REPL a bare expression is silently discarded, so print the result.
print(tree.xpath(".//gpx:desc", namespaces=GPX_NS))

class:

import logging
from datetime import datetime

from lxml import etree

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

#class NeoCartoLoader(Loader):
class NeoCartoLoader:
    """Load track points from a GPX 1.1 file into plain dictionaries.

    The file is parsed in lxml's recovering mode, so malformed markup does
    not abort the load. Every ``trkpt`` element becomes one entry of the form
    ``{"location": {"type": "Point", "coordinates": [lon, lat]},
    "timestamp": <milliseconds since the epoch>, "event": {...}}``.
    """

    # Namespace map shared by every namespaced XPath query in this class.
    NAMESPACES = {'gpx': "http://www.topografix.com/GPX/1/1"}

    # Position from which the separating "." is searched when the lat
    # attribute holds more than one ".".
    # NOTE(review): the value 4 presumably matches the damaged files seen so
    # far -- confirm against real data before changing it.
    _FUSED_SEARCH_START = 4

    def __init__(self):
        # Start empty so get_entry() is usable before load() has been called.
        self.entries = []

    def load(self, file: str):
        """Parse *file* and replace ``self.entries`` with its track points.

        Points for which ``parse_point`` raises ``ValueError`` are logged
        together with their XML and skipped; all other points are kept in
        document order.

        Args:
            file: Path of the GPX file to read.
        """
        parser = etree.XMLParser(recover=True)
        # Binary mode leaves encoding detection to the XML parser, and the
        # with-block guarantees the handle is closed again.
        with open(file, "rb") as src:
            tree = etree.parse(src, parser=parser)
        self.entries = []
        for point in tree.xpath("//gpx:trkpt", namespaces=self.NAMESPACES):
            try:
                self.entries.append(self.parse_point(point))
            except ValueError as e:
                log.warning(
                    "skipping track point: %s\n%s",
                    e,
                    etree.tostring(point, pretty_print=True).decode(),
                )

    def parse_point(self, point):
        """Convert one ``trkpt`` element into an entry dictionary.

        Args:
            point: Element offering lxml's ``xpath(expr, namespaces=...)``.

        Returns:
            A dict with the keys ``location`` (GeoJSON-style point with
            ``[lon, lat]`` coordinates), ``timestamp`` (integer milliseconds)
            and ``event`` (attributes of the optional event element).

        Raises:
            ValueError: If coordinates, time or event data are missing,
                duplicated or unparsable.
        """
        lat, lon = self._parse_coordinates(point)
        timestamp = self._parse_timestamp(point)
        event = self._parse_event(point)
        return {
            "location": {
                "type": "Point",
                "coordinates": [lon, lat],
            },
            "timestamp": timestamp,
            "event": event,
        }

    def _parse_coordinates(self, point):
        """Return ``(lat, lon)`` as floats, splitting a fused lat attribute."""
        lat_values = point.xpath("@lat")
        if not lat_values:
            raise ValueError("track point has no lat attribute")
        raw_lat = lat_values[0]
        if raw_lat.count(".") > 1:
            # More than one "." means the attribute cannot be a single number;
            # the text is cut at the first "." found from the search start.
            log.warning("recreate lat/lon from: %s", raw_lat)
            log.warning("%s", etree.tostring(point, pretty_print=True).decode())
            split_at = raw_lat.index(".", self._FUSED_SEARCH_START)
            raw_lon = raw_lat[split_at:]
            raw_lat = raw_lat[:split_at]
        else:
            lon_values = point.xpath("@lon")
            if not lon_values:
                raise ValueError("track point has no lon attribute")
            raw_lon = lon_values[0]
        return float(raw_lat), float(raw_lon)

    def _parse_timestamp(self, point):
        """Return the point's time as integer milliseconds since the epoch."""
        times = point.xpath("gpx:time", namespaces=self.NAMESPACES)
        if len(times) != 1:
            raise ValueError(
                f"expected exactly one time element, found {len(times)}"
            )
        # An empty element yields text None; "" makes strptime raise ValueError.
        moment = datetime.strptime(times[0].text or "", "%Y-%m-%dT%H:%M:%SZ")
        # The literal "Z" marks UTC. Subtracting the naive epoch keeps the
        # result independent of the local timezone, unlike calling
        # timestamp() on a naive datetime.
        return int((moment - datetime(1970, 1, 1)).total_seconds() * 1000)

    def _parse_event(self, point):
        """Return the attributes of the optional event element as a dict."""
        events = point.xpath(".//gpx:event", namespaces=self.NAMESPACES)
        if len(events) > 1:
            raise ValueError(
                f"expected at most one event element, found {len(events)}"
            )
        if not events:
            return {}
        node = events[0]
        event = dict(node.attrib)
        tail = (node.tail or "").strip()
        if tail:
            try:
                # base case: trailing 'geoid="0"/>'
                key, quoted = tail.split("=")
                event[key] = quoted.split('"')[1]
            except (ValueError, IndexError):
                # Anything that is not a single key="value" pair is kept raw.
                event['__tail__'] = tail
        return event

    def get_entry(self) -> object:
        """Yield the loaded entries one by one, in load order."""
        yield from self.entries
poc: ``` from lxml import etree f=open("playerid1434962064116.gpx") p=etree.XMLParser(recover=True) t=etree.parse(f, parser=p) print(etree.tostring(t, pretty_print=True).decode()) t.xpath(".//gpx:desc", namespaces={'gpx':"http://www.topografix.com/GPX/1/1"}) ``` class: ``` import logging from datetime import datetime from lxml import etree log = logging.getLogger(__name__) #class NeoCartoLoader(Loader): class NeoCartoLoader: def load(self, file: str): src = open(file, "r") parser = etree.XMLParser(recover=True) tree = etree.parse(src, parser=parser) self.entries = [] for point in tree.xpath("//gpx:trkpt", namespaces={'gpx':"http://www.topografix.com/GPX/1/1"}): try: self.entries.append(self.parse_point(point)) except ValueError as e: print(e, etree.tostring(point, pretty_print=True).decode()) def parse_point(self, point): raw_lat = point.xpath("@lat")[0] if raw_lat.count(".") > 1: log.warning(f"recreate lat/lon from: {raw_lat}") log.warn(etree.tostring(point, pretty_print=True).decode()) start_offset = 4 x = raw_lat[start_offset:].index(".") offset = start_offset + x raw_lon = raw_lat[offset:] raw_lat = raw_lat[:offset] else: raw_lon = point.xpath("@lon")[0] lat = float(raw_lat) lon = float(raw_lon) times = point.xpath("gpx:time",namespaces={'gpx':"http://www.topografix.com/GPX/1/1"}) assert len(times) == 1 time = times[0].text dt = datetime.strptime(time, "%Y-%m-%dT%H:%M:%SZ") timestamp = int(dt.timestamp()*1000) events = point.xpath(".//gpx:event",namespaces={'gpx':"http://www.topografix.com/GPX/1/1"}) assert 0 <= len(events) <= 1 event = {} if events: event = dict(events[0].attrib) if events[0].tail and events[0].tail.strip(): try: # base case: trailing 'geoid="0"/>' key, v = events[0].tail.strip().split("=") value = v.split('"')[1] event[key] = value except: event['__tail__'] = events[0].tail.strip() return { "location": { "type": "Point", "coordinates": [lon, lat] }, "timestamp": timestamp, "event": event, } def get_entry(self) -> object: for i in 
self.entries: yield i ```
Sign in to join this conversation.
No Label
No Milestone
No Assignees
1 Participants
Notifications
Due Date
The due date is invalid or out of range. Please use the format 'yyyy-mm-dd'.

No due date set.

Dependencies

No dependencies set.

Reference: ma/project#6
There is no content yet.