Commit 67ea911c, authored by Jeffery To and committed by Christian Hergert

python3.lang: Allow non-ASCII characters in identifiers

Python 3 allows (some) Unicode characters in identifiers[1]. This
changes highlighting for identifiers, e.g. function/class names, to
account for these characters.

Fixes #31.

[1]: https://docs.python.org/3/reference/lexical_analysis.html#identifiers
parent 79de0193
......@@ -34,6 +34,7 @@
<style id="string" name="String" map-to="python:string"/>
<style id="escaped-char" name="Escaped Character" map-to="python:escaped-char"/>
<style id="format" name="Format" map-to="python:format"/>
<style id="string-conversion" name="String Conversion" map-to="python:string-conversion"/>
<style id="special-variable" name="Special Variable" map-to="python:special-variable"/>
<style id="boolean" name="Boolean" map-to="python:boolean"/>
<style id="floating-point" name="Floating point number" map-to="python:floating-point"/>
......@@ -43,12 +44,50 @@
<style id="builtin-constant" name="Builtin Constant" map-to="python:builtin-constant"/>
<style id="builtin-object" name="Builtin Object" map-to="python:builtin-object"/>
<style id="builtin-function" name="Builtin Function" map-to="python:builtin-function"/>
<style id="function-name" name="Function Name" map-to="python:function-name"/>
<style id="class-name" name="Class Name" map-to="python:class-name"/>
<style id="decorator" name="Decorator" map-to="python:decorator"/>
</styles>
<definitions>
<define-regex id="identifier">[_a-zA-Z][_a-zA-Z0-9]*</define-regex>
<!--
  Characters that may begin a Python 3 identifier: letters (general
  categories Lu, Ll, Lt, Lm, Lo), letter numbers (Nl), the underscore, and
  the code points listed as Other_ID_Start.
  https://docs.python.org/3/reference/lexical_analysis.html#identifiers
-->
<define-regex id="id-start" extended="true">
(?:
[\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}_] |
[\x{1885}-\x{1886}\x{2118}\x{212E}\x{309B}-\x{309C}] # Other_ID_Start (Unicode 12.1.0)
)
</define-regex>
<!--
  Characters allowed after the first character of an identifier: anything
  matched by id-start, plus marks (Mn, Mc), decimal digits (Nd), connector
  punctuation (Pc), and the code points listed as Other_ID_Continue.
-->
<define-regex id="id-continue" extended="true">
(?:
\%{id-start} |
[\p{Mn}\p{Mc}\p{Nd}\p{Pc}] |
[\x{00B7}\x{0387}\x{1369}-\x{1371}\x{19DA}] # Other_ID_Continue (Unicode 12.1.0)
)
</define-regex>
<!--
  A complete identifier: one id-start character followed by any number of
  id-continue characters. The atomic group (?> ...) stops the regex engine
  from backtracking into a partially matched identifier.
-->
<define-regex id="identifier" extended="true">
(?&gt; \%{id-start} \%{id-continue}* )
</define-regex>
<!-- A positive decimal integer with no leading zero; used by the format context for its width and precision fields. -->
<define-regex id="number">[1-9][0-9]*</define-regex>
<!-- A dotted name: one or more identifiers joined by single dots, e.g. "os.path". -->
<define-regex id="identifier-path" extended="true">
(\%{identifier}\.)*\%{identifier}
</define-regex>
<!--
  Either an identifier-path optionally preceded by any number of dots, or
  one or more dots on their own. Used by module-handler-from as the target
  that follows "from".
-->
<define-regex id="relative-path" extended="true">
(\.*\%{identifier-path}|\.+)
</define-regex>
<!--
  printf-style conversion specifier inside a string literal, for example
  %s, %(name)s, %-10.3f or %%.
  https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting
-->
<context id="format" style-ref="format" extend-parent="false">
  <match extended="true">
    %                           # leading % sign
    (?:\(\%{identifier}\))?     # mapping key; the whole parenthesised key is optional
    [#0\-\ \+]*                 # conversion flags
    (\-?\%{number}|\*)?         # minimum field width
    (\.(\-?\%{number}|\*))?     # precision
    [hlL]?                      # length modifier: a single h, l or L
    [diouxXeEfFgGcrsa%]         # conversion type, including the Python 3 ascii conversion
  </match>
</context>
<!-- Optional bytes prefix (b or B) that may precede the opening quote of a string. -->
<define-regex id="string-prefix">(b|B)?</define-regex>
<!-- Mandatory raw-string prefix: r or R alone, or combined with b or B in either order and any mix of case. -->
<define-regex id="raw-string-prefix">(r|R|rb|RB|rB|Rb|br|BR|bR|Br)</define-regex>
......@@ -57,7 +96,7 @@
<start>\%{string-prefix}"""</start>
<end>"""</end>
<include>
<context ref="python:format"/>
<context ref="format"/>
<context ref="python:escaped-char"/>
</include>
</context>
......@@ -66,7 +105,7 @@
<start>\%{string-prefix}'''</start>
<end>'''</end>
<include>
<context ref="python:format"/>
<context ref="format"/>
<context ref="python:escaped-char"/>
</include>
</context>
......@@ -75,7 +114,7 @@
<start>\%{string-prefix}"</start>
<end>"</end>
<include>
<context ref="python:format"/>
<context ref="format"/>
<context ref="python:escaped-char"/>
<context ref="def:line-continue"/>
</include>
......@@ -85,7 +124,7 @@
<start>\%{string-prefix}'</start>
<end>'</end>
<include>
<context ref="python:format"/>
<context ref="format"/>
<context ref="python:escaped-char"/>
<context ref="def:line-continue"/>
</include>
......@@ -95,7 +134,7 @@
<start>\%{raw-string-prefix}"""</start>
<end>"""</end>
<include>
<context ref="python:format"/>
<context ref="format"/>
</include>
</context>
......@@ -103,7 +142,7 @@
<start>\%{raw-string-prefix}'''</start>
<end>'''</end>
<include>
<context ref="python:format"/>
<context ref="format"/>
</include>
</context>
......@@ -111,7 +150,7 @@
<start>\%{raw-string-prefix}"</start>
<end>"</end>
<include>
<context ref="python:format"/>
<context ref="format"/>
<context ref="def:line-continue"/>
</include>
</context>
......@@ -120,11 +159,55 @@
<start>\%{raw-string-prefix}'</start>
<end>'</end>
<include>
<context ref="python:format"/>
<context ref="format"/>
<context ref="def:line-continue"/>
</include>
</context>
<context id="module-handler-from">
  <match extended="true">
    (from) \s+ (\%{relative-path})  # group 1: keyword, group 2: module path
  </match>
  <include>
    <context sub-pattern="1" style-ref="module-handler"/>
    <context sub-pattern="2" style-ref="class-name"/>
  </include>
</context>
<!-- 'def' is also present in the 'keyword' context, but has a lower
priority, so 'def' is highlighted even if the function name doesn't match. -->
<context id="function-definition">
  <match extended="true">
    (def) \s+ (\%{identifier})  # group 1: keyword, group 2: function name
  </match>
  <include>
    <context sub-pattern="1" style-ref="keyword"/>
    <context sub-pattern="2" style-ref="function-name"/>
  </include>
</context>
<!-- 'class' is also present in the 'keyword' context, but has a lower
priority, so 'class' is highlighted even if the class name doesn't match. -->
<context id="class-definition">
  <match extended="true">
    (class) \s+ (\%{identifier})  # group 1: keyword, group 2: class name
  </match>
  <include>
    <context sub-pattern="1" style-ref="keyword"/>
    <context sub-pattern="2" style-ref="class-name"/>
  </include>
</context>
<!-- Applies the decorator style to "@" followed by a dotted name (identifier-path). -->
<context id="decorator" style-ref="decorator">
<match>@\%{identifier-path}</match>
</context>
<context id="python3" class="no-spell-check">
<include>
<context ref="def:shebang"/>
......@@ -159,11 +242,11 @@
<context ref="python:hex"/>
<context ref="python:module-handler-from"/>
<context ref="module-handler-from"/>
<context ref="python:module-handler"/>
<context ref="python:function-definition"/>
<context ref="python:class-definition"/>
<context ref="python:decorator"/>
<context ref="function-definition"/>
<context ref="class-definition"/>
<context ref="decorator"/>
<context ref="python:keywords"/>
<context id="3x-only-keywords" style-ref="keyword">
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment