Bonjour,
Je suis à la recherche d'une procédure permettant d'extraire l'intégralité du texte présent dans une page précise d'une section précise d'un document OneNote.
Si vous avez des idées, je suis preneur.
Merci d'avance !
Bonjour,
Je suis à la recherche d'une procédure permettant d'extraire l'intégralité du texte présent dans une page précise d'une section précise d'un document OneNote.
Si vous avez des idées, je suis preneur.
Merci d'avance !
Bonjour,
Peut-être dans cette liste.
J'ai trouvé un code à cette adresse qui pourrait faire ce dont j'ai besoin, mais il est en VBA. Je l'ai modifié pour le rendre compatible VB.NET, mais j'ai une erreur à l'exécution de la ligne 21 (« Référence à un préfixe d'espace de noms non déclaré : 'one' »). Si tu as une idée pour corriger cela, je ne suis pas contre.
Code : Sélectionner tout - Visualiser dans une fenêtre à part
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
Imports MSXML2
Imports oneNote = Microsoft.Office.Interop.OneNote

''' <summary>
''' Lists the OneNote notebooks known to the local OneNote installation in RichTextBox1.
''' </summary>
Public Class Form1

    ''' <summary>
    ''' Asks OneNote for the notebook hierarchy (2013 schema), parses it with MSXML
    ''' and prints name, path, ID and a few optional attributes of every notebook.
    ''' </summary>
    Private Sub Form1_Load(sender As Object, e As EventArgs) Handles MyBase.Load
        ' An empty start node ID means "start at the root", i.e. return every notebook.
        Dim notebookXml As String = ""
        Dim oneNoteAPP As New oneNote.Application
        oneNoteAPP.GetHierarchy("", oneNote.HierarchyScope.hsNotebooks, notebookXml, oneNote.XMLSchema.xs2013)

        Dim doc As New MSXML2.DOMDocument60
        If Not doc.loadXML(notebookXml) Then
            MsgBox("OneNote 2010 XML Data failed to load.")
            Return
        End If

        ' MSXML does not pick up prefixes from the document for XPath queries: the "one"
        ' prefix must be registered explicitly, otherwise selectNodes fails with
        ' "Reference to undeclared namespace prefix: 'one'". The URI is read from the
        ' root element so the query keeps working whatever schema version was returned.
        doc.setProperty("SelectionNamespaces", "xmlns:one='" & doc.documentElement.namespaceURI & "'")

        Dim nodes As MSXML2.IXMLDOMNodeList = doc.documentElement.selectNodes("//one:Notebook")

        For Each node As MSXML2.IXMLDOMNode In nodes
            Dim noteBookName As String = node.attributes.getNamedItem("name").text

            ' Each entry ends with a line break so the entries do not run together
            ' in the RichTextBox.
            RichTextBox1.AppendText("Notebook Name and Location: " & vbCrLf & " " & noteBookName & " is at " & node.attributes.getNamedItem("path").text & vbCrLf)
            RichTextBox1.AppendText("Additional data for " & noteBookName & vbCrLf)
            RichTextBox1.AppendText(" ID: " & node.attributes.getNamedItem("ID").text & vbCrLf)

            ' Not all notebooks carry the optional attributes below.
            RichTextBox1.AppendText(" Color: " & GetAttributeValueFromNode(node, "color") & vbCrLf)
            RichTextBox1.AppendText(" Is Unread: " & GetAttributeValueFromNode(node, "isUnread") & vbCrLf)
            RichTextBox1.AppendText(" Last Modified: " & GetAttributeValueFromNode(node, "lastModifiedTime") & vbCrLf)
        Next
    End Sub

    ''' <summary>
    ''' Returns the text of the attribute <paramref name="attributeName"/> of
    ''' <paramref name="node"/>, or "Not found." when the attribute is absent.
    ''' </summary>
    Private Function GetAttributeValueFromNode(node As MSXML2.IXMLDOMNode, attributeName As String) As String
        Dim attribute As MSXML2.IXMLDOMNode = node.attributes.getNamedItem(attributeName)
        If attribute Is Nothing Then
            Return "Not found."
        End If
        Return attribute.text
    End Function

End Class
Bonjour,
J'ai réussi à extraire des données d'un fichier onenote.
Je suis passé par la librairie XmlDocument puisqu'avec MSXML2 cela ne fonctionnait pas.
Voilà le code que j'ai utilisé :
J'ai néanmoins un nouveau souci. Le texte extrait ne prend pas en compte les tabulations (logique). D'après ce que je comprends, cela se joue sur l'indentation des balises one:OEChildren dans le XML extrait :
Code : Sélectionner tout - Visualiser dans une fenêtre à part
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
Imports oneNote = Microsoft.Office.Interop.OneNote
Imports System.Xml

''' <summary>
''' Writes the text of page "Page1" (section "Section1", notebook "Onenote1") to
''' RichTextBox1, one one:T element per line. Does nothing when the notebook, the
''' section or the page cannot be found.
''' </summary>
Private Sub Form1_Load(sender As Object, e As EventArgs) Handles MyBase.Load
    Dim nom_fichier As String = "Onenote1"
    Dim nom_section As String = "Section1"
    Dim nom_page As String = "Page1"

    Dim oneNoteAPP As New oneNote.Application

    ' An empty start node ID asks OneNote for the hierarchy starting at the root.
    Dim NotebooksXml As String = ""
    oneNoteAPP.GetHierarchy("", oneNote.HierarchyScope.hsNotebooks, NotebooksXml, oneNote.XMLSchema.xsCurrent)

    ' Drill down notebook -> section -> page. Each step yields "" when the previous
    ' one found nothing, so a missing name never reaches XmlDocument.LoadXml("").
    Dim NotebookXml As String = GetChildHierarchy(oneNoteAPP, NotebooksXml, "Notebook", nom_fichier, oneNote.HierarchyScope.hsSections)
    Dim SectionXml As String = GetChildHierarchy(oneNoteAPP, NotebookXml, "Section", nom_section, oneNote.HierarchyScope.hsPages)
    Dim PageXml As String = GetChildHierarchy(oneNoteAPP, SectionXml, "Page", nom_page, oneNote.HierarchyScope.hsChildren)
    If String.IsNullOrEmpty(PageXml) Then Exit Sub

    Dim xmlDoc As New XmlDocument
    xmlDoc.LoadXml(PageXml)

    ' The namespace URI is taken from the root element instead of being looked up
    ' through an "xmlns:one" attribute by name.
    Dim nsm As New XmlNamespaceManager(xmlDoc.NameTable)
    nsm.AddNamespace("one", xmlDoc.DocumentElement.NamespaceURI)

    For Each node_T As XmlNode In xmlDoc.DocumentElement.SelectNodes("//one:T", nsm)
        RichTextBox1.AppendText(node_T.InnerText & vbCr)
    Next
End Sub

''' <summary>
''' Finds, in <paramref name="parentXml"/>, the first one:<paramref name="elementName"/>
''' element whose "name" attribute equals <paramref name="wantedName"/> and returns the
''' XML OneNote reports for it at the given <paramref name="scope"/>.
''' Returns "" when <paramref name="parentXml"/> is empty or no element matches.
''' </summary>
Private Function GetChildHierarchy(app As oneNote.Application, parentXml As String, elementName As String, wantedName As String, scope As oneNote.HierarchyScope) As String
    If String.IsNullOrEmpty(parentXml) Then Return ""

    Dim doc As New XmlDocument
    doc.LoadXml(parentXml)

    Dim nsm As New XmlNamespaceManager(doc.NameTable)
    nsm.AddNamespace("one", doc.DocumentElement.NamespaceURI)

    Dim childXml As String = ""
    For Each candidate As XmlElement In doc.DocumentElement.SelectNodes("//one:" & elementName, nsm)
        ' GetAttribute returns "" for a missing attribute instead of failing.
        If candidate.GetAttribute("name") = wantedName Then
            app.GetHierarchy(candidate.GetAttribute("ID"), scope, childXml, oneNote.XMLSchema.xsCurrent)
            Exit For
        End If
    Next
    Return childXml
End Function
Le but est d'afficher le contenu d'une page OneNote dans un RichTextBox, tout en gérant, dans l'idéal, la mise en forme du texte (j'arrive déjà à faire cela avec un fichier .txt et des balises perso).
Code : Sélectionner tout - Visualiser dans une fenêtre à part
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82 <?xml version="1.0"?> <one:Page xmlns:one="http://schemas.microsoft.com/office/onenote/2013/onenote" ID="{43E32C16-9604-0647-2A12-849EDC262CE0}{1}{E1955462102832091063381969642650783235869461}" name="Puces" dateTime="2018-06-01T08:47:06.000Z" lastModifiedTime="2018-06-01T08:53:30.000Z" pageLevel="1" isCurrentlyViewed="true" lang="fr"> <one:QuickStyleDef index="0" name="PageTitle" fontColor="automatic" highlightColor="automatic" font="Arial" fontSize="20.0" spaceBefore="0.0" spaceAfter="0.0"/> <one:QuickStyleDef index="1" name="p" fontColor="automatic" highlightColor="automatic" font="Arial" fontSize="11.0" spaceBefore="0.0" spaceAfter="0.0"/> <one:PageSettings RTL="false" color="automatic"> <one:PageSize> <one:Automatic/> </one:PageSize> <one:RuleLines visible="false"/> </one:PageSettings> <one:Title lang="fr"> <one:OE author="" authorInitials="" authorResolutionID="<resolutionId provider="None" hash="N9VtndxBzReRJUGOlK+5yw=="><localId name=""/></resolutionId>" lastModifiedBy="" lastModifiedByInitials="" lastModifiedByResolutionID="<resolutionId provider="None" hash="N9VtndxBzReRJUGOlK+5yw=="><localId name=""/></resolutionId>" creationTime="2018-06-01T08:47:12.000Z" lastModifiedTime="2018-06-01T08:47:12.000Z" objectID="{8AB9AABA-FC29-4BE5-A7BA-C896724BE428}{17}{B0}" alignment="left" quickStyleIndex="0"> <one:T> <![CDATA[Puces]]> </one:T> </one:OE> </one:Title> <one:Outline author="" authorInitials="" lastModifiedBy="" lastModifiedByInitials="" lastModifiedTime="2018-06-01T08:53:29.000Z" objectID="{8AB9AABA-FC29-4BE5-A7BA-C896724BE428}{29}{B0}"> <one:Position x="36.0" y="86.4000015258789" z="0"/> <one:Size width="222.4398345947266" height="78.03118133544922"/> <one:OEChildren> <one:OE authorResolutionID="<resolutionId provider="None" hash="N9VtndxBzReRJUGOlK+5yw=="><localId name=""/></resolutionId>" lastModifiedByResolutionID="<resolutionId provider="None" hash="N9VtndxBzReRJUGOlK+5yw=="><localId name=""/></resolutionId>" creationTime="2018-06-01T08:53:09.000Z" 
lastModifiedTime="2018-06-01T08:53:09.000Z" objectID="{8AB9AABA-FC29-4BE5-A7BA-C896724BE428}{169}{B0}" alignment="left" quickStyleIndex="1"> <one:List> <one:Bullet bullet="2" fontSize="11.0"/> </one:List> <one:T> <![CDATA[Puce lvl 0]]> </one:T> <one:OEChildren> <one:OE authorResolutionID="<resolutionId provider="None" hash="N9VtndxBzReRJUGOlK+5yw=="><localId name=""/></resolutionId>" lastModifiedByResolutionID="<resolutionId provider="None" hash="N9VtndxBzReRJUGOlK+5yw=="><localId name=""/></resolutionId>" creationTime="2018-06-01T08:53:11.000Z" lastModifiedTime="2018-06-01T08:53:11.000Z" objectID="{8AB9AABA-FC29-4BE5-A7BA-C896724BE428}{75}{B0}" alignment="left" quickStyleIndex="1"> <one:List> <one:Bullet bullet="3" fontSize="11.0"/> </one:List> <one:T> <![CDATA[Puce lvl 1]]> </one:T> <one:OEChildren> <one:OE authorResolutionID="<resolutionId provider="None" hash="N9VtndxBzReRJUGOlK+5yw=="><localId name=""/></resolutionId>" lastModifiedByResolutionID="<resolutionId provider="None" hash="N9VtndxBzReRJUGOlK+5yw=="><localId name=""/></resolutionId>" creationTime="2018-06-01T08:53:13.000Z" lastModifiedTime="2018-06-01T08:53:13.000Z" objectID="{8AB9AABA-FC29-4BE5-A7BA-C896724BE428}{35}{B0}" alignment="left" quickStyleIndex="1"> <one:List> <one:Bullet bullet="13" fontSize="11.0"/> </one:List> <one:T> <![CDATA[Puce lvl 2]]> </one:T> <one:OEChildren> <one:OE authorResolutionID="<resolutionId provider="None" hash="N9VtndxBzReRJUGOlK+5yw=="><localId name=""/></resolutionId>" lastModifiedByResolutionID="<resolutionId provider="None" hash="N9VtndxBzReRJUGOlK+5yw=="><localId name=""/></resolutionId>" creationTime="2018-06-01T08:53:15.000Z" lastModifiedTime="2018-06-01T08:53:15.000Z" objectID="{8AB9AABA-FC29-4BE5-A7BA-C896724BE428}{36}{B0}" alignment="left" quickStyleIndex="1"> <one:List> <one:Bullet bullet="14" fontSize="11.0"/> </one:List> <one:T> <![CDATA[Puce lvl 3]]> </one:T> <one:OEChildren> <one:OE authorResolutionID="<resolutionId provider="None" 
hash="N9VtndxBzReRJUGOlK+5yw=="><localId name=""/></resolutionId>" lastModifiedByResolutionID="<resolutionId provider="None" hash="N9VtndxBzReRJUGOlK+5yw=="><localId name=""/></resolutionId>" creationTime="2018-06-01T08:53:19.000Z" lastModifiedTime="2018-06-01T08:53:19.000Z" objectID="{8AB9AABA-FC29-4BE5-A7BA-C896724BE428}{134}{B0}" alignment="left" quickStyleIndex="1"> <one:List> <one:Bullet bullet="9" fontSize="11.0"/> </one:List> <one:T> <![CDATA[Puce lvl 4]]> </one:T> <one:OEChildren> <one:OE authorResolutionID="<resolutionId provider="None" hash="N9VtndxBzReRJUGOlK+5yw=="><localId name=""/></resolutionId>" lastModifiedByResolutionID="<resolutionId provider="None" hash="N9VtndxBzReRJUGOlK+5yw=="><localId name=""/></resolutionId>" creationTime="2018-06-01T08:53:20.000Z" lastModifiedTime="2018-06-01T08:53:20.000Z" objectID="{8AB9AABA-FC29-4BE5-A7BA-C896724BE428}{154}{B0}" alignment="left" quickStyleIndex="1"> <one:List> <one:Bullet bullet="7" fontSize="11.0"/> </one:List> <one:T> <![CDATA[Puce lvl 5]]> </one:T> </one:OE> </one:OEChildren> </one:OE> </one:OEChildren> </one:OE> </one:OEChildren> </one:OE> </one:OEChildren> </one:OE> </one:OEChildren> </one:OE> </one:OEChildren> </one:Outline> </one:Page>
Le contenu du OneNote est une liste de puces à 6 niveaux. Je souhaiterais donc obtenir :
Code : Sélectionner tout - Visualiser dans une fenêtre à part
1
2
3
4
5
6 "\f1\fs18\u-5573?\f0\fs24 " & "Puce lvl 0" vbTab & "\f1\fs18\u-8190?\f0\fs24 " & "Puce lvl 1" vbTab & vbTab & "\f1\fs18\u-6347?\f0\fs24 " & "Puce lvl 2" vbTab & vbTab & vbTab & "\f1\fs18\u-5574?\f0\fs24 " & "Puce lvl 3" vbTab & vbTab & vbTab & vbTab & "\f1\fs18\u-8189?\f0\fs24 " & "Puce lvl 4" vbTab & vbTab & vbTab & vbTab & vbTab & "\f1\fs18\u-6348?\f0\fs24 " & "Puce lvl 5"
J'ai réussi à réaliser une fonction récursive qui permet de parcourir l'ensemble des balises du XML. En revanche, je ne parviens pas à faire en sorte de rajouter mes tabulations au-delà du second niveau d'indentation.
Voici la fonction :
Code : Sélectionner tout - Visualiser dans une fenêtre à part
1
2
3
4
5
6
7
8
9
10
11
12
''' <summary>
''' Recursively walks <paramref name="nodes"/> and appends the text of every one:T
''' element, except the page title, to RichTextBox1. Each text is preceded by one tab
''' per nesting level and followed by a carriage return.
''' </summary>
Private Sub TraverseNodes(nodes As XmlNodeList)
    For Each node As XmlNode In nodes
        If node.Name = "one:T" Then
            ' A one:T sits inside a one:OE; for the page title that OE's parent is one:Title.
            Dim container As XmlNode = node.ParentNode
            If container IsNot Nothing Then container = container.ParentNode
            Dim isTitle As Boolean = container IsNot Nothing AndAlso container.Name = "one:Title"

            If Not isTitle Then
                ' Testing one fixed ancestor can only ever add a single tab. Counting
                ' every enclosing one:OEChildren gives the real nesting depth: the
                ' outermost list is depth 1 (no tab), each further level adds one tab.
                Dim depth As Integer = 0
                Dim ancestor As XmlNode = node.ParentNode
                While ancestor IsNot Nothing
                    If ancestor.Name = "one:OEChildren" Then depth += 1
                    ancestor = ancestor.ParentNode
                End While

                If depth > 1 Then
                    RichTextBox1.AppendText(New String(ControlChars.Tab, depth - 1))
                End If
                RichTextBox1.AppendText(node.InnerText & vbCr)
            End If
        End If

        TraverseNodes(node.ChildNodes)
    Next
End Sub
Voilà ma solution :
Code : Sélectionner tout - Visualiser dans une fenêtre à part
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
Imports oneNote = Microsoft.Office.Interop.OneNote
Imports System.Xml
Imports System.IO
Imports System.Text

''' <summary>
''' Displays the text of a OneNote page in RichTextBox1 as RTF, indenting nested
''' list items with one tab per level.
''' </summary>
Public Class Form1

    Private Sub Form1_Load(sender As Object, e As EventArgs) Handles MyBase.Load
        Load_Onenote("Onenote1", "Section1", "Page1")
    End Sub

    ''' <summary>Opens a link clicked in the RichTextBox with its associated application.</summary>
    Private Sub RichTextBox1_LinkClicked(sender As Object, e As LinkClickedEventArgs) Handles RichTextBox1.LinkClicked
        Process.Start(e.LinkText)
    End Sub

    ''' <summary>
    ''' Looks up the page <paramref name="nom_page"/> in section <paramref name="nom_section"/>
    ''' of notebook <paramref name="nom_fichier"/> and loads its text into RichTextBox1.
    ''' Leaves the RichTextBox untouched when any of the three names is not found.
    ''' </summary>
    Private Sub Load_Onenote(nom_fichier As String, nom_section As String, nom_page As String)
        Dim oneNoteAPP As New oneNote.Application

        ' An empty start node ID asks OneNote for the hierarchy starting at the root.
        Dim NotebooksXml As String = ""
        oneNoteAPP.GetHierarchy("", oneNote.HierarchyScope.hsNotebooks, NotebooksXml, oneNote.XMLSchema.xsCurrent)

        ' Each step returns "" when the previous one found nothing.
        Dim NotebookXml As String = GetChildHierarchy(oneNoteAPP, NotebooksXml, "Notebook", nom_fichier, oneNote.HierarchyScope.hsSections)
        Dim SectionXml As String = GetChildHierarchy(oneNoteAPP, NotebookXml, "Section", nom_section, oneNote.HierarchyScope.hsPages)
        Dim PageXml As String = GetChildHierarchy(oneNoteAPP, SectionXml, "Page", nom_page, oneNote.HierarchyScope.hsChildren)
        If String.IsNullOrEmpty(PageXml) Then Exit Sub

        Dim xmlDoc As New XmlDocument
        xmlDoc.LoadXml(PageXml)

        Dim header As String = "{\rtf1\ansi\ansicpg1252\uc1\deff0\deftab360 {\fonttbl{\f1 Segoe MDL2 Assets;}{\f2 \ftech Wingdings;}{\f3 \ftech Wingdings 2;}{\f4 \ftech Wingdings 3;}{\f5 \ftech Symbol;}{\f6 \ftech Webdings;}}\fs24"

        Dim sb As New StringBuilder()
        sb.Append(header)
        sb.Append(TraverseNodes(xmlDoc.ChildNodes))
        sb.Append("\par")
        sb.Append("}")

        Me.RichTextBox1.Rtf = sb.ToString()
    End Sub

    ''' <summary>
    ''' Finds, in <paramref name="parentXml"/>, the first one:<paramref name="elementName"/>
    ''' element whose "name" attribute equals <paramref name="wantedName"/> and returns the
    ''' XML OneNote reports for it at the given <paramref name="scope"/>.
    ''' Returns "" when <paramref name="parentXml"/> is empty or no element matches.
    ''' </summary>
    Private Function GetChildHierarchy(app As oneNote.Application, parentXml As String, elementName As String, wantedName As String, scope As oneNote.HierarchyScope) As String
        If String.IsNullOrEmpty(parentXml) Then Return ""

        Dim doc As New XmlDocument
        doc.LoadXml(parentXml)

        ' The namespace URI comes from the root element rather than from an
        ' "xmlns:one" attribute looked up by name.
        Dim nsm As New XmlNamespaceManager(doc.NameTable)
        nsm.AddNamespace("one", doc.DocumentElement.NamespaceURI)

        Dim childXml As String = ""
        For Each candidate As XmlElement In doc.DocumentElement.SelectNodes("//one:" & elementName, nsm)
            ' GetAttribute returns "" for a missing attribute instead of failing.
            If candidate.GetAttribute("name") = wantedName Then
                app.GetHierarchy(candidate.GetAttribute("ID"), scope, childXml, oneNote.XMLSchema.xsCurrent)
                Exit For
            End If
        Next
        Return childXml
    End Function

    ''' <summary>
    ''' Returns the RTF body for all one:T elements found under <paramref name="nodes"/>,
    ''' in document order: one paragraph per element, indented with one \tab per
    ''' nesting level.
    ''' </summary>
    Private Function TraverseNodes(nodes As XmlNodeList) As String
        Dim rtf As New StringBuilder()
        AppendNodesRtf(nodes, rtf)
        Return rtf.ToString()
    End Function

    ''' <summary>Recursive worker for TraverseNodes; appends to a shared builder.</summary>
    Private Sub AppendNodesRtf(nodes As XmlNodeList, rtf As StringBuilder)
        For Each node As XmlNode In nodes
            If node.Name = "one:T" Then
                For i As Integer = 1 To GetIndentLevel(node)
                    ' The trailing space delimits the control word. Without it, text
                    ' starting with a letter is read as part of the control word
                    ' ("\tabPuce") and disappears.
                    rtf.Append("\tab ")
                Next
                rtf.Append(EscapeRtf(node.InnerText)).Append("\par ")
            End If

            AppendNodesRtf(node.ChildNodes, rtf)
        Next
    End Sub

    ''' <summary>
    ''' Counts the nesting level of a one:T element. Starting four levels above the
    ''' element, it climbs two levels at a time for as long as it lands on a
    ''' one:OEChildren element.
    ''' </summary>
    Private Shared Function GetIndentLevel(textNode As XmlNode) As Integer
        Dim level As Integer = 0
        Dim container As XmlNode = Ancestor(textNode, 4)
        While container IsNot Nothing AndAlso container.Name = "one:OEChildren"
            level += 1
            container = Ancestor(container, 2)
        End While
        Return level
    End Function

    ''' <summary>Returns the ancestor <paramref name="levels"/> steps above <paramref name="node"/>, or Nothing if the tree ends first.</summary>
    Private Shared Function Ancestor(node As XmlNode, levels As Integer) As XmlNode
        Dim current As XmlNode = node
        While current IsNot Nothing AndAlso levels > 0
            current = current.ParentNode
            levels -= 1
        End While
        Return current
    End Function

    ''' <summary>
    ''' Escapes <paramref name="value"/> for literal inclusion in RTF: backslash and
    ''' braces are prefixed with a backslash, and characters above 127 are written as
    ''' \uN? (signed 16-bit code unit, one fallback character to match \uc1 in the header).
    ''' </summary>
    Private Shared Function EscapeRtf(value As String) As String
        If String.IsNullOrEmpty(value) Then Return ""

        Dim escaped As New StringBuilder(value.Length)
        For Each c As Char In value
            Dim code As Integer = AscW(c)
            If c = "\"c OrElse c = "{"c OrElse c = "}"c Then
                escaped.Append("\"c).Append(c)
            ElseIf code > 127 Then
                ' \u takes a signed 16-bit value: code units above 32767 become negative.
                If code > 32767 Then code -= 65536
                escaped.Append("\u").Append(code.ToString(System.Globalization.CultureInfo.InvariantCulture)).Append("?"c)
            Else
                escaped.Append(c)
            End If
        Next
        Return escaped.ToString()
    End Function

End Class
Partager