Commit 423b2476 by 陶然

init

parent bed5413e
...@@ -3,3 +3,4 @@ ...@@ -3,3 +3,4 @@
################################################################################ ################################################################################
/Src/obj/Debug /Src/obj/Debug
/Src/bin/Debug
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Njust.Pdf.Analysis
{
/// <summary>
/// Central list of the string constants that entity classes pass to their
/// <c>[Alias(...)]</c> attribute (presumably the database table names; confirm against the Kivii ORM).
/// </summary>
internal static class Configs
{
// Document record plus the author / keyword / organisation rows attached to it.
public const string TableAnalyseName = "PDF_Analysis";
public const string TableAuthorName = "PDF_Authors";
public const string TableKeywordName = "PDF_Keywords";
public const string TableOrganName = "PDF_Organs";
// "Dis" constants: aliases for the distribution / statistics entities.
public const string TableDisAuthor = "PDF_DisAuthors";
public const string TableDisTrend = "PDF_DisTrend";
public const string TableDisCoauthor = "PDF_DisCoauthor";
public const string TableDisOrgan = "PDF_DisOrgan";
public const string TableDisCoOrgan = "PDF_DisCoOrgan";
public const string TableDisCountry = "PDF_DisCountry";
public const string TableDisCoCountry = "PDF_DisCoCountry";
public const string TableDisSource = "PDF_DisSource";
public const string TableDisFund = "PDF_DisFund";
public const string TableDisCoKeyword = "PDF_DisCoKeyword";
// Note the plural "s" in the two values below, unlike most of the other Dis* values.
public const string TableDisTimeTopic = "PDF_DisTimeTopics";
public const string TableDisTopicEvolution = "PDF_DisTopicEvolutions";
}
}
using Kivii;
using Kivii.DataAnnotations;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Njust.Pdf.Analysis.Entities
{
/// <summary>
/// Shared column set for a parsed document: bibliographic fields, extracted text bodies,
/// and import / analysis / review bookkeeping. Analyse and AnalyseAll derive from it.
/// NOTE: the "Anlysis" spelling (sic) in several property names is part of the public
/// contract and must not be corrected without migrating callers.
/// </summary>
public class AnalyseBase : EntityWithMetadata
{
// --- Identification and bibliographic metadata ---
[ApiMember(Description = "文献哈希值")]
[IgnoreUpdate]
[StringLength(50)]
public string HashCode { get; set; }
[ApiMember(Description = "文件名")]
[StringLength(200)]
public string FileName { get; set; }
[ApiMember(Description = "标题")]
[StringLength(500)]
public string Title { get; set; }
[ApiMember(Description = "类别")]
[StringLength(50)]
public string Type { get; set; }
[ApiMember(Description = "分类")]
[StringLength(50)]
public string Category { get; set; }
[ApiMember(Description = "期刊")]
public string Source { get; set; }
[ApiMember(Description = "作者")]
public string Author { get; set; }
[ApiMember(Description = "关键词")]
public string Keywords { get; set; }
[ApiMember(Description = "发表日期")]
public DateTime? PublishTime { get; set; }
[ApiMember(Description = "机构")]
[StringLength(500)]
public string Organization { get; set; }
[ApiMember(Description = "国家")]
public string Country { get; set; }
[ApiMember(Description = "基金")]
[StringLength(1000)]
public string Fund { get; set; }
// --- Text bodies; declared with StringLength(int.MaxValue) ---
[ApiMember(Description = "摘要")]
[StringLength(int.MaxValue)]
public string Abstract { get; set; }
[ApiMember(Description = "正文")]
[StringLength(int.MaxValue)]
public string Mainbody { get; set; }
// Full extracted text; AnalyseJob writes the PdfBox output here.
[ApiMember(Description = "全文")]
[InternalSetter]
[StringLength(int.MaxValue)]
public string Allbody { get; set; }
[ApiMember(Description = "参考文献")]
[StringLength(int.MaxValue)]
public string Reference { get; set; }
// --- Import bookkeeping ---
[ApiMember(Description = "导入时间")]
public DateTime ImportTime { get; set; }
[ApiMember(Description = "导入路径")]
[InternalSetter]
[StringLength(200)]
public string ImportPath { get; set; }
// NOTE(review): unlike AnlysisUserName this property carries no [CurrentMemberName]; confirm that is intended.
[ApiMember(Description = "导入人")]
public string ImportUserName { get; set; }
[CurrentMemberKvid]
public Guid ImportUserKvid { get; set; }
// --- Analysis bookkeeping ("Anlysis" spelling is original) ---
[ApiMember(Description = "解析时间")]
public DateTime? AnlysisTime { get; set; }
[ApiMember(Description = "解析人")]
[CurrentMemberName]
public string AnlysisUserName { get; set; }
[CurrentMemberKvid]
public Guid AnlysisUserKvid { get; set; }
[ApiMember(Description = "是否解析")]
[InternalSetter]
public bool IsAnlysis { get; set; }
// --- Review bookkeeping ---
[ApiMember(Description = "复核时间")]
public DateTime? CheckTime { get; set; }
// NOTE(review): no [CurrentMemberName] here either; confirm.
[ApiMember(Description = "复核人")]
public string CheckUserName { get; set; }
[CurrentMemberKvid]
public Guid CheckUserKvid { get; set; }
[ApiMember(Description = "是否复核")]
[InternalSetter]
public bool IsChecked { get; set; }
// --- Classification ---
[ApiMember(Description = "语言")]
public string Language { get; set; }
[ApiMember(Description = "所属领域")]
public string DomainName { get; set; }
}
/// <summary>
/// Document record without the full-text column: the base Allbody property is hidden by an [Ignore]d one.
/// Status values: imported into the system = 0, full text extracted = 25, confirmed = 50, reviewed = 100.
/// </summary>
[Alias(Configs.TableAnalyseName)]
[Api(Description ="文献解析表")]
public class Analyse :AnalyseBase,IEntityInAssemblyDb
{
// Hides AnalyseBase.Allbody and marks the replacement [Ignore]
// (presumably so the large text column is left out of queries on this type; confirm against the ORM).
[Ignore]
public new string Allbody { get; set; }
}
/// <summary>
/// Document record carrying every AnalyseBase column, including Allbody. It uses the same alias
/// (Configs.TableAnalyseName) as Analyse; AnalyseJob reads and updates rows through this type.
/// Status values: imported into the system = 0, full text extracted = 25, confirmed = 50, reviewed = 100.
/// </summary>
[Alias(Configs.TableAnalyseName)]
[Api(Description = "文献解析表")]
public class AnalyseAll : AnalyseBase, IEntityInAssemblyDb
{
}
}
using Kivii;
using Kivii.DataAnnotations;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Njust.Pdf.Analysis.Entities
{
/// <summary>
/// One author entry (alias Configs.TableAuthorName, "PDF_Authors").
/// DisAuthorAnalysing counts these rows per (Name, DomainName, Language);
/// DisCoauthorAnalysing groups them by AnalyseKvid to build co-author pairs.
/// </summary>
[Alias(Configs.TableAuthorName)]
[Api(Description = "作者表")]
public class Author:EntityWithMetadata
{
[ApiMember(Description = "作者")]
[Required]
public string Name { get; set; }
[ApiMember(Description = "语言")]
public string Language { get; set; }
[ApiMember(Description = "所属领域")]
public string DomainName { get; set; }
// Presumably the Kvid of the document record this author belongs to; verify against the importer.
[DefaultEmptyGuid]
public Guid AnalyseKvid { get; set; }
}
}
using Kivii;
using Kivii.DataAnnotations;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Njust.Pdf.Analysis.Entities
{
/// <summary>
/// Author distribution row (alias Configs.TableDisAuthor, "PDF_DisAuthors"):
/// how many Author rows exist for one author within a domain and language.
/// Rows are rebuilt from scratch by DisAuthorAnalysing.
/// </summary>
[Alias(Configs.TableDisAuthor)]
[Api(Description = "作者分布")]
public class DisAuthor:EntityWithMetadata
{
[ApiMember(Description = "所属领域")]
public string DomainName { get; set; }
[ApiMember(Description = "语言")]
public string Language { get; set; }
// Not assigned by DisAuthorAnalysing in this file; stays null unless set elsewhere.
[ApiMember(Description = "年份")]
public string Year { get; set; }
[ApiMember(Description = "作者")]
public string Author { get; set; }
[ApiMember(Description = "作者发文量")]
public int Count { get; set; }
}
}
using Kivii;
using Kivii.DataAnnotations;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Njust.Pdf.Analysis.Entities
{
/// <summary>
/// Country co-operation distribution row (alias Configs.TableDisCoCountry, "PDF_DisCoCountry"):
/// a pair of countries and how often they appear together.
/// </summary>
[Alias(Configs.TableDisCoCountry)]
[Api(Description = "国家合作分布")]
public class DisCoCountry : EntityWithMetadata
{
[ApiMember(Description = "所属领域")]
public string DomainName { get; set; }
[ApiMember(Description = "语言")]
public string Language { get; set; }
// The two members of the pair.
[ApiMember(Description = "国家1")]
public string Country1 { get; set; }
[ApiMember(Description = "国家2")]
public string Country2 { get; set; }
// Co-operation frequency of the pair.
[ApiMember(Description = "国家合作频次")]
public int Count { get; set; }
}
}
using Kivii;
using Kivii.DataAnnotations;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Njust.Pdf.Analysis.Entities
{
/// <summary>
/// Keyword co-occurrence row (alias Configs.TableDisCoKeyword, "PDF_DisCoKeyword"):
/// a pair of keywords and how often they co-occur.
/// </summary>
[Alias(Configs.TableDisCoKeyword)]
[Api(Description = "关键词共现")]
public class DisCoKeyword : EntityWithMetadata
{
[ApiMember(Description = "所属领域")]
public string DomainName { get; set; }
[ApiMember(Description = "语言")]
public string Language { get; set; }
// The two members of the pair. Note the "KeyWord" casing, which differs from the Keyword entity name.
[ApiMember(Description = "关键词1")]
public string KeyWord1 { get; set; }
[ApiMember(Description = "关键词2")]
public string KeyWord2 { get; set; }
// Co-occurrence frequency of the pair.
[ApiMember(Description = "关键词共现频次")]
public int Count { get; set; }
}
}
using Kivii;
using Kivii.DataAnnotations;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Njust.Pdf.Analysis.Entities
{
/// <summary>
/// Organisation co-operation distribution row (alias Configs.TableDisCoOrgan, "PDF_DisCoOrgan"):
/// a pair of organisations and how often they appear together.
/// NOTE: the class name is spelled "DisCoOrgran" (sic). It is a public type name, so renaming it
/// would break callers; the alias constant uses the "Organ" spelling.
/// </summary>
[Alias(Configs.TableDisCoOrgan)]
[Api(Description = "机构合作分布")]
public class DisCoOrgran : EntityWithMetadata
{
[ApiMember(Description = "所属领域")]
public string DomainName { get; set; }
[ApiMember(Description = "语言")]
public string Language { get; set; }
// The two members of the pair.
[ApiMember(Description = "机构1")]
public string Organ1 { get; set; }
[ApiMember(Description = "机构2")]
public string Organ2 { get; set; }
// Co-operation frequency of the pair.
[ApiMember(Description = "机构合作频次")]
public int Count { get; set; }
}
}
using Kivii;
using Kivii.DataAnnotations;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Njust.Pdf.Analysis.Entities
{
/// <summary>
/// Co-author distribution row (alias Configs.TableDisCoauthor, "PDF_DisCoauthor"):
/// a pair of author names and the number of times the pair was generated.
/// Rows are rebuilt from scratch by DisCoauthorAnalysing, which sorts each document's
/// authors by name before pairing them.
/// </summary>
[Alias(Configs.TableDisCoauthor)]
[Api(Description = "作者合作分布")]
public class DisCoauthor : EntityWithMetadata
{
    // [ApiMember] descriptions added so this entity exposes the same API metadata
    // as the parallel DisCoCountry / DisCoOrgran / DisCoKeyword entities.
    [ApiMember(Description = "所属领域")]
    public string DomainName { get; set; }

    [ApiMember(Description = "语言")]
    public string Language { get; set; }

    // The two members of the pair.
    [ApiMember(Description = "作者1")]
    public string Author1 { get; set; }

    [ApiMember(Description = "作者2")]
    public string Author2 { get; set; }

    // Number of times the (Author1, Author2) pair occurred.
    [ApiMember(Description = "作者合作频次")]
    public int Count { get; set; }
}
}
using Kivii;
using Kivii.DataAnnotations;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Njust.Pdf.Analysis.Entities
{
/// <summary>
/// Country distribution row (alias Configs.TableDisCountry, "PDF_DisCountry"):
/// number of documents for one country, keyed additionally by domain, language and year.
/// </summary>
[Alias(Configs.TableDisCountry)]
[Api(Description = "国家分布")]
public class DisCountry : EntityWithMetadata
{
[ApiMember(Description = "所属领域")]
public string DomainName { get; set; }
[ApiMember(Description = "语言")]
public string Language { get; set; }
// Year is stored as a string, not a number.
[ApiMember(Description = "年份")]
public string Year { get; set; }
[ApiMember(Description = "国家")]
public string Country { get; set; }
[ApiMember(Description = "国家发表文献数量")]
public int Count { get; set; }
}
}
using Kivii;
using Kivii.DataAnnotations;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Njust.Pdf.Analysis.Entities
{
/// <summary>
/// Fund distribution row (alias Configs.TableDisFund, "PDF_DisFund"):
/// a document count for one fund, keyed additionally by domain, language and year.
/// </summary>
[Alias(Configs.TableDisFund)]
[Api(Description = "基金分布")]
public class DisFund : EntityWithMetadata
{
    // [ApiMember] descriptions added so this entity exposes the same API metadata
    // as the parallel DisSource / DisOrgan / DisCountry entities.
    [ApiMember(Description = "所属领域")]
    public string DomainName { get; set; }

    [ApiMember(Description = "语言")]
    public string Language { get; set; }

    // Year is stored as a string, matching the other distribution entities.
    [ApiMember(Description = "年份")]
    public string Year { get; set; }

    [ApiMember(Description = "基金")]
    public string Fund { get; set; }

    [ApiMember(Description = "基金资助文献数量")]
    public int Count { get; set; }
}
}
using Kivii;
using Kivii.DataAnnotations;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Njust.Pdf.Analysis.Entities
{
/// <summary>
/// Organisation distribution row (alias Configs.TableDisOrgan, "PDF_DisOrgan"):
/// number of documents for one organisation, keyed additionally by domain, language and year.
/// </summary>
[Alias(Configs.TableDisOrgan)]
[Api(Description = "机构分布")]
public class DisOrgan : EntityWithMetadata
{
[ApiMember(Description = "所属领域")]
public string DomainName { get; set; }
[ApiMember(Description = "语言")]
public string Language { get; set; }
// Year is stored as a string, not a number.
[ApiMember(Description = "年份")]
public string Year { get; set; }
[ApiMember(Description = "机构")]
public string Organ { get; set; }
[ApiMember(Description = "机构发表文献数量")]
public int Count { get; set; }
}
}
using Kivii;
using Kivii.DataAnnotations;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Njust.Pdf.Analysis.Entities
{
/// <summary>
/// Source (journal) distribution row (alias Configs.TableDisSource, "PDF_DisSource"):
/// number of documents for one journal, keyed additionally by domain, language and year.
/// </summary>
[Alias(Configs.TableDisSource)]
[Api(Description = "来源分布")]
public class DisSource : EntityWithMetadata
{
[ApiMember(Description = "所属领域")]
public string DomainName { get; set; }
[ApiMember(Description = "语言")]
public string Language { get; set; }
// Year is stored as a string, not a number.
[ApiMember(Description = "年份")]
public string Year { get; set; }
[ApiMember(Description = "期刊")]
public string Source { get; set; }
[ApiMember(Description = "期刊发表文献数量")]
public int Count { get; set; }
}
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Kivii;
using Kivii.DataAnnotations;
namespace Njust.Pdf.Analysis.Entities
{
/// <summary>
/// Time-sliced topic distribution row (alias Configs.TableDisTimeTopic, "PDF_DisTimeTopics"):
/// one topic of one year, with its top words, their probabilities, supporting documents
/// and the training parameters used. All values are stored as strings.
/// </summary>
[Alias(Configs.TableDisTimeTopic)]
[Api(Description = "时序主题分布")]
public class DisTimeTopic : EntityWithMetadata
{
[ApiMember(Description = "所属领域")]
public string DomainName { get; set; }
[ApiMember(Description = "语言")]
public string Language { get; set; }
[ApiMember(Description = "年份")]
public string Year { get; set; }
// Topic number.
[ApiMember(Description = "主题编号")]
public string Topic { get; set; }
// Top-10 topic words and, below, their probabilities; the serialisation format is not visible here.
[ApiMember(Description = "主题词TOP10")]
public string TopicWord { get; set; }
[ApiMember(Description = "主题词概率TOP10")]
public string TopicWordProbability { get; set; }
// List of documents supporting the topic; declared with StringLength(5000).
[ApiMember(Description = "主题支持文档列表")]
[StringLength(5000)]
public string Documentlist { get; set; }
// Training parameters.
[ApiMember(Description = "训练参数")]
public string Parameter { get; set; }
}
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Kivii;
using Kivii.DataAnnotations;
namespace Njust.Pdf.Analysis.Entities
{
/// <summary>
/// Topic-evolution row: links a topic of a source year to a topic of a target year
/// together with their topic words and cosine similarity. All values are stored as strings.
/// </summary>
/// <remarks>
/// The alias previously pointed at Configs.TableDisTimeTopic, so this entity shared an
/// alias with DisTimeTopic although their columns differ; it now uses the dedicated
/// Configs.TableDisTopicEvolution ("PDF_DisTopicEvolutions"). Rows written under the old
/// alias are not migrated by this change.
/// </remarks>
// NOTE(review): the Api description below still holds a constant name rather than a
// human-readable caption like its sibling entities; left unchanged, confirm the intended text.
[Alias(Configs.TableDisTopicEvolution)]
[Api(Description = "TableDisTopicEvolution")]
public class DisTopicEvolution : EntityWithMetadata
{
    [ApiMember(Description = "所属领域")]
    public string DomainName { get; set; }

    [ApiMember(Description = "语言")]
    public string Language { get; set; }

    // Year of the source topic.
    [ApiMember(Description = "源年份")]
    public string SourceYear { get; set; }

    // Year of the target topic.
    [ApiMember(Description = "目标年份")]
    public string TargetYear { get; set; }

    // Source topic number.
    [ApiMember(Description = "源主题编号")]
    public string SourceTopic { get; set; }

    // Target topic number. The "TargeTopic" spelling (sic) is part of the public
    // contract and is therefore kept.
    [ApiMember(Description = "目标主题编号")]
    public string TargeTopic { get; set; }

    [ApiMember(Description = "源主题词")]
    public string SourceTopicWord { get; set; }

    [ApiMember(Description = "目标主题词")]
    public string TargetTopicWord { get; set; }

    // Cosine similarity between the two topics, stored as a string.
    [ApiMember(Description = "余弦相似度")]
    public string CosSim { get; set; }
}
}
using Kivii;
using Kivii.DataAnnotations;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Njust.Pdf.Analysis.Entities
{
/// <summary>
/// Yearly trend row (alias Configs.TableDisTrend, "PDF_DisTrend"):
/// number of documents for one year within a domain and language.
/// </summary>
[Alias(Configs.TableDisTrend)]
[Api(Description = "年份分布")]
public class DisTrend : EntityWithMetadata
{
[ApiMember(Description = "所属领域")]
public string DomainName { get; set; }
[ApiMember(Description = "语言")]
public string Language { get; set; }
// Year is stored as a string, not a number.
[ApiMember(Description = "年份")]
public string Year { get; set; }
// Number of documents.
[ApiMember(Description = "文献数量")]
public int Count { get; set; }
}
}
using Kivii;
using Kivii.DataAnnotations;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Njust.Pdf.Analysis.Entities
{
/// <summary>
/// One keyword entry (alias Configs.TableKeywordName, "PDF_Keywords").
/// </summary>
[Alias(Configs.TableKeywordName)]
[Api(Description = "关键词表")]
public class Keyword : EntityWithMetadata
{
// The keyword text itself; marked [Required].
[ApiMember(Description = "关键词")]
[Required]
public string Title { get; set; }
[ApiMember(Description = "语言")]
public string Language { get; set; }
[ApiMember(Description = "所属领域")]
public string DomainName { get; set; }
// Presumably the Kvid of the document record this keyword belongs to; verify against the importer.
[DefaultEmptyGuid]
public Guid AnalyseKvid { get; set; }
}
}
using Kivii;
using Kivii.DataAnnotations;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Njust.Pdf.Analysis.Entities
{
/// <summary>
/// One organisation entry (alias Configs.TableOrganName, "PDF_Organs").
/// </summary>
[Alias(Configs.TableOrganName)]
[Api(Description = "机构表")]
public class Organ : EntityWithMetadata
{
// The organisation name; marked [Required].
[ApiMember(Description = "机构名称")]
[Required]
public string Title { get; set; }
[ApiMember(Description = "语言")]
public string Language { get; set; }
[ApiMember(Description = "所属领域")]
public string DomainName { get; set; }
// Presumably the Kvid of the document record this organisation belongs to; verify against the importer.
[DefaultEmptyGuid]
public Guid AnalyseKvid { get; set; }
}
}
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Njust.Pdf.Analysis
{
/// <summary>
/// Hashing helper for the analysis module.
/// </summary>
public class AnalyseExtension
{
    /// <summary>
    /// Computes the MD5 digest of the data read from <paramref name="fileName"/>
    /// and formats it as a lowercase hexadecimal string (two characters per digest byte).
    /// </summary>
    /// <param name="fileName">
    /// Stream to hash. Despite its name this is a stream, not a path; the parameter
    /// name is kept for compatibility with named-argument callers.
    /// </param>
    /// <returns>The digest as lowercase hex text.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="fileName"/> is null.</exception>
    public static string ComputeMD5(Stream fileName)
    {
        if (fileName == null)
        {
            throw new ArgumentNullException(nameof(fileName));
        }

        byte[] digest;
        // "using" releases the hash object even when ComputeHash throws;
        // the original only called Clear() on the success path.
        using (var md5 = System.Security.Cryptography.MD5.Create())
        {
            digest = md5.ComputeHash(fileName);
        }

        // Render each byte as two lowercase hex digits.
        var hex = new StringBuilder(digest.Length * 2);
        foreach (var b in digest)
        {
            hex.Append(b.ToString("x2"));
        }
        return hex.ToString();
    }
}
}
using Kivii;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Njust.Pdf.Analysis.Features
{
/// <summary>
/// Plugin type for this assembly. Its only work is initialising the
/// <c>Kivii.PdfBox</c> singleton; nothing is registered with the host.
/// </summary>
public class AnalyseFeature : IPlugin, IPreInitPlugin
{
    /// <summary>
    /// Initialises <c>Kivii.PdfBox.Instance</c>. The <paramref name="appHost"/> argument is not used.
    /// </summary>
    public void Configure(IAppHost appHost) => Kivii.PdfBox.Instance.Initialize();

    /// <summary>
    /// Performs no registration work. The <paramref name="appHost"/> argument is not used.
    /// </summary>
    public void Register(IAppHost appHost)
    {
    }
}
}
using Kivii;
using Kivii.Linq;
using Kivii.Pdf;
using Njust.Pdf.Analysis.Entities;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
namespace Njust.Pdf.Analysis.Jobs
{
/// <summary>
/// Job that extracts the full text of stored PDF files with PdfBox and saves it on the
/// matching <c>AnalyseAll</c> record, then dumps the PDF's image streams as files.
/// </summary>
/// <remarks>
/// Each run handles at most 10 records whose Status is below 4, oldest first.
/// A processed record gets Status 25. A failing record gets Status + 1 and aborts the
/// run, so it is picked up again on later runs until its Status no longer satisfies the filter.
/// </remarks>
public class AnalyseJob : IJob
{
    // Removes literal "\x.." escape text (a backslash, 'x', then letters/digits) from the
    // extracted text. The previous pattern was written as a JavaScript regex literal
    // ("/.../g"); in .NET the slashes and the trailing "g" are matched literally, so it
    // never removed anything. Built once instead of on every run.
    private static readonly Regex EscapeSequencePattern = new Regex(@"\\x[\da-zA-Z]+");

    /// <summary>Display name of the job.</summary>
    public string Name => "使用PDFBox对PDF进行全文抓取";

    /// <summary>Description of the job (same text as <see cref="Name"/>).</summary>
    public string Description => "使用PDFBox对PDF进行全文抓取";

    /// <summary>Context supplied by the job host; this job reports progress through its Message.</summary>
    public IJobContext TaskContext { get; set; }

    /// <summary>Nothing to release.</summary>
    public void Dispose()
    {
    }

    /// <summary>Runs one batch; returns false when the batch stopped on an error.</summary>
    public bool Execution()
    {
        return textPdfBox();
    }

    /// <summary>
    /// Processes one batch of pending records: locate the stored PDF, read its text,
    /// store it in Allbody, set Status to 25, then extract embedded images.
    /// </summary>
    /// <returns>
    /// true when every record of the batch was processed (or there was nothing to do);
    /// false as soon as one record fails or the batch query itself throws.
    /// </returns>
    private bool textPdfBox()
    {
        int succeed = 0, total = 0;
        // NOTE(review): the connection is not disposed here, as in the rest of this code base;
        // presumably its lifetime is managed by KiviiContext. Confirm.
        var conn = KiviiContext.GetOpenedDbConnection<AnalyseAll>();
        try
        {
            var query = conn.From<AnalyseAll>();
            query.Take(10);
            query.Skip(0);
            // Status < 4: records that failed are retried until their status reaches 4.
            query.Where(o => o.Status < 4);
            query.OrderBy(o => o.CreateTime).ThenBy(o => o.Kvid);
            var entities = conn.Select(query);
            if (entities.IsNullOrEmpty())
            {
                TaskContext.Message = "无数据需要处理";
                return true;
            }
            total = entities.Count;

            // Load the PDF file rows of the whole batch with a single query.
            var queryPdfEntityFile = conn.From<EntityDbFile<Analyse>>();
            queryPdfEntityFile.Where(o => o.ParentKvid == Guid.Empty & o.DbFolderPath == "/Pdf/Analysis" & o.Extension.ToLower() == ".pdf" & Sql.In(o.OwnerKvid, entities.ConvertAll(p => p.Kvid)));
            var pdfEntityFiles = conn.Select(queryPdfEntityFile);

            foreach (var item in entities)
            {
                try
                {
                    var pdfFile = pdfEntityFiles.FirstOrDefault(o => o.OwnerKvid == item.Kvid);
                    if (pdfFile == null) throw new Exception($"未找Pdf文件:{item.Title},HashCode:{item.HashCode}");
                    var physicalStorageFilePath = pdfFile.GetPhysicalPath();
                    if (!File.Exists(physicalStorageFilePath)) throw new Exception($"未找Pdf文件:{item.Title},HashCode:{item.HashCode}");

                    var text = Kivii.PdfBox.Instance.ReadText(physicalStorageFilePath);
                    text = EscapeSequencePattern.Replace(text, "");

                    // Only Allbody and Status are written back.
                    item.Allbody = text;
                    item.AddOnlyProperties(o => o.Allbody);
                    item.Status = 25; // full text extracted
                    item.AddOnlyProperties(o => o.Status);
                    conn.UpdateOnly(item);
                    succeed++;

                    // NOTE(review): if this call throws (for example the PDF cannot be opened),
                    // the catch below bumps the already-saved status from 25 to 26 and stops
                    // the batch. Behaviour kept as in the original; confirm it is intended.
                    ExtractImages(item, physicalStorageFilePath);
                }
                catch (Exception ex)
                {
                    TaskContext.Message = $"处理消息异常:共处理{succeed}/{total},Ex:{ex.Message}";
                    // Count the failure so the record eventually drops out of the retry filter.
                    item.Status += 1;
                    item.AddOnlyProperties(o => o.Status);
                    conn.UpdateOnly(item);
                    return false;
                }
            }
        }
        catch (Exception ex)
        {
            TaskContext.Message = $"处理消息异常:共处理{succeed}/{total},Ex:{ex.Message}";
            return false;
        }
        TaskContext.Message = $"完成:共处理{succeed}/{total}";
        return true;
    }

    /// <summary>
    /// Writes every image stream object found in the PDF to
    /// <c>/Storages/Pdf/Analysis/Images/{item.Kvid}/{objectIndex}.jpg</c> through the virtual file system.
    /// </summary>
    /// <remarks>
    /// Writing a single image is best effort: a failure is ignored and the next object is tried.
    /// NOTE(review): the raw stream bytes are stored unchanged under a ".jpg" name; presumably
    /// only JPEG-encoded image streams produce a viewable file. Confirm.
    /// </remarks>
    private static void ExtractImages(AnalyseAll item, string physicalStorageFilePath)
    {
        var imgPath = $"/Storages/Pdf/Analysis/Images/{item.Kvid}";
        var reader = new PdfReader(physicalStorageFilePath);
        for (var i = 0; i < reader.XrefSize; i++)
        {
            var pdfObj = reader.GetPdfObject(i);
            if (pdfObj == null || !pdfObj.IsStream()) continue;

            var pdfStream = (PdfStream)pdfObj;
            var subType = pdfStream.Get(PdfName.SUBTYPE);
            if (subType == null || subType.ToString() != PdfName.IMAGE.ToString()) continue;

            var bytes = PdfReader.GetStreamBytesRaw((PRStream)pdfStream);
            if (bytes == null) continue;

            try
            {
                // A new MemoryStream already starts at position 0.
                using (var ms = new MemoryStream(bytes))
                {
                    KiviiContext.VirtualFiles.WriteFile($"{imgPath}/{i}.jpg", ms);
                }
            }
            catch
            {
                // Deliberately ignored: one unwritable image must not fail the record.
            }
        }
    }
}
}
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<Import Project="$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props" Condition="Exists('$(MSBuildExtensionsPath)\$(MSBuildToolsVersion)\Microsoft.Common.props')" />
<PropertyGroup>
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
<ProjectGuid>{759281EA-6CAF-4BC4-A976-47D16AA57ACE}</ProjectGuid>
<OutputType>Library</OutputType>
<AppDesignerFolder>Properties</AppDesignerFolder>
<RootNamespace>Njust.Pdf.Analysis</RootNamespace>
<AssemblyName>Njust.Pdf.Analysis.V4.5</AssemblyName>
<TargetFrameworkVersion>v4.5</TargetFrameworkVersion>
<FileAlignment>512</FileAlignment>
<Deterministic>true</Deterministic>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' ">
<DebugSymbols>true</DebugSymbols>
<DebugType>full</DebugType>
<Optimize>false</Optimize>
<OutputPath>bin\Debug\</OutputPath>
<DefineConstants>DEBUG;TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' ">
<DebugType>pdbonly</DebugType>
<Optimize>true</Optimize>
<OutputPath>bin\Release\</OutputPath>
<DefineConstants>TRACE</DefineConstants>
<ErrorReport>prompt</ErrorReport>
<WarningLevel>4</WarningLevel>
</PropertyGroup>
<ItemGroup>
<!-- Kivii assemblies are referenced through HintPath from two different package roots:
     Common, Core and Linq come from ..\Njust\Njust\packages, while Pdf and PdfBox come from
     ..\..\Kivii.K5\packages. Both folders must exist relative to this project for the
     references to resolve through these paths. -->
<Reference Include="Kivii.Common.V4.5, Version=5.6.2021.5080, Culture=neutral, processorArchitecture=MSIL">
<HintPath>..\Njust\Njust\packages\Kivii.Common.5.6.2021.5080\lib\net45\Kivii.Common.V4.5.dll</HintPath>
</Reference>
<Reference Include="Kivii.Core.V4.5, Version=5.6.2021.5080, Culture=neutral, processorArchitecture=MSIL">
<HintPath>..\Njust\Njust\packages\Kivii.Core.5.6.2021.5080\lib\net45\Kivii.Core.V4.5.dll</HintPath>
</Reference>
<Reference Include="Kivii.Linq.V4.5, Version=5.6.2021.4260, Culture=neutral, processorArchitecture=MSIL">
<HintPath>..\Njust\Njust\packages\Kivii.Linq.5.6.2021.4260\lib\net45\Kivii.Linq.V4.5.dll</HintPath>
</Reference>
<Reference Include="Kivii.Pdf.V4.5, Version=5.5.2020.11000, Culture=neutral, processorArchitecture=MSIL">
<HintPath>..\..\Kivii.K5\packages\Kivii.Pdf.5.5.2020.11000\lib\net45\Kivii.Pdf.V4.5.dll</HintPath>
</Reference>
<Reference Include="Kivii.PdfBox.V4.5, Version=5.6.2021.6000, Culture=neutral, processorArchitecture=MSIL">
<HintPath>..\..\Kivii.K5\packages\Kivii.PdfBox.5.6.2021.6000\lib\net45\Kivii.PdfBox.V4.5.dll</HintPath>
</Reference>
<!-- Framework assemblies. -->
<Reference Include="System" />
<Reference Include="System.Core" />
<Reference Include="System.Xml.Linq" />
<Reference Include="System.Data.DataSetExtensions" />
<Reference Include="Microsoft.CSharp" />
<Reference Include="System.Data" />
<Reference Include="System.Net.Http" />
<Reference Include="System.Xml" />
</ItemGroup>
<ItemGroup>
<Compile Include="Configs.cs" />
<Compile Include="Entities\Analyse.cs" />
<Compile Include="Entities\Author.cs" />
<Compile Include="Entities\DisAuthor.cs" />
<Compile Include="Entities\DisCoauthor.cs" />
<Compile Include="Entities\DisCoCountry.cs" />
<Compile Include="Entities\DisCoKeyword.cs" />
<Compile Include="Entities\DisCoOrgran.cs" />
<Compile Include="Entities\DisCountry.cs" />
<Compile Include="Entities\DisFund.cs" />
<Compile Include="Entities\DisOrgran.cs" />
<Compile Include="Entities\DisSource.cs" />
<Compile Include="Entities\DisTimeTopic.cs" />
<Compile Include="Entities\DisTopicEvolution.cs" />
<Compile Include="Entities\DisTrend.cs" />
<Compile Include="Entities\Keyword.cs" />
<Compile Include="Entities\Organ.cs" />
<Compile Include="Extensions\AnalyseExtension.cs" />
<Compile Include="Features\AnalyseFeature.cs" />
<Compile Include="Jobs\AnalyseJob.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="Tranforms\RestfulAnalyse.cs" />
<Compile Include="Tranforms\RestfulDisAnalyse.cs" />
<Compile Include="Tranforms\RestfulDistribution.cs" />
</ItemGroup>
<ItemGroup>
<None Include="packages.config" />
</ItemGroup>
<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />
</Project>
\ No newline at end of file
using Njust.Pdf.Analysis.Features;
using System.Reflection;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
// General information about an assembly is controlled through the following
// set of attributes. Change these attribute values to modify the information
// associated with the assembly.
[assembly: AssemblyTitle("Njust.Pdf.Analysis.V4.5")]
[assembly: AssemblyDescription("")]
[assembly: AssemblyConfiguration("")]
[assembly: AssemblyCompany("")]
[assembly: AssemblyProduct("Njust.Pdf.Analysis.V4.5")]
[assembly: AssemblyCopyright("Copyright © 2021")]
[assembly: AssemblyTrademark("")]
[assembly: AssemblyCulture("")]
// Names AnalyseFeature as this assembly's feature type
// (presumably so the Kivii host loads that plugin; confirm against the framework).
[assembly:AssemblyLoadFeature(typeof(AnalyseFeature))]
// Setting ComVisible to false makes the types in this assembly
// not visible to COM components. If you need to access a type in this assembly from COM,
// set the ComVisible attribute to true on that type.
[assembly: ComVisible(false)]
// The following GUID is for the ID of the typelib if this project is exposed to COM.
[assembly: Guid("759281ea-6caf-4bc4-a976-47d16aa57ace")]
// Version information for an assembly consists of the following four values:
//
// Major version
// Minor version
// Build number
// Revision
//
// You can specify all the values, or you can default the build and revision numbers
// by using "*" as shown below:
// [assembly: AssemblyVersion("1.0.*")]
[assembly: AssemblyVersion("1.0.0.0")]
[assembly: AssemblyFileVersion("1.0.0.0")]
using Kivii;
using Kivii.Linq;
using Kivii.Web;
using Njust.Pdf.Analysis.Entities;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Njust.Pdf.Analysis.Tranforms
{
/// <summary>
/// Rebuilds the author distribution: every existing DisAuthor row is deleted and one new
/// row is inserted per distinct (Name, DomainName, Language) combination found among all
/// Author rows, with Count set to the number of Author rows in that combination.
/// </summary>
public class DisAuthorAnalysing : RestfulExecution<DisAuthor>
{
    /// <summary>
    /// Runs the rebuild and returns the freshly inserted rows.
    /// </summary>
    /// <returns>A RestfulQueryResponse whose Results are the new rows and whose Total is their count.</returns>
    public override object OnExecution(IRequest req, IResponse res)
    {
        var response = new RestfulQueryResponse<DisAuthor>();
        response.Results = new List<DisAuthor>();

        // Load every author row and bucket it in memory.
        var db = KiviiContext.GetOpenedDbConnection<Author>();
        var authorQuery = db.From<Author>();
        var authors = db.Select(authorQuery);
        var buckets = authors.GroupBy(a => new { a.Name, a.DomainName, a.Language });

        // The self-comparison predicate matches every row, i.e. the table is emptied first.
        db.InitEntityType<DisAuthor>();
        db.Delete<DisAuthor>(o => o.Kvid == o.Kvid);

        foreach (var bucket in buckets)
        {
            var row = new DisAuthor
            {
                Language = bucket.Key.Language,
                DomainName = bucket.Key.DomainName,
                Author = bucket.Key.Name,
                Count = bucket.Count()
            };
            db.Insert(row);
            row.RemoveAllOnlyProperties();
            response.Results.Add(row);
        }

        response.Total = response.Results.Count();
        return response;
    }
}
/// <summary>
/// Rebuilds the co-author distribution: every existing DisCoauthor row is deleted, then
/// for each AnalyseKvid with at least two Author rows all name-ordered author pairs are
/// generated, and one row is inserted per distinct (DomainName, Language, Author1, Author2)
/// with Count set to the number of times that pair was generated.
/// </summary>
public class DisCoauthorAnalysing : RestfulExecution<DisCoauthor>
{
    /// <summary>
    /// Runs the rebuild and returns the freshly inserted rows.
    /// </summary>
    /// <returns>A RestfulQueryResponse whose Results are the new rows and whose Total is their count.</returns>
    public override object OnExecution(IRequest req, IResponse res)
    {
        var response = new RestfulQueryResponse<DisCoauthor>();
        response.Results = new List<DisCoauthor>();

        var db = KiviiContext.GetOpenedDbConnection<Author>();
        var authorQuery = db.From<Author>();
        authorQuery.OrderBy(o => o.Name);
        var authors = db.Select(authorQuery);

        // The self-comparison predicate matches every row, i.e. the table is emptied first.
        db.InitEntityType<DisCoauthor>();
        db.Delete<DisCoauthor>(o => o.Kvid == o.Kvid);

        // Step 1: expand each document's author list into all (earlier, later) pairs.
        var pairs = new List<DisCoauthor>();
        foreach (var document in authors.GroupBy(a => a.AnalyseKvid))
        {
            if (document.Count() < 2) continue;

            // Sort in memory so Author1 never sorts after Author2 within a pair.
            var sorted = document.ToList().OrderBy(a => a.Name).ToList();
            for (var first = 0; first < sorted.Count - 1; first++)
            {
                for (var second = first + 1; second < sorted.Count; second++)
                {
                    // Domain and language are taken from the first author of the pair.
                    pairs.Add(new DisCoauthor
                    {
                        DomainName = sorted[first].DomainName,
                        Language = sorted[first].Language,
                        Author1 = sorted[first].Name,
                        Author2 = sorted[second].Name
                    });
                }
            }
        }

        // Step 2: collapse identical pairs into one counted row each and persist it.
        var pairGroups = pairs.GroupBy(p => new { p.DomainName, p.Language, p.Author1, p.Author2 });
        foreach (var pairGroup in pairGroups)
        {
            var row = new DisCoauthor
            {
                DomainName = pairGroup.Key.DomainName,
                Language = pairGroup.Key.Language,
                Author1 = pairGroup.Key.Author1,
                Author2 = pairGroup.Key.Author2,
                Count = pairGroup.Count()
            };
            db.Insert(row);
            row.RemoveAllOnlyProperties();
            response.Results.Add(row);
        }

        response.Total = response.Results.Count();
        return response;
    }
}
}
<?xml version="1.0" encoding="utf-8"?>
<packages>
<!-- NuGet packages of the Kivii framework used by this project, all targeting net45.
     The ids and versions listed here match the folder names in the project file's HintPath entries. -->
<package id="Kivii.Common" version="5.6.2021.5080" targetFramework="net45" />
<package id="Kivii.Core" version="5.6.2021.5080" targetFramework="net45" />
<package id="Kivii.Linq" version="5.6.2021.4260" targetFramework="net45" />
<package id="Kivii.Pdf" version="5.5.2020.11000" targetFramework="net45" />
<package id="Kivii.PdfBox" version="5.6.2021.6000" targetFramework="net45" />
</packages>
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment