Commit 3687ab4d by 陶然

升级优化

parent 9993a834
......@@ -64,8 +64,7 @@ namespace Njust.Pdf.Analysis.Entities
public string Mainbody { get; set; }
/// <summary>
/// Full text of the document (API description: "全文").
/// </summary>
// NOTE(review): both [StringLength(int.MaxValue)] and [Ignore] appear on this property.
// [Ignore] presumably excludes it from the persisted schema, which would make the length
// hint moot — confirm which of the two attributes is actually intended.
[ApiMember(Description = "全文")]
[InternalSetter]
[StringLength(int.MaxValue)]
[Ignore]
public string Allbody { get; set; }
[ApiMember(Description = "参考文献")]
......
......@@ -12,103 +12,103 @@ using System.Threading.Tasks;
namespace Njust.Pdf.Analysis.Jobs
{
/// <summary>
/// Job that reads the text of pending PDF files through <c>Kivii.PdfBox</c> and stores it
/// on the matching <c>AnalyseAll</c> records.
/// </summary>
public class AnalyseJob : IJob
{
    /// <summary>Display name of the job.</summary>
    public string Name => "使用PDFBox对PDF进行全文抓取";

    /// <summary>Description of the job; identical to <see cref="Name"/>.</summary>
    public string Description => "使用PDFBox对PDF进行全文抓取";

    /// <summary>Context used to report the outcome message of a run.</summary>
    public IJobContext TaskContext { get; set; }

    /// <summary>No resources are held by the job, so there is nothing to release.</summary>
    public void Dispose()
    {
    }

    /// <summary>Runs one batch of the job.</summary>
    /// <returns><c>true</c> when the batch completed (or nothing was pending); otherwise <c>false</c>.</returns>
    public bool Execution() => textPdfBox();

    /// <summary>
    /// Loads up to five records that are not yet analysed, extracts the text of each record's
    /// PDF and saves it. Processing stops at the first record that fails.
    /// </summary>
    /// <returns><c>true</c> on success or when there is nothing to do; <c>false</c> after any failure.</returns>
    private bool textPdfBox()
    {
        int done = 0, batchSize = 0;
        var db = KiviiContext.GetOpenedDbConnection<AnalyseAll>();
        try
        {
            var pendingQuery = db.From<AnalyseAll>();
            pendingQuery.Take(5);
            pendingQuery.Skip(0);
            // Failed records keep IsAnlysis == false, so they are picked up again on a later run.
            pendingQuery.Where(o => o.IsAnlysis == false);
            pendingQuery.OrderBy(o => o.CreateTime).ThenBy(o => o.Kvid);
            var entities = db.Select(pendingQuery);
            if (entities.IsNullOrEmpty())
            {
                TaskContext.Message = "无数据需要处理";
                return true;
            }

            batchSize = entities.Count;

            // Fetch the PDF file rows of the whole batch with a single query.
            var fileQuery = db.From<EntityDbFile<Analyse>>();
            fileQuery.Where(o => o.ParentKvid == Guid.Empty & o.DbFolderPath == "/Pdf/Analysis" & o.Extension.ToLower() == ".pdf" & Sql.In(o.OwnerKvid, entities.ConvertAll(p => p.Kvid)));
            var batchFiles = db.Select(fileQuery);

            foreach (var record in entities)
            {
                try
                {
                    var fileRow = batchFiles.FirstOrDefault(f => f.OwnerKvid == record.Kvid);
                    if (fileRow == null)
                    {
                        throw new Exception($"未找Pdf文件:{record.Title},HashCode:{record.HashCode}");
                    }

                    var diskPath = fileRow.GetPhysicalPath();
                    if (!File.Exists(diskPath))
                    {
                        throw new Exception($"未找Pdf文件:{record.Title},HashCode:{record.HashCode}");
                    }

                    Console.WriteLine("-------------------------Pre-------------------------");
                    Console.WriteLine($"-------------------------{diskPath}-------------------------");
                    var extracted = Kivii.PdfBox.Instance.ReadText(diskPath);
                    Console.WriteLine("-------------------------Ok-------------------------");

                    // Each value is assigned and registered for the partial update in turn;
                    // the order is kept as-is because a failure midway leaves earlier
                    // registrations in place for the update done in the catch block.
                    record.Allbody = extracted;
                    record.AddOnlyProperties(o => o.Allbody);
                    record.AnlysisTime = DateTime.Now;
                    record.AddOnlyProperties(o => o.AnlysisTime);
                    record.AnlysisUserName = KiviiContext.CurrentMember.FullName;
                    record.AddOnlyProperties(o => o.AnlysisUserName);
                    record.IsAnlysis = true;
                    record.AddOnlyProperties(o => o.IsAnlysis);

                    Console.WriteLine("-------------------------PreUpdateOnly-------------------------");
                    db.UpdateOnly(record);
                    Console.WriteLine("-------------------------UpdateOnly-------------------------");
                    done++;
                }
                catch (Exception failure)
                {
                    Console.WriteLine("-------------------------Ex-------------------------");
                    TaskContext.Message = $"处理消息异常:共处理{done}/{batchSize},Kvid:{record.Kvid},Ex:{failure.Message}";
                    // Bump the Status counter of the failing record, persist it, then abort the batch.
                    record.Status += 1;
                    record.AddOnlyProperties(o => o.Status);
                    db.UpdateOnly(record);
                    Console.WriteLine("-------------------------ExUpdate-------------------------");
                    return false;
                }
            }
        }
        catch (Exception failure)
        {
            TaskContext.Message = $"处理消息异常:共处理{done}/{batchSize},Ex:{failure.Message}";
            return false;
        }

        TaskContext.Message = $"完成:共处理{done}/{batchSize}";
        return true;
    }
}
//public class AnalyseJob : IJob
//{
// public string Name
// {
// get
// {
// return "使用PDFBox对PDF进行全文抓取";
// }
// }
// public string Description
// {
// get
// {
// return "使用PDFBox对PDF进行全文抓取";
// }
// }
// public IJobContext TaskContext { get; set; }
// public void Dispose()
// {
// }
// public bool Execution()
// {
// return textPdfBox();
// }
// private bool textPdfBox()
// {
// int succeed = 0, total = 0;
// var conn = KiviiContext.GetOpenedDbConnection<AnalyseAll>();
// try
// {
// var query = conn.From<AnalyseAll>();
// query.Take(5);
// query.Skip(0);
// query.Where(o => o.IsAnlysis == false);//如果异常了,会重试
// query.OrderBy(o => o.CreateTime).ThenBy(o => o.Kvid);
// var entities = conn.Select(query);
// if (entities.IsNullOrEmpty())
// {
// TaskContext.Message = "无数据需要处理";
// return true;
// }
// total = entities.Count;
// var queryPdfEntityFile = conn.From<EntityDbFile<Analyse>>();
// queryPdfEntityFile.Where(o => o.ParentKvid == Guid.Empty & o.DbFolderPath == "/Pdf/Analysis" & o.Extension.ToLower() == ".pdf" & Sql.In(o.OwnerKvid, entities.ConvertAll(p => p.Kvid)));
// var pdfEntityFiles = conn.Select(queryPdfEntityFile);
// foreach (var item in entities)
// {
// try
// {
// var pdfFile = pdfEntityFiles.FirstOrDefault(o => o.OwnerKvid == item.Kvid);
// if (pdfFile == null) throw new Exception($"未找Pdf文件:{item.Title},HashCode:{item.HashCode}");
// var physicalStorageFilePath = pdfFile.GetPhysicalPath();
// if (!File.Exists(physicalStorageFilePath)) throw new Exception($"未找Pdf文件:{item.Title},HashCode:{item.HashCode}");
// Console.WriteLine("-------------------------Pre-------------------------");
// Console.WriteLine($"-------------------------{physicalStorageFilePath}-------------------------");
// var text = Kivii.PdfBox.Instance.ReadText(physicalStorageFilePath);
// Console.WriteLine("-------------------------Ok-------------------------");
// item.Allbody = text;
// item.AddOnlyProperties(o => o.Allbody);
// item.AnlysisTime = DateTime.Now;
// item.AddOnlyProperties(o => o.AnlysisTime);
// item.AnlysisUserName = KiviiContext.CurrentMember.FullName;
// item.AddOnlyProperties(o => o.AnlysisUserName);
// item.IsAnlysis = true;
// item.AddOnlyProperties(o => o.IsAnlysis);
// Console.WriteLine("-------------------------PreUpdateOnly-------------------------");
// conn.UpdateOnly(item);
// Console.WriteLine("-------------------------UpdateOnly-------------------------");
// succeed++;
// }
// catch (Exception ex)
// {
// Console.WriteLine("-------------------------Ex-------------------------");
// TaskContext.Message = $"处理消息异常:共处理{succeed}/{total},Kvid:{item.Kvid},Ex:{ex.Message}";
// item.Status += 1;
// item.AddOnlyProperties(o => o.Status);
// conn.UpdateOnly(item);
// Console.WriteLine("-------------------------ExUpdate-------------------------");
// return false;
// }
// }
// }
// catch (Exception ex)
// {
// TaskContext.Message = $"处理消息异常:共处理{succeed}/{total},Ex:{ex.Message}";
// return false;
// }
// TaskContext.Message = $"完成:共处理{succeed}/{total}";
// return true;
// }
//}
public class AnalyseImageJob : IJob
{
......
......@@ -2,6 +2,7 @@
using Kivii.Linq;
using Kivii.Pdf;
using Kivii.Web;
using Kivii.Zip;
using Njust.Pdf.Analysis.Entities;
using System;
using System.Collections.Generic;
......@@ -120,6 +121,7 @@ namespace Njust.Pdf.Analysis.Tranforms
var exist = exists.FirstOrDefault(o => o.HashCode == item.HashCode);
if (exist != null)
{
stream.Position = 0;
KiviiContext.VirtualFiles.WriteFile(exist.ImportPath, stream);
rtns.Results.Add(exist);
continue;
......@@ -219,8 +221,8 @@ namespace Njust.Pdf.Analysis.Tranforms
}
}
}
exist.Allbody = item.Allbody.Replace("\n", "");
exist.AddOnlyProperties(o => o.Allbody);
//exist.Allbody = item.Allbody.Replace("\n", "");
//exist.AddOnlyProperties(o => o.Allbody);
exist.AnlysisTime = DateTime.Now;
exist.AddOnlyProperties(o => o.AnlysisTime);
exist.AnlysisUserName = KiviiContext.CurrentMember.FullName;
......@@ -425,6 +427,61 @@ namespace Njust.Pdf.Analysis.Tranforms
}
}
/// <summary>
/// Extracts the text of the PDF attached to each requested <c>Analyse</c> record and returns it
/// either as a query response or, when <see cref="IsFiles"/> is set, as a zip archive holding
/// one UTF-8 text file per record.
/// </summary>
public class AnalyseAllbody : RestfulExecution<Analyse>
{
    /// <summary>Kvids of the records to process; must not be null or empty.</summary>
    public List<Guid> Kvids { get; set; }

    /// <summary>When <c>true</c> the result is returned as a zip download instead of a query response.</summary>
    public bool IsFiles { get; set; }

    /// <summary>
    /// Reads the PDF text for every requested record.
    /// </summary>
    /// <param name="req">Current request (not used by this method).</param>
    /// <param name="res">Current response (not used by this method).</param>
    /// <returns>
    /// An <c>HttpResult</c> wrapping the zip stream when <see cref="IsFiles"/> is <c>true</c>;
    /// otherwise a <c>RestfulQueryResponse</c> whose results carry the extracted text in <c>Allbody</c>.
    /// </returns>
    /// <exception cref="Exception">
    /// Thrown when a record has no matching PDF file row or the physical file does not exist.
    /// </exception>
    public override object OnExecution(IRequest req, IResponse res)
    {
        Kvids.ThrowIfNullOrEmpty("Kvids is needed!");
        var conn = KiviiContext.GetOpenedDbConnection<Analyse>();
        var analysis = conn.SelectByIds<Analyse>(Kvids);
        analysis.ThrowIfNullOrEmpty("未找到目标!");

        // Load the PDF file rows of all requested records with a single query.
        var queryPdfEntityFile = conn.From<EntityDbFile<Analyse>>();
        queryPdfEntityFile.Where(o => o.ParentKvid == Guid.Empty & o.DbFolderPath == "/Pdf/Analysis" & o.Extension.ToLower() == ".pdf" & Sql.In(o.OwnerKvid, Kvids));
        var pdfEntityFiles = conn.Select(queryPdfEntityFile);

        var rtns = new RestfulQueryResponse<Analyse>();
        rtns.Results = new List<Analyse>();
        foreach (var item in analysis)
        {
            var pdfFile = pdfEntityFiles.FirstOrDefault(o => o.OwnerKvid == item.Kvid);
            if (pdfFile == null)
            {
                throw new Exception($"未找Pdf文件:{item.Title},HashCode:{item.HashCode}");
            }

            var physicalStorageFilePath = pdfFile.GetPhysicalPath();
            if (!File.Exists(physicalStorageFilePath))
            {
                throw new Exception($"未找Pdf文件:{item.Title},HashCode:{item.HashCode}");
            }

            item.Allbody = Kivii.PdfBox.Instance.ReadText(physicalStorageFilePath);
            rtns.Results.Add(item);
        }

        if (IsFiles)
        {
            // The stream is handed to HttpResult, so it is intentionally not disposed here.
            var memoryStream = new MemoryStream();
            using (var zipFile = new ZipFile(System.Text.Encoding.UTF8))
            {
                foreach (var item in rtns.Results)
                {
                    // GetBytes throws on null, so a record without extracted text yields an empty file.
                    byte[] buffer = System.Text.Encoding.UTF8.GetBytes(item.Allbody ?? string.Empty);
                    zipFile.AddEntry(StripPdfExtension(item.FileName) + "_全文.txt", buffer);
                }
                zipFile.Save(memoryStream);
            }
            memoryStream.Position = 0;
            return new HttpResult(memoryStream, $"导出全文共{rtns.Results.Count}份文件.zip", true);
        }

        rtns.Total = rtns.Results.Count;
        return rtns;
    }

    /// <summary>
    /// Removes a trailing ".pdf" extension (any letter case) while leaving the rest of the
    /// name, including its casing and any inner ".pdf" occurrences, untouched.
    /// </summary>
    /// <param name="fileName">File name to trim; may be null.</param>
    /// <returns>The name without the trailing extension, or an empty string for null/empty input.</returns>
    private static string StripPdfExtension(string fileName)
    {
        if (string.IsNullOrEmpty(fileName))
        {
            return string.Empty;
        }

        const string pdfExtension = ".pdf";
        return fileName.EndsWith(pdfExtension, StringComparison.OrdinalIgnoreCase)
            ? fileName.Substring(0, fileName.Length - pdfExtension.Length)
            : fileName;
    }
}
#region CRUDQ
[RequiresAnyRole(SystemRoles.Everyone)]
public class AuthorCreate : RestfulCreate<Author>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment